logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git

cut.c (8894B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 202405L
  5. #include "../lib/reallocarray.h"
  6. #include <assert.h>
  7. #include <ctype.h>
  8. #include <errno.h>
  9. #include <locale.h>
  10. #include <stdbool.h>
  11. #include <stdint.h> // size_t
  12. #include <stdio.h> // fprintf, fopen
  13. #include <string.h> // strerror
  14. #include <unistd.h> // getopt
  15. #include <wchar.h>
  16. #undef MIN
  17. #define MIN(a, b) (((a) < (b)) ? (a) : (b))
  18. enum cut_mode
  19. {
  20. CUT_MODE_NONE = 0,
  21. CUT_MODE_B = 1,
  22. CUT_MODE_C = 2,
  23. CUT_MODE_F = 3,
  24. };
  25. char delim = '\t';
  26. bool opt_n = false, opt_s = false;
  27. enum cut_mode mode = CUT_MODE_NONE;
  28. bool *list = NULL;
  29. size_t list_len = 0;
  30. bool nostop = false;
  31. const char *argv0 = "cut";
  32. static size_t
  33. parse_list_num(char **s)
  34. {
  35. char *endptr = NULL;
  36. errno = 0;
  37. size_t n = strtoul(*s, &endptr, 10);
  38. if(errno != 0)
  39. {
  40. fprintf(stderr, "%s: error: Failed parsing '%s' as a number: %s\n", argv0, *s, strerror(errno));
  41. return 0;
  42. }
  43. if(n < 1)
  44. {
  45. fprintf(stderr, "%s: error: Invalid number in list: %zu\n", argv0, n);
  46. return 0;
  47. }
  48. if(endptr != NULL && strchr(",-", *endptr) == NULL)
  49. {
  50. fprintf(stderr, "%s: error: Invalid character in list: %c\n", argv0, *endptr);
  51. return 0;
  52. }
  53. *s = endptr;
  54. return n;
  55. }
  56. static int
  57. parse_list(char *s)
  58. {
  59. while(true)
  60. {
  61. if(s == NULL || *s == '\0') break;
  62. if(*s == ',')
  63. {
  64. fprintf(stderr, "%s: error: Empty list element\n", argv0);
  65. return -1;
  66. }
  67. size_t min = 1;
  68. if(*s != '-')
  69. {
  70. min = parse_list_num(&s);
  71. if(min == 0) return -1;
  72. }
  73. // min-- as cut(1) is 1-indexed and max needs to be at least min+1
  74. size_t max = min--;
  75. if(s && *s == '-')
  76. {
  77. s++;
  78. if(!isdigit(*s))
  79. {
  80. nostop = true;
  81. }
  82. else
  83. {
  84. max = parse_list_num(&s);
  85. if(max == 0) return -1;
  86. if(max < min)
  87. {
  88. fprintf(stderr, "%s: error: Decreasing range: %zu-%zu\n", argv0, min, max);
  89. return -1;
  90. }
  91. }
  92. }
  93. // Needs to be after *s == '-'
  94. if(s && *s == ',') s++;
  95. if(max > list_len)
  96. {
  97. list = reallocarray(list, max, sizeof(*list));
  98. if(list == NULL)
  99. {
  100. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  101. return -1;
  102. }
  103. if(min > list_len)
  104. {
  105. memset(list + list_len, 0, min - list_len);
  106. }
  107. list_len = max;
  108. }
  109. memset(list + min, 1, max - min);
  110. }
  111. if(list_len == 0)
  112. {
  113. fprintf(stderr, "%s: error: Empty list\n", argv0);
  114. return -1;
  115. }
  116. return 0;
  117. }
  118. static int
  119. cut_b(FILE *in, const char *filename)
  120. {
  121. char *line = NULL;
  122. size_t line_len = 0;
  123. int err = 0;
  124. while(err == 0)
  125. {
  126. errno = 0;
  127. ssize_t nread = getline(&line, &line_len, in);
  128. if(nread < 0)
  129. {
  130. if(errno != 0)
  131. {
  132. fprintf(
  133. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  134. err = 1;
  135. }
  136. break;
  137. }
  138. if(nread == 0)
  139. {
  140. fputc('\n', stdout);
  141. continue;
  142. }
  143. if(line[nread - 1] == '\n') line[nread--] = '\0';
  144. for(size_t i = 0; i < MIN(list_len, (size_t)nread); i++)
  145. if(list[i]) fputc(line[i], stdout);
  146. if(nostop && (size_t)nread > list_len) fwrite(line + list_len, nread - list_len, 1, stdout);
  147. fputc('\n', stdout);
  148. }
  149. if(line_len != 0) free(line);
  150. return err;
  151. }
  152. static int
  153. cut_c(FILE *in, const char *filename)
  154. {
  155. char *line = NULL;
  156. size_t line_len = 0;
  157. int err = 0;
  158. wchar_t *line_w = NULL;
  159. ssize_t line_wsz = 0;
  160. while(err == 0)
  161. {
  162. errno = 0;
  163. ssize_t nread = getline(&line, &line_len, in);
  164. if(nread < 0)
  165. {
  166. if(errno != 0)
  167. {
  168. fprintf(
  169. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  170. err = 1;
  171. }
  172. break;
  173. }
  174. if(nread == 0)
  175. {
  176. fputc('\n', stdout);
  177. continue;
  178. }
  179. if(line[nread - 1] == '\n') line[nread--] = '\0';
  180. if(nread > line_wsz)
  181. {
  182. line_w = reallocarray(line_w, nread, sizeof(*line_w));
  183. if(line_w == NULL)
  184. {
  185. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  186. return -1;
  187. }
  188. line_wsz = nread;
  189. }
  190. assert(line_wsz > 0);
  191. size_t wcread = mbstowcs(line_w, line, line_wsz);
  192. if(wcread == (size_t)-1)
  193. {
  194. fprintf(stderr,
  195. "%s: error: Failed parsing characters in file '%s': %s\n",
  196. argv0,
  197. filename,
  198. strerror(errno));
  199. err = 1;
  200. break;
  201. }
  202. //DEBUG fprintf(stderr, "cut: mbstowcs(_, _, %zu) => %zu\n", nread, wcread);
  203. size_t i = 0;
  204. for(; i < MIN(list_len, wcread); i++)
  205. if(list[i]) fputwc(line_w[i], stdout);
  206. if(nostop && wcread > list_len)
  207. {
  208. for(; i < wcread; i++)
  209. fputwc(line_w[i], stdout);
  210. }
  211. fputc('\n', stdout);
  212. }
  213. if(line_len != 0) free(line);
  214. return err;
  215. }
  216. static int
  217. cut_f(FILE *in, const char *filename)
  218. {
  219. char *line = NULL;
  220. size_t line_len = 0;
  221. int err = 0;
  222. while(err == 0)
  223. {
  224. errno = 0;
  225. ssize_t nread = getline(&line, &line_len, in);
  226. if(nread < 0)
  227. {
  228. if(errno != 0)
  229. {
  230. fprintf(
  231. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  232. err = 1;
  233. }
  234. break;
  235. }
  236. if(nread == 0)
  237. {
  238. fputc('\n', stdout);
  239. continue;
  240. }
  241. if(line[nread - 1] == '\n') line[--nread] = '\0';
  242. size_t di = 0;
  243. for(; di < (size_t)nread; di++)
  244. if(line[di] == delim) break;
  245. if(di == (size_t)nread)
  246. {
  247. if(!opt_s) puts(line);
  248. continue;
  249. }
  250. bool need_sep = false;
  251. char *c = line;
  252. for(size_t pos = 0, i = 0; pos <= (size_t)nread; pos++)
  253. {
  254. if(pos < (size_t)nread && line[pos] != delim) continue;
  255. line[pos] = '\0';
  256. if(i >= list_len)
  257. {
  258. if(!nostop) break;
  259. if(need_sep) fputc(delim, stdout);
  260. fputs(c, stdout);
  261. need_sep = true;
  262. }
  263. else if(list[i])
  264. {
  265. if(need_sep) fputc(delim, stdout);
  266. fputs(c, stdout);
  267. need_sep = true;
  268. }
  269. i++;
  270. c = line + pos + 1;
  271. }
  272. fputc('\n', stdout);
  273. }
  274. if(line_len != 0) free(line);
  275. return err;
  276. }
  277. static int
  278. cut(FILE *in, const char *filename)
  279. {
  280. switch(mode)
  281. {
  282. case CUT_MODE_NONE:
  283. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  284. return 1;
  285. case CUT_MODE_B:
  286. return cut_b(in, filename);
  287. case CUT_MODE_C:
  288. return cut_c(in, filename);
  289. case CUT_MODE_F:
  290. return cut_f(in, filename);
  291. default:
  292. abort();
  293. }
  294. }
  295. int
  296. main(int argc, char *argv[])
  297. {
  298. char *opt_list = NULL;
  299. errno = 0;
  300. setlocale(LC_ALL, "");
  301. if(errno != 0)
  302. {
  303. fprintf(stderr, "%s: warning: Failed to initialize locales: %s\n", argv0, strerror(errno));
  304. errno = 0;
  305. }
  306. int c = -1;
  307. while((c = getopt(argc, argv, ":b:c:d:f:ns")) != -1)
  308. {
  309. switch(c)
  310. {
  311. case 'b':
  312. if(opt_list != NULL)
  313. {
  314. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  315. return 1;
  316. }
  317. mode = CUT_MODE_B;
  318. opt_list = optarg;
  319. break;
  320. case 'c':
  321. if(opt_list != NULL)
  322. {
  323. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  324. return 1;
  325. }
  326. mode = CUT_MODE_C;
  327. opt_list = optarg;
  328. break;
  329. case 'f':
  330. if(opt_list != NULL)
  331. {
  332. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  333. return 1;
  334. }
  335. mode = CUT_MODE_F;
  336. opt_list = optarg;
  337. break;
  338. case 'd':
  339. if(optarg[0] != '\0' && optarg[1] != '\0')
  340. {
  341. fprintf(stderr,
  342. "%s: error: Option '-d' only accepts single characters, got \"%s\"\n",
  343. argv0,
  344. optarg);
  345. return 1;
  346. }
  347. delim = optarg[0];
  348. break;
  349. case 'n':
  350. opt_n = true;
  351. break;
  352. case 's':
  353. opt_s = true;
  354. break;
  355. case ':':
  356. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  357. return 1;
  358. case '?':
  359. fprintf(stderr, "%s: error: Unhandled option '-%c'\n", argv0, optopt);
  360. return 1;
  361. default:
  362. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  363. abort();
  364. }
  365. }
  366. argc -= optind;
  367. argv += optind;
  368. if(mode == CUT_MODE_NONE)
  369. {
  370. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  371. return 1;
  372. }
  373. if(parse_list(opt_list) < 0) return 1;
  374. #if 0
  375. fprintf(stderr, "[DEBUG] list: ");
  376. for(size_t i = 0; i < list_len; i++)
  377. {
  378. fputc(list[i] ? '1' : '0', stderr);
  379. }
  380. fputc('\n', stderr);
  381. #endif
  382. if(argc <= 0) return cut(stdin, "<stdin>");
  383. for(int i = 0; i < argc; i++)
  384. {
  385. FILE *in = fopen(argv[i], "r");
  386. if(in == NULL)
  387. {
  388. fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, argv[i], strerror(errno));
  389. return 1;
  390. }
  391. int ret = cut(in, argv[i]);
  392. if(fclose(in) < 0)
  393. {
  394. fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, argv[i], strerror(errno));
  395. return 1;
  396. }
  397. if(ret != 0) return 1;
  398. }
  399. if(fclose(stdin) != 0)
  400. {
  401. fprintf(stderr, "%s: error: Failed closing <stdin>: %s\n", argv0, strerror(errno));
  402. return 1;
  403. }
  404. if(fclose(stdout) != 0)
  405. {
  406. fprintf(stderr, "%s: error: Failed closing <stdout>: %s\n", argv0, strerror(errno));
  407. return 1;
  408. }
  409. return 0;
  410. }