logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

cut.c (10976B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 202405L
  #include "../config.h"
  #include "../lib/reallocarray.h"
  #include "../libutils/getopt_nolong.h"
  #include <assert.h>
  #include <ctype.h>
  #include <errno.h>
  #include <locale.h>
  #include <stdbool.h>
  #include <stdint.h> // size_t
  #include <stdio.h> // fprintf, fopen
  #include <stdlib.h> // abort, free, strtoul
  #include <string.h> // strerror
  #include <unistd.h> // getopt
  #include <wchar.h>
  #ifdef HAS_GETOPT_LONG
  #include <getopt.h>
  #endif
  // Local MIN: any earlier definition is dropped first.
  // Note that both arguments may be evaluated twice.
  #undef MIN
  #define MIN(a, b) (((a) < (b)) ? (a) : (b))

  // Which list option (-b, -c or -f) was selected on the command line.
  enum cut_mode
  {
      CUT_MODE_NONE = 0,
      CUT_MODE_B,
      CUT_MODE_C,
      CUT_MODE_F,
  };

  // Program name used as prefix of every diagnostic.
  const char *argv0 = "cut";

  // Selected operating mode; stays CUT_MODE_NONE until -b, -c or -f is seen.
  enum cut_mode mode = CUT_MODE_NONE;

  // Flags set by the -n and -s options.
  bool opt_n = false;
  bool opt_s = false;

  // Field delimiter (-d) compared against input bytes in field mode.
  char delim = '\t';

  // Record delimiter in byte and wide-character form; -z switches both to NUL.
  char line_delim = '\n';
  wchar_t line_delim_w = L'\n';

  // Selection table filled by parse_list(): list[i] is true when the
  // 0-based position i is selected; list_len is the number of entries.
  bool *list = NULL;
  size_t list_len = 0;

  // Set when the list contains an open-ended range ("N-").
  bool nostop = false;
  39. static size_t
  40. parse_list_num(char **s)
  41. {
  42. char *endptr = NULL;
  43. errno = 0;
  44. size_t n = strtoul(*s, &endptr, 10);
  45. if(errno != 0)
  46. {
  47. fprintf(stderr, "%s: error: Failed parsing '%s' as a number: %s\n", argv0, *s, strerror(errno));
  48. return 0;
  49. }
  50. if(n < 1)
  51. {
  52. fprintf(stderr, "%s: error: Invalid number in list: %zu\n", argv0, n);
  53. return 0;
  54. }
  55. if(endptr != NULL && strchr(",-", *endptr) == NULL)
  56. {
  57. fprintf(stderr, "%s: error: Invalid character in list: %c\n", argv0, *endptr);
  58. return 0;
  59. }
  60. *s = endptr;
  61. return n;
  62. }
  63. static int
  64. parse_list(char *s)
  65. {
  66. while(true)
  67. {
  68. if(s == NULL || *s == '\0') break;
  69. if(*s == ',')
  70. {
  71. fprintf(stderr, "%s: error: Empty list element\n", argv0);
  72. return -1;
  73. }
  74. size_t min = 1;
  75. if(*s != '-')
  76. {
  77. min = parse_list_num(&s);
  78. if(min == 0) return -1;
  79. }
  80. // min-- as cut(1) is 1-indexed and max needs to be at least min+1
  81. size_t max = min--;
  82. if(s && *s == '-')
  83. {
  84. s++;
  85. if(!isdigit(*s))
  86. {
  87. nostop = true;
  88. }
  89. else
  90. {
  91. max = parse_list_num(&s);
  92. if(max == 0) return -1;
  93. if(max < min)
  94. {
  95. fprintf(stderr, "%s: error: Decreasing range: %zu-%zu\n", argv0, min, max);
  96. return -1;
  97. }
  98. }
  99. }
  100. // Needs to be after *s == '-'
  101. if(s && *s == ',') s++;
  102. if(max > list_len)
  103. {
  104. list = reallocarray(list, max, sizeof(*list));
  105. if(list == NULL)
  106. {
  107. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  108. return -1;
  109. }
  110. if(min > list_len)
  111. {
  112. memset(list + list_len, 0, min - list_len);
  113. }
  114. list_len = max;
  115. }
  116. memset(list + min, 1, max - min);
  117. }
  118. if(list_len == 0)
  119. {
  120. fprintf(stderr, "%s: error: Empty list\n", argv0);
  121. return -1;
  122. }
  123. return 0;
  124. }
  125. static int
  126. cut_b(FILE *in, const char *filename)
  127. {
  128. char *line = NULL;
  129. size_t line_len = 0;
  130. int err = 0;
  131. while(err == 0)
  132. {
  133. errno = 0;
  134. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  135. if(nread < 0)
  136. {
  137. if(errno != 0)
  138. {
  139. fprintf(
  140. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  141. err = 1;
  142. }
  143. break;
  144. }
  145. if(nread == 0)
  146. {
  147. fputc(line_delim, stdout);
  148. continue;
  149. }
  150. if(line[nread - 1] == '\n') line[nread--] = '\0';
  151. for(size_t i = 0; i < MIN(list_len, (size_t)nread); i++)
  152. if(list[i]) fputc(line[i], stdout);
  153. if(nostop && (size_t)nread > list_len) fwrite(line + list_len, nread - list_len, 1, stdout);
  154. fputc(line_delim, stdout);
  155. }
  156. free(line);
  157. return err;
  158. }
  159. static int
  160. cut_b_n(FILE *in, const char *filename)
  161. {
  162. char *line = NULL;
  163. size_t line_len = 0;
  164. int err = 0;
  165. while(err == 0)
  166. {
  167. errno = 0;
  168. ssize_t sread = getdelim(&line, &line_len, line_delim, in);
  169. if(sread < 0)
  170. {
  171. if(errno != 0)
  172. {
  173. fprintf(
  174. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  175. err = 1;
  176. }
  177. break;
  178. }
  179. size_t nread = (size_t)sread;
  180. if(nread == 0)
  181. {
  182. fputc(line_delim, stdout);
  183. continue;
  184. }
  185. if(line[nread - 1] == '\n') line[nread--] = '\0';
  186. for(size_t i = 0; i < MIN(list_len, nread); i++)
  187. {
  188. size_t isz = mbrlen(line + i, nread, NULL);
  189. if(isz == 0 || isz == (size_t)-2 || isz == (size_t)-1) continue;
  190. /*
  191. * Check that last byte is part of the low-high selection.
  192. * Per POSIX.1-2024 high and low only decrements, and otherwise drops the character.
  193. */
  194. if(list[i + isz - 1])
  195. {
  196. fwrite(line + i, isz, 1, stdout);
  197. if(isz > 1) i += isz - 1;
  198. }
  199. }
  200. if(nostop && nread > list_len) fwrite(line + list_len, nread - list_len, 1, stdout);
  201. fputc(line_delim, stdout);
  202. }
  203. free(line);
  204. return err;
  205. }
  206. static int
  207. cut_c(FILE *in, const char *filename)
  208. {
  209. char *line = NULL;
  210. size_t line_len = 0;
  211. int err = 0;
  212. wchar_t *line_w = NULL;
  213. ssize_t line_wsz = 0;
  214. while(err == 0)
  215. {
  216. errno = 0;
  217. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  218. if(nread < 0)
  219. {
  220. if(errno != 0)
  221. {
  222. fprintf(
  223. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  224. err = 1;
  225. }
  226. break;
  227. }
  228. if(nread == 0)
  229. {
  230. fputwc(line_delim_w, stdout);
  231. continue;
  232. }
  233. if(line[nread - 1] == '\n') line[nread--] = '\0';
  234. if(nread > line_wsz)
  235. {
  236. line_w = reallocarray(line_w, nread, sizeof(*line_w));
  237. if(line_w == NULL)
  238. {
  239. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  240. err = 1;
  241. break;
  242. }
  243. line_wsz = nread;
  244. }
  245. assert(line_wsz > 0);
  246. size_t wcread = mbstowcs(line_w, line, line_wsz);
  247. if(wcread == (size_t)-1)
  248. {
  249. fprintf(stderr,
  250. "%s: error: Failed parsing characters in file '%s': %s\n",
  251. argv0,
  252. filename,
  253. strerror(errno));
  254. err = 1;
  255. break;
  256. }
  257. //DEBUG fprintf(stderr, "cut: mbstowcs(_, _, %zu) => %zu\n", nread, wcread);
  258. size_t i = 0;
  259. for(; i < MIN(list_len, wcread); i++)
  260. if(list[i]) fputwc(line_w[i], stdout);
  261. if(nostop && wcread > list_len)
  262. {
  263. for(; i < wcread; i++)
  264. fputwc(line_w[i], stdout);
  265. }
  266. fputwc(line_delim_w, stdout);
  267. }
  268. free(line);
  269. free(line_w);
  270. return err;
  271. }
  272. static int
  273. cut_f(FILE *in, const char *filename)
  274. {
  275. char *line = NULL;
  276. size_t line_len = 0;
  277. int err = 0;
  278. while(err == 0)
  279. {
  280. errno = 0;
  281. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  282. if(nread < 0)
  283. {
  284. if(errno != 0)
  285. {
  286. fprintf(
  287. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  288. err = 1;
  289. }
  290. break;
  291. }
  292. if(nread == 0)
  293. {
  294. fputc(line_delim, stdout);
  295. continue;
  296. }
  297. if(line[nread - 1] == '\n') line[--nread] = '\0';
  298. size_t di = 0;
  299. for(; di < (size_t)nread; di++)
  300. if(line[di] == delim) break;
  301. if(di == (size_t)nread)
  302. {
  303. if(!opt_s) puts(line);
  304. continue;
  305. }
  306. bool need_sep = false;
  307. char *c = line;
  308. for(size_t pos = 0, i = 0; pos <= (size_t)nread; pos++)
  309. {
  310. if(pos < (size_t)nread && line[pos] != delim) continue;
  311. line[pos] = '\0';
  312. if(i >= list_len)
  313. {
  314. if(!nostop) break;
  315. if(need_sep) fputc(delim, stdout);
  316. fputs(c, stdout);
  317. need_sep = true;
  318. }
  319. else if(list[i])
  320. {
  321. if(need_sep) fputc(delim, stdout);
  322. fputs(c, stdout);
  323. need_sep = true;
  324. }
  325. i++;
  326. c = line + pos + 1;
  327. }
  328. fputc(line_delim, stdout);
  329. }
  330. free(line);
  331. return err;
  332. }
  333. static int
  334. cut(FILE *in, const char *filename)
  335. {
  336. switch(mode)
  337. {
  338. case CUT_MODE_NONE:
  339. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  340. return 1;
  341. case CUT_MODE_B:
  342. return opt_n ? cut_b_n(in, filename) : cut_b(in, filename);
  343. case CUT_MODE_C:
  344. return cut_c(in, filename);
  345. case CUT_MODE_F:
  346. return cut_f(in, filename);
  347. default:
  348. abort();
  349. }
  350. }
  351. int
  352. main(int argc, char *argv[])
  353. {
  354. char *opt_list = NULL;
  355. char *lc_all = setlocale(LC_ALL, "");
  356. if(lc_all == NULL)
  357. {
  358. fprintf(stderr,
  359. "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n",
  360. argv0,
  361. strerror(errno));
  362. }
  363. errno = 0;
  364. #ifdef HAS_GETOPT_LONG
  365. // Strictly for GNUisms compatibility so no long-only options
  366. // clang-format off
  367. static struct option opts[] = {
  368. {"bytes", required_argument, NULL, 'b'},
  369. {"characters", required_argument, NULL, 'c'},
  370. {"delimiter", required_argument, NULL, 'd'},
  371. {"fields", required_argument, NULL, 'f'},
  372. {"only-delimited", no_argument, NULL, 's'},
  373. {"zero-terminated", no_argument, NULL, 'z'},
  374. {0, 0, 0, 0},
  375. };
  376. // clang-format on
  377. // Need + as first character to get POSIX-style option parsing
  378. for(int c = -1; (c = getopt_long(argc, argv, "+:b:c:d:f:nsz", opts, NULL)) != -1;)
  379. #else
  380. for(int c = -1; (c = getopt_nolong(argc, argv, ":b:c:d:f:nsz")) != -1;)
  381. #endif
  382. {
  383. switch(c)
  384. {
  385. case 'b':
  386. if(opt_list != NULL)
  387. {
  388. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  389. return 1;
  390. }
  391. mode = CUT_MODE_B;
  392. opt_list = optarg;
  393. break;
  394. case 'c':
  395. if(opt_list != NULL)
  396. {
  397. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  398. return 1;
  399. }
  400. mode = CUT_MODE_C;
  401. opt_list = optarg;
  402. break;
  403. case 'f':
  404. if(opt_list != NULL)
  405. {
  406. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  407. return 1;
  408. }
  409. mode = CUT_MODE_F;
  410. opt_list = optarg;
  411. break;
  412. case 'd':
  413. if(optarg[0] != '\0' && optarg[1] != '\0')
  414. {
  415. fprintf(stderr,
  416. "%s: error: Option '-d' only accepts single characters, got \"%s\"\n",
  417. argv0,
  418. optarg);
  419. return 1;
  420. }
  421. delim = optarg[0];
  422. break;
  423. case 'n':
  424. opt_n = true;
  425. break;
  426. case 's':
  427. opt_s = true;
  428. break;
  429. case 'z':
  430. line_delim = '\0';
  431. line_delim_w = L'\0';
  432. break;
  433. case ':':
  434. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  435. return 1;
  436. case '?':
  437. GETOPT_UNKNOWN_OPT
  438. return 1;
  439. default:
  440. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  441. abort();
  442. }
  443. }
  444. argc -= optind;
  445. argv += optind;
  446. if(mode == CUT_MODE_NONE)
  447. {
  448. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  449. return 1;
  450. }
  451. if(parse_list(opt_list) < 0) return 1;
  452. #if 0
  453. fprintf(stderr, "[DEBUG] list: ");
  454. for(size_t i = 0; i < list_len; i++)
  455. {
  456. fputc(list[i] ? '1' : '0', stderr);
  457. }
  458. fputc('\n', stderr);
  459. #endif
  460. if(argc <= 0) return cut(stdin, "<stdin>");
  461. for(int i = 0; i < argc; i++)
  462. {
  463. FILE *in = fopen(argv[i], "r");
  464. if(in == NULL)
  465. {
  466. fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, argv[i], strerror(errno));
  467. return 1;
  468. }
  469. int ret = cut(in, argv[i]);
  470. if(fclose(in) < 0)
  471. {
  472. fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, argv[i], strerror(errno));
  473. return 1;
  474. }
  475. if(ret != 0) return 1;
  476. }
  477. if(fclose(stdin) != 0)
  478. {
  479. fprintf(stderr, "%s: error: Failed closing <stdin>: %s\n", argv0, strerror(errno));
  480. return 1;
  481. }
  482. if(fclose(stdout) != 0)
  483. {
  484. fprintf(stderr, "%s: error: Failed closing <stdout>: %s\n", argv0, strerror(errno));
  485. return 1;
  486. }
  487. return 0;
  488. }