logo

utils-std

Collection of commonly available Unix tools

git clone https://anongit.hacktivis.me/git/utils-std.git/

cut.c (9842B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
#define _POSIX_C_SOURCE 202405L
#include "../config.h"
#include "../lib/reallocarray.h"
#include "../libutils/getopt_nolong.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <locale.h>
#include <stdbool.h>
#include <stdint.h> // size_t
#include <stdio.h>  // fprintf, fopen
#include <stdlib.h> // abort, free, strtoul
#include <string.h> // strerror
#include <unistd.h> // getopt
#include <wchar.h>
#ifdef HAS_GETOPT_LONG
#include <getopt.h>
#endif
  21. #undef MIN
  22. #define MIN(a, b) (((a) < (b)) ? (a) : (b))
  23. enum cut_mode
  24. {
  25. CUT_MODE_NONE = 0,
  26. CUT_MODE_B = 1,
  27. CUT_MODE_C = 2,
  28. CUT_MODE_F = 3,
  29. };
  30. char delim = '\t';
  31. char line_delim = '\n';
  32. wchar_t line_delim_w = L'\n';
  33. bool opt_n = false, opt_s = false;
  34. enum cut_mode mode = CUT_MODE_NONE;
  35. bool *list = NULL;
  36. size_t list_len = 0;
  37. bool nostop = false;
  38. const char *argv0 = "cut";
  39. static size_t
  40. parse_list_num(char **s)
  41. {
  42. char *endptr = NULL;
  43. errno = 0;
  44. size_t n = strtoul(*s, &endptr, 10);
  45. if(errno != 0)
  46. {
  47. fprintf(stderr, "%s: error: Failed parsing '%s' as a number: %s\n", argv0, *s, strerror(errno));
  48. return 0;
  49. }
  50. if(n < 1)
  51. {
  52. fprintf(stderr, "%s: error: Invalid number in list: %zu\n", argv0, n);
  53. return 0;
  54. }
  55. if(endptr != NULL && strchr(",-", *endptr) == NULL)
  56. {
  57. fprintf(stderr, "%s: error: Invalid character in list: %c\n", argv0, *endptr);
  58. return 0;
  59. }
  60. *s = endptr;
  61. return n;
  62. }
  63. static int
  64. parse_list(char *s)
  65. {
  66. while(true)
  67. {
  68. if(s == NULL || *s == '\0') break;
  69. if(*s == ',')
  70. {
  71. fprintf(stderr, "%s: error: Empty list element\n", argv0);
  72. return -1;
  73. }
  74. size_t min = 1;
  75. if(*s != '-')
  76. {
  77. min = parse_list_num(&s);
  78. if(min == 0) return -1;
  79. }
  80. // min-- as cut(1) is 1-indexed and max needs to be at least min+1
  81. size_t max = min--;
  82. if(s && *s == '-')
  83. {
  84. s++;
  85. if(!isdigit(*s))
  86. {
  87. nostop = true;
  88. }
  89. else
  90. {
  91. max = parse_list_num(&s);
  92. if(max == 0) return -1;
  93. if(max < min)
  94. {
  95. fprintf(stderr, "%s: error: Decreasing range: %zu-%zu\n", argv0, min, max);
  96. return -1;
  97. }
  98. }
  99. }
  100. // Needs to be after *s == '-'
  101. if(s && *s == ',') s++;
  102. if(max > list_len)
  103. {
  104. list = reallocarray(list, max, sizeof(*list));
  105. if(list == NULL)
  106. {
  107. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  108. return -1;
  109. }
  110. if(min > list_len)
  111. {
  112. memset(list + list_len, 0, min - list_len);
  113. }
  114. list_len = max;
  115. }
  116. memset(list + min, 1, max - min);
  117. }
  118. if(list_len == 0)
  119. {
  120. fprintf(stderr, "%s: error: Empty list\n", argv0);
  121. return -1;
  122. }
  123. return 0;
  124. }
  125. static int
  126. cut_b(FILE *in, const char *filename)
  127. {
  128. char *line = NULL;
  129. size_t line_len = 0;
  130. int err = 0;
  131. while(err == 0)
  132. {
  133. errno = 0;
  134. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  135. if(nread < 0)
  136. {
  137. if(errno != 0)
  138. {
  139. fprintf(
  140. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  141. err = 1;
  142. }
  143. break;
  144. }
  145. if(nread == 0)
  146. {
  147. fputc(line_delim, stdout);
  148. continue;
  149. }
  150. if(line[nread - 1] == '\n') line[nread--] = '\0';
  151. for(size_t i = 0; i < MIN(list_len, (size_t)nread); i++)
  152. if(list[i]) fputc(line[i], stdout);
  153. if(nostop && (size_t)nread > list_len) fwrite(line + list_len, nread - list_len, 1, stdout);
  154. fputc(line_delim, stdout);
  155. }
  156. if(line_len != 0) free(line);
  157. return err;
  158. }
  159. static int
  160. cut_c(FILE *in, const char *filename)
  161. {
  162. char *line = NULL;
  163. size_t line_len = 0;
  164. int err = 0;
  165. wchar_t *line_w = NULL;
  166. ssize_t line_wsz = 0;
  167. while(err == 0)
  168. {
  169. errno = 0;
  170. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  171. if(nread < 0)
  172. {
  173. if(errno != 0)
  174. {
  175. fprintf(
  176. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  177. err = 1;
  178. }
  179. break;
  180. }
  181. if(nread == 0)
  182. {
  183. fputwc(line_delim_w, stdout);
  184. continue;
  185. }
  186. if(line[nread - 1] == '\n') line[nread--] = '\0';
  187. if(nread > line_wsz)
  188. {
  189. line_w = reallocarray(line_w, nread, sizeof(*line_w));
  190. if(line_w == NULL)
  191. {
  192. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  193. err = 1;
  194. break;
  195. }
  196. line_wsz = nread;
  197. }
  198. assert(line_wsz > 0);
  199. size_t wcread = mbstowcs(line_w, line, line_wsz);
  200. if(wcread == (size_t)-1)
  201. {
  202. fprintf(stderr,
  203. "%s: error: Failed parsing characters in file '%s': %s\n",
  204. argv0,
  205. filename,
  206. strerror(errno));
  207. err = 1;
  208. break;
  209. }
  210. //DEBUG fprintf(stderr, "cut: mbstowcs(_, _, %zu) => %zu\n", nread, wcread);
  211. size_t i = 0;
  212. for(; i < MIN(list_len, wcread); i++)
  213. if(list[i]) fputwc(line_w[i], stdout);
  214. if(nostop && wcread > list_len)
  215. {
  216. for(; i < wcread; i++)
  217. fputwc(line_w[i], stdout);
  218. }
  219. fputwc(line_delim_w, stdout);
  220. }
  221. free(line);
  222. free(line_w);
  223. return err;
  224. }
  225. static int
  226. cut_f(FILE *in, const char *filename)
  227. {
  228. char *line = NULL;
  229. size_t line_len = 0;
  230. int err = 0;
  231. while(err == 0)
  232. {
  233. errno = 0;
  234. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  235. if(nread < 0)
  236. {
  237. if(errno != 0)
  238. {
  239. fprintf(
  240. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  241. err = 1;
  242. }
  243. break;
  244. }
  245. if(nread == 0)
  246. {
  247. fputc(line_delim, stdout);
  248. continue;
  249. }
  250. if(line[nread - 1] == '\n') line[--nread] = '\0';
  251. size_t di = 0;
  252. for(; di < (size_t)nread; di++)
  253. if(line[di] == delim) break;
  254. if(di == (size_t)nread)
  255. {
  256. if(!opt_s) puts(line);
  257. continue;
  258. }
  259. bool need_sep = false;
  260. char *c = line;
  261. for(size_t pos = 0, i = 0; pos <= (size_t)nread; pos++)
  262. {
  263. if(pos < (size_t)nread && line[pos] != delim) continue;
  264. line[pos] = '\0';
  265. if(i >= list_len)
  266. {
  267. if(!nostop) break;
  268. if(need_sep) fputc(delim, stdout);
  269. fputs(c, stdout);
  270. need_sep = true;
  271. }
  272. else if(list[i])
  273. {
  274. if(need_sep) fputc(delim, stdout);
  275. fputs(c, stdout);
  276. need_sep = true;
  277. }
  278. i++;
  279. c = line + pos + 1;
  280. }
  281. fputc(line_delim, stdout);
  282. }
  283. if(line_len != 0) free(line);
  284. return err;
  285. }
  286. static int
  287. cut(FILE *in, const char *filename)
  288. {
  289. switch(mode)
  290. {
  291. case CUT_MODE_NONE:
  292. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  293. return 1;
  294. case CUT_MODE_B:
  295. return cut_b(in, filename);
  296. case CUT_MODE_C:
  297. return cut_c(in, filename);
  298. case CUT_MODE_F:
  299. return cut_f(in, filename);
  300. default:
  301. abort();
  302. }
  303. }
  304. int
  305. main(int argc, char *argv[])
  306. {
  307. char *opt_list = NULL;
  308. char *lc_all = setlocale(LC_ALL, "");
  309. if(lc_all == NULL)
  310. {
  311. fprintf(stderr,
  312. "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n",
  313. argv0,
  314. strerror(errno));
  315. }
  316. errno = 0;
  317. #ifdef HAS_GETOPT_LONG
  318. // Strictly for GNUisms compatibility so no long-only options
  319. // clang-format off
  320. static struct option opts[] = {
  321. {"bytes", required_argument, NULL, 'b'},
  322. {"characters", required_argument, NULL, 'c'},
  323. {"delimiter", required_argument, NULL, 'd'},
  324. {"fields", required_argument, NULL, 'f'},
  325. {"only-delimited", no_argument, NULL, 's'},
  326. {"zero-terminated", no_argument, NULL, 'z'},
  327. {0, 0, 0, 0},
  328. };
  329. // clang-format on
  330. // Need + as first character to get POSIX-style option parsing
  331. for(int c = -1; (c = getopt_long(argc, argv, "+:b:c:d:f:nsz", opts, NULL)) != -1;)
  332. #else
  333. for(int c = -1; (c = getopt_nolong(argc, argv, ":b:c:d:f:nsz")) != -1;)
  334. #endif
  335. {
  336. switch(c)
  337. {
  338. case 'b':
  339. if(opt_list != NULL)
  340. {
  341. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  342. return 1;
  343. }
  344. mode = CUT_MODE_B;
  345. opt_list = optarg;
  346. break;
  347. case 'c':
  348. if(opt_list != NULL)
  349. {
  350. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  351. return 1;
  352. }
  353. mode = CUT_MODE_C;
  354. opt_list = optarg;
  355. break;
  356. case 'f':
  357. if(opt_list != NULL)
  358. {
  359. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  360. return 1;
  361. }
  362. mode = CUT_MODE_F;
  363. opt_list = optarg;
  364. break;
  365. case 'd':
  366. if(optarg[0] != '\0' && optarg[1] != '\0')
  367. {
  368. fprintf(stderr,
  369. "%s: error: Option '-d' only accepts single characters, got \"%s\"\n",
  370. argv0,
  371. optarg);
  372. return 1;
  373. }
  374. delim = optarg[0];
  375. break;
  376. case 'n':
  377. opt_n = true;
  378. break;
  379. case 's':
  380. opt_s = true;
  381. break;
  382. case 'z':
  383. line_delim = '\0';
  384. line_delim_w = L'\0';
  385. break;
  386. case ':':
  387. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  388. return 1;
  389. case '?':
  390. GETOPT_UNKNOWN_OPT
  391. return 1;
  392. default:
  393. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  394. abort();
  395. }
  396. }
  397. argc -= optind;
  398. argv += optind;
  399. if(mode == CUT_MODE_NONE)
  400. {
  401. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  402. return 1;
  403. }
  404. if(parse_list(opt_list) < 0) return 1;
  405. #if 0
  406. fprintf(stderr, "[DEBUG] list: ");
  407. for(size_t i = 0; i < list_len; i++)
  408. {
  409. fputc(list[i] ? '1' : '0', stderr);
  410. }
  411. fputc('\n', stderr);
  412. #endif
  413. if(argc <= 0) return cut(stdin, "<stdin>");
  414. for(int i = 0; i < argc; i++)
  415. {
  416. FILE *in = fopen(argv[i], "r");
  417. if(in == NULL)
  418. {
  419. fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, argv[i], strerror(errno));
  420. return 1;
  421. }
  422. int ret = cut(in, argv[i]);
  423. if(fclose(in) < 0)
  424. {
  425. fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, argv[i], strerror(errno));
  426. return 1;
  427. }
  428. if(ret != 0) return 1;
  429. }
  430. if(fclose(stdin) != 0)
  431. {
  432. fprintf(stderr, "%s: error: Failed closing <stdin>: %s\n", argv0, strerror(errno));
  433. return 1;
  434. }
  435. if(fclose(stdout) != 0)
  436. {
  437. fprintf(stderr, "%s: error: Failed closing <stdout>: %s\n", argv0, strerror(errno));
  438. return 1;
  439. }
  440. return 0;
  441. }