logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

cut.c (9152B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 202405L
  5. #include "../lib/reallocarray.h"
  6. #include "../libutils/getopt_nolong.h"
  7. #include <assert.h>
  8. #include <ctype.h>
  9. #include <errno.h>
  10. #include <locale.h>
  11. #include <stdbool.h>
  12. #include <stdint.h> // size_t
  13. #include <stdio.h> // fprintf, fopen
  14. #include <string.h> // strerror
  15. #include <unistd.h> // getopt
  16. #include <wchar.h>
  // Smaller of two values. Each argument can be evaluated twice, so only pass
  // expressions without side effects.
  #ifdef MIN
  #undef MIN
  #endif
  #define MIN(a, b) ((a) < (b) ? (a) : (b))
  // Which kind of list was requested on the command line.
  enum cut_mode
  {
      CUT_MODE_NONE = 0, // no list option seen yet
      CUT_MODE_B,        // selected by -b
      CUT_MODE_C,        // selected by -c
      CUT_MODE_F,        // selected by -f
  };
  26. char delim = '\t';
  27. char line_delim = '\n';
  28. wchar_t line_delim_w = L'\n';
  29. bool opt_n = false, opt_s = false;
  30. enum cut_mode mode = CUT_MODE_NONE;
  31. bool *list = NULL;
  32. size_t list_len = 0;
  33. bool nostop = false;
  34. const char *argv0 = "cut";
  35. static size_t
  36. parse_list_num(char **s)
  37. {
  38. char *endptr = NULL;
  39. errno = 0;
  40. size_t n = strtoul(*s, &endptr, 10);
  41. if(errno != 0)
  42. {
  43. fprintf(stderr, "%s: error: Failed parsing '%s' as a number: %s\n", argv0, *s, strerror(errno));
  44. return 0;
  45. }
  46. if(n < 1)
  47. {
  48. fprintf(stderr, "%s: error: Invalid number in list: %zu\n", argv0, n);
  49. return 0;
  50. }
  51. if(endptr != NULL && strchr(",-", *endptr) == NULL)
  52. {
  53. fprintf(stderr, "%s: error: Invalid character in list: %c\n", argv0, *endptr);
  54. return 0;
  55. }
  56. *s = endptr;
  57. return n;
  58. }
  59. static int
  60. parse_list(char *s)
  61. {
  62. while(true)
  63. {
  64. if(s == NULL || *s == '\0') break;
  65. if(*s == ',')
  66. {
  67. fprintf(stderr, "%s: error: Empty list element\n", argv0);
  68. return -1;
  69. }
  70. size_t min = 1;
  71. if(*s != '-')
  72. {
  73. min = parse_list_num(&s);
  74. if(min == 0) return -1;
  75. }
  76. // min-- as cut(1) is 1-indexed and max needs to be at least min+1
  77. size_t max = min--;
  78. if(s && *s == '-')
  79. {
  80. s++;
  81. if(!isdigit(*s))
  82. {
  83. nostop = true;
  84. }
  85. else
  86. {
  87. max = parse_list_num(&s);
  88. if(max == 0) return -1;
  89. if(max < min)
  90. {
  91. fprintf(stderr, "%s: error: Decreasing range: %zu-%zu\n", argv0, min, max);
  92. return -1;
  93. }
  94. }
  95. }
  96. // Needs to be after *s == '-'
  97. if(s && *s == ',') s++;
  98. if(max > list_len)
  99. {
  100. list = reallocarray(list, max, sizeof(*list));
  101. if(list == NULL)
  102. {
  103. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  104. return -1;
  105. }
  106. if(min > list_len)
  107. {
  108. memset(list + list_len, 0, min - list_len);
  109. }
  110. list_len = max;
  111. }
  112. memset(list + min, 1, max - min);
  113. }
  114. if(list_len == 0)
  115. {
  116. fprintf(stderr, "%s: error: Empty list\n", argv0);
  117. return -1;
  118. }
  119. return 0;
  120. }
  121. static int
  122. cut_b(FILE *in, const char *filename)
  123. {
  124. char *line = NULL;
  125. size_t line_len = 0;
  126. int err = 0;
  127. while(err == 0)
  128. {
  129. errno = 0;
  130. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  131. if(nread < 0)
  132. {
  133. if(errno != 0)
  134. {
  135. fprintf(
  136. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  137. err = 1;
  138. }
  139. break;
  140. }
  141. if(nread == 0)
  142. {
  143. fputc(line_delim, stdout);
  144. continue;
  145. }
  146. if(line[nread - 1] == '\n') line[nread--] = '\0';
  147. for(size_t i = 0; i < MIN(list_len, (size_t)nread); i++)
  148. if(list[i]) fputc(line[i], stdout);
  149. if(nostop && (size_t)nread > list_len) fwrite(line + list_len, nread - list_len, 1, stdout);
  150. fputc(line_delim, stdout);
  151. }
  152. if(line_len != 0) free(line);
  153. return err;
  154. }
  155. static int
  156. cut_c(FILE *in, const char *filename)
  157. {
  158. char *line = NULL;
  159. size_t line_len = 0;
  160. int err = 0;
  161. wchar_t *line_w = NULL;
  162. ssize_t line_wsz = 0;
  163. while(err == 0)
  164. {
  165. errno = 0;
  166. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  167. if(nread < 0)
  168. {
  169. if(errno != 0)
  170. {
  171. fprintf(
  172. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  173. err = 1;
  174. }
  175. break;
  176. }
  177. if(nread == 0)
  178. {
  179. fputwc(line_delim_w, stdout);
  180. continue;
  181. }
  182. if(line[nread - 1] == '\n') line[nread--] = '\0';
  183. if(nread > line_wsz)
  184. {
  185. line_w = reallocarray(line_w, nread, sizeof(*line_w));
  186. if(line_w == NULL)
  187. {
  188. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  189. err = 1;
  190. break;
  191. }
  192. line_wsz = nread;
  193. }
  194. assert(line_wsz > 0);
  195. size_t wcread = mbstowcs(line_w, line, line_wsz);
  196. if(wcread == (size_t)-1)
  197. {
  198. fprintf(stderr,
  199. "%s: error: Failed parsing characters in file '%s': %s\n",
  200. argv0,
  201. filename,
  202. strerror(errno));
  203. err = 1;
  204. break;
  205. }
  206. //DEBUG fprintf(stderr, "cut: mbstowcs(_, _, %zu) => %zu\n", nread, wcread);
  207. size_t i = 0;
  208. for(; i < MIN(list_len, wcread); i++)
  209. if(list[i]) fputwc(line_w[i], stdout);
  210. if(nostop && wcread > list_len)
  211. {
  212. for(; i < wcread; i++)
  213. fputwc(line_w[i], stdout);
  214. }
  215. fputwc(line_delim_w, stdout);
  216. }
  217. free(line);
  218. free(line_w);
  219. return err;
  220. }
  221. static int
  222. cut_f(FILE *in, const char *filename)
  223. {
  224. char *line = NULL;
  225. size_t line_len = 0;
  226. int err = 0;
  227. while(err == 0)
  228. {
  229. errno = 0;
  230. ssize_t nread = getdelim(&line, &line_len, line_delim, in);
  231. if(nread < 0)
  232. {
  233. if(errno != 0)
  234. {
  235. fprintf(
  236. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  237. err = 1;
  238. }
  239. break;
  240. }
  241. if(nread == 0)
  242. {
  243. fputc(line_delim, stdout);
  244. continue;
  245. }
  246. if(line[nread - 1] == '\n') line[--nread] = '\0';
  247. size_t di = 0;
  248. for(; di < (size_t)nread; di++)
  249. if(line[di] == delim) break;
  250. if(di == (size_t)nread)
  251. {
  252. if(!opt_s) puts(line);
  253. continue;
  254. }
  255. bool need_sep = false;
  256. char *c = line;
  257. for(size_t pos = 0, i = 0; pos <= (size_t)nread; pos++)
  258. {
  259. if(pos < (size_t)nread && line[pos] != delim) continue;
  260. line[pos] = '\0';
  261. if(i >= list_len)
  262. {
  263. if(!nostop) break;
  264. if(need_sep) fputc(delim, stdout);
  265. fputs(c, stdout);
  266. need_sep = true;
  267. }
  268. else if(list[i])
  269. {
  270. if(need_sep) fputc(delim, stdout);
  271. fputs(c, stdout);
  272. need_sep = true;
  273. }
  274. i++;
  275. c = line + pos + 1;
  276. }
  277. fputc(line_delim, stdout);
  278. }
  279. if(line_len != 0) free(line);
  280. return err;
  281. }
  282. static int
  283. cut(FILE *in, const char *filename)
  284. {
  285. switch(mode)
  286. {
  287. case CUT_MODE_NONE:
  288. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  289. return 1;
  290. case CUT_MODE_B:
  291. return cut_b(in, filename);
  292. case CUT_MODE_C:
  293. return cut_c(in, filename);
  294. case CUT_MODE_F:
  295. return cut_f(in, filename);
  296. default:
  297. abort();
  298. }
  299. }
  300. int
  301. main(int argc, char *argv[])
  302. {
  303. char *opt_list = NULL;
  304. char *lc_all = setlocale(LC_ALL, "");
  305. if(lc_all == NULL)
  306. {
  307. fprintf(stderr,
  308. "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n",
  309. argv0,
  310. strerror(errno));
  311. }
  312. errno = 0;
  313. for(int c = -1; (c = getopt_nolong(argc, argv, ":b:c:d:f:nsz")) != -1;)
  314. {
  315. switch(c)
  316. {
  317. case 'b':
  318. if(opt_list != NULL)
  319. {
  320. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  321. return 1;
  322. }
  323. mode = CUT_MODE_B;
  324. opt_list = optarg;
  325. break;
  326. case 'c':
  327. if(opt_list != NULL)
  328. {
  329. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  330. return 1;
  331. }
  332. mode = CUT_MODE_C;
  333. opt_list = optarg;
  334. break;
  335. case 'f':
  336. if(opt_list != NULL)
  337. {
  338. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  339. return 1;
  340. }
  341. mode = CUT_MODE_F;
  342. opt_list = optarg;
  343. break;
  344. case 'd':
  345. if(optarg[0] != '\0' && optarg[1] != '\0')
  346. {
  347. fprintf(stderr,
  348. "%s: error: Option '-d' only accepts single characters, got \"%s\"\n",
  349. argv0,
  350. optarg);
  351. return 1;
  352. }
  353. delim = optarg[0];
  354. break;
  355. case 'n':
  356. opt_n = true;
  357. break;
  358. case 's':
  359. opt_s = true;
  360. break;
  361. case 'z':
  362. line_delim = '\0';
  363. line_delim_w = L'\0';
  364. break;
  365. case ':':
  366. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  367. return 1;
  368. case '?':
  369. GETOPT_UNKNOWN_OPT
  370. return 1;
  371. default:
  372. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  373. abort();
  374. }
  375. }
  376. argc -= optind;
  377. argv += optind;
  378. if(mode == CUT_MODE_NONE)
  379. {
  380. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  381. return 1;
  382. }
  383. if(parse_list(opt_list) < 0) return 1;
  384. #if 0
  385. fprintf(stderr, "[DEBUG] list: ");
  386. for(size_t i = 0; i < list_len; i++)
  387. {
  388. fputc(list[i] ? '1' : '0', stderr);
  389. }
  390. fputc('\n', stderr);
  391. #endif
  392. if(argc <= 0) return cut(stdin, "<stdin>");
  393. for(int i = 0; i < argc; i++)
  394. {
  395. FILE *in = fopen(argv[i], "r");
  396. if(in == NULL)
  397. {
  398. fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, argv[i], strerror(errno));
  399. return 1;
  400. }
  401. int ret = cut(in, argv[i]);
  402. if(fclose(in) < 0)
  403. {
  404. fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, argv[i], strerror(errno));
  405. return 1;
  406. }
  407. if(ret != 0) return 1;
  408. }
  409. if(fclose(stdin) != 0)
  410. {
  411. fprintf(stderr, "%s: error: Failed closing <stdin>: %s\n", argv0, strerror(errno));
  412. return 1;
  413. }
  414. if(fclose(stdout) != 0)
  415. {
  416. fprintf(stderr, "%s: error: Failed closing <stdout>: %s\n", argv0, strerror(errno));
  417. return 1;
  418. }
  419. return 0;
  420. }