logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

cut.c (9010B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 202405L
  #include "../lib/getopt_nolong.h"
  #include "../lib/reallocarray.h"
  #include <assert.h>
  #include <ctype.h>
  #include <errno.h>
  #include <locale.h>
  #include <stdbool.h>
  #include <stdint.h> // size_t
  #include <stdio.h> // fprintf, fopen
  #include <stdlib.h> // strtoul, free, abort
  #include <string.h> // strerror
  #include <unistd.h> // getopt
  #include <wchar.h>
  // Smaller of two values. This is a function-like macro, so each argument may
  // be evaluated twice: do not pass expressions with side effects.
  // Any MIN already provided by a system header is dropped first.
  #undef MIN
  #define MIN(a, b) ((a) < (b) ? (a) : (b))
  // Unit of selection requested on the command line (-b, -c or -f).
  enum cut_mode
  {
    CUT_MODE_NONE = 0, // none of -b/-c/-f given
    CUT_MODE_B,        // -b: dispatched to cut_b()
    CUT_MODE_C,        // -c: dispatched to cut_c()
    CUT_MODE_F,        // -f: dispatched to cut_f()
  };
  26. char delim = '\t';
  27. bool opt_n = false, opt_s = false;
  28. enum cut_mode mode = CUT_MODE_NONE;
  29. bool *list = NULL;
  30. size_t list_len = 0;
  31. bool nostop = false;
  32. const char *argv0 = "cut";
  33. static size_t
  34. parse_list_num(char **s)
  35. {
  36. char *endptr = NULL;
  37. errno = 0;
  38. size_t n = strtoul(*s, &endptr, 10);
  39. if(errno != 0)
  40. {
  41. fprintf(stderr, "%s: error: Failed parsing '%s' as a number: %s\n", argv0, *s, strerror(errno));
  42. return 0;
  43. }
  44. if(n < 1)
  45. {
  46. fprintf(stderr, "%s: error: Invalid number in list: %zu\n", argv0, n);
  47. return 0;
  48. }
  49. if(endptr != NULL && strchr(",-", *endptr) == NULL)
  50. {
  51. fprintf(stderr, "%s: error: Invalid character in list: %c\n", argv0, *endptr);
  52. return 0;
  53. }
  54. *s = endptr;
  55. return n;
  56. }
  57. static int
  58. parse_list(char *s)
  59. {
  60. while(true)
  61. {
  62. if(s == NULL || *s == '\0') break;
  63. if(*s == ',')
  64. {
  65. fprintf(stderr, "%s: error: Empty list element\n", argv0);
  66. return -1;
  67. }
  68. size_t min = 1;
  69. if(*s != '-')
  70. {
  71. min = parse_list_num(&s);
  72. if(min == 0) return -1;
  73. }
  74. // min-- as cut(1) is 1-indexed and max needs to be at least min+1
  75. size_t max = min--;
  76. if(s && *s == '-')
  77. {
  78. s++;
  79. if(!isdigit(*s))
  80. {
  81. nostop = true;
  82. }
  83. else
  84. {
  85. max = parse_list_num(&s);
  86. if(max == 0) return -1;
  87. if(max < min)
  88. {
  89. fprintf(stderr, "%s: error: Decreasing range: %zu-%zu\n", argv0, min, max);
  90. return -1;
  91. }
  92. }
  93. }
  94. // Needs to be after *s == '-'
  95. if(s && *s == ',') s++;
  96. if(max > list_len)
  97. {
  98. list = reallocarray(list, max, sizeof(*list));
  99. if(list == NULL)
  100. {
  101. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  102. return -1;
  103. }
  104. if(min > list_len)
  105. {
  106. memset(list + list_len, 0, min - list_len);
  107. }
  108. list_len = max;
  109. }
  110. memset(list + min, 1, max - min);
  111. }
  112. if(list_len == 0)
  113. {
  114. fprintf(stderr, "%s: error: Empty list\n", argv0);
  115. return -1;
  116. }
  117. return 0;
  118. }
  119. static int
  120. cut_b(FILE *in, const char *filename)
  121. {
  122. char *line = NULL;
  123. size_t line_len = 0;
  124. int err = 0;
  125. while(err == 0)
  126. {
  127. errno = 0;
  128. ssize_t nread = getline(&line, &line_len, in);
  129. if(nread < 0)
  130. {
  131. if(errno != 0)
  132. {
  133. fprintf(
  134. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  135. err = 1;
  136. }
  137. break;
  138. }
  139. if(nread == 0)
  140. {
  141. fputc('\n', stdout);
  142. continue;
  143. }
  144. if(line[nread - 1] == '\n') line[nread--] = '\0';
  145. for(size_t i = 0; i < MIN(list_len, (size_t)nread); i++)
  146. if(list[i]) fputc(line[i], stdout);
  147. if(nostop && (size_t)nread > list_len) fwrite(line + list_len, nread - list_len, 1, stdout);
  148. fputc('\n', stdout);
  149. }
  150. if(line_len != 0) free(line);
  151. return err;
  152. }
  153. static int
  154. cut_c(FILE *in, const char *filename)
  155. {
  156. char *line = NULL;
  157. size_t line_len = 0;
  158. int err = 0;
  159. wchar_t *line_w = NULL;
  160. ssize_t line_wsz = 0;
  161. while(err == 0)
  162. {
  163. errno = 0;
  164. ssize_t nread = getline(&line, &line_len, in);
  165. if(nread < 0)
  166. {
  167. if(errno != 0)
  168. {
  169. fprintf(
  170. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  171. err = 1;
  172. }
  173. break;
  174. }
  175. if(nread == 0)
  176. {
  177. fputwc(L'\n', stdout);
  178. continue;
  179. }
  180. if(line[nread - 1] == '\n') line[nread--] = '\0';
  181. if(nread > line_wsz)
  182. {
  183. line_w = reallocarray(line_w, nread, sizeof(*line_w));
  184. if(line_w == NULL)
  185. {
  186. fprintf(stderr, "%s: error: Failed memory allocation: %s\n", argv0, strerror(errno));
  187. return -1;
  188. }
  189. line_wsz = nread;
  190. }
  191. assert(line_wsz > 0);
  192. size_t wcread = mbstowcs(line_w, line, line_wsz);
  193. if(wcread == (size_t)-1)
  194. {
  195. fprintf(stderr,
  196. "%s: error: Failed parsing characters in file '%s': %s\n",
  197. argv0,
  198. filename,
  199. strerror(errno));
  200. err = 1;
  201. break;
  202. }
  203. //DEBUG fprintf(stderr, "cut: mbstowcs(_, _, %zu) => %zu\n", nread, wcread);
  204. size_t i = 0;
  205. for(; i < MIN(list_len, wcread); i++)
  206. if(list[i]) fputwc(line_w[i], stdout);
  207. if(nostop && wcread > list_len)
  208. {
  209. for(; i < wcread; i++)
  210. fputwc(line_w[i], stdout);
  211. }
  212. fputwc(L'\n', stdout);
  213. }
  214. if(line_len != 0) free(line);
  215. return err;
  216. }
  217. static int
  218. cut_f(FILE *in, const char *filename)
  219. {
  220. char *line = NULL;
  221. size_t line_len = 0;
  222. int err = 0;
  223. while(err == 0)
  224. {
  225. errno = 0;
  226. ssize_t nread = getline(&line, &line_len, in);
  227. if(nread < 0)
  228. {
  229. if(errno != 0)
  230. {
  231. fprintf(
  232. stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
  233. err = 1;
  234. }
  235. break;
  236. }
  237. if(nread == 0)
  238. {
  239. fputc('\n', stdout);
  240. continue;
  241. }
  242. if(line[nread - 1] == '\n') line[--nread] = '\0';
  243. size_t di = 0;
  244. for(; di < (size_t)nread; di++)
  245. if(line[di] == delim) break;
  246. if(di == (size_t)nread)
  247. {
  248. if(!opt_s) puts(line);
  249. continue;
  250. }
  251. bool need_sep = false;
  252. char *c = line;
  253. for(size_t pos = 0, i = 0; pos <= (size_t)nread; pos++)
  254. {
  255. if(pos < (size_t)nread && line[pos] != delim) continue;
  256. line[pos] = '\0';
  257. if(i >= list_len)
  258. {
  259. if(!nostop) break;
  260. if(need_sep) fputc(delim, stdout);
  261. fputs(c, stdout);
  262. need_sep = true;
  263. }
  264. else if(list[i])
  265. {
  266. if(need_sep) fputc(delim, stdout);
  267. fputs(c, stdout);
  268. need_sep = true;
  269. }
  270. i++;
  271. c = line + pos + 1;
  272. }
  273. fputc('\n', stdout);
  274. }
  275. if(line_len != 0) free(line);
  276. return err;
  277. }
  278. static int
  279. cut(FILE *in, const char *filename)
  280. {
  281. switch(mode)
  282. {
  283. case CUT_MODE_NONE:
  284. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  285. return 1;
  286. case CUT_MODE_B:
  287. return cut_b(in, filename);
  288. case CUT_MODE_C:
  289. return cut_c(in, filename);
  290. case CUT_MODE_F:
  291. return cut_f(in, filename);
  292. default:
  293. abort();
  294. }
  295. }
  296. int
  297. main(int argc, char *argv[])
  298. {
  299. char *opt_list = NULL;
  300. char *lc_all = setlocale(LC_ALL, "");
  301. if(lc_all == NULL)
  302. {
  303. fprintf(stderr,
  304. "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n",
  305. argv0,
  306. strerror(errno));
  307. }
  308. errno = 0;
  309. for(int c = -1; (c = getopt_nolong(argc, argv, ":b:c:d:f:ns")) != -1;)
  310. {
  311. switch(c)
  312. {
  313. case 'b':
  314. if(opt_list != NULL)
  315. {
  316. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  317. return 1;
  318. }
  319. mode = CUT_MODE_B;
  320. opt_list = optarg;
  321. break;
  322. case 'c':
  323. if(opt_list != NULL)
  324. {
  325. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  326. return 1;
  327. }
  328. mode = CUT_MODE_C;
  329. opt_list = optarg;
  330. break;
  331. case 'f':
  332. if(opt_list != NULL)
  333. {
  334. fprintf(stderr, "%s: error: Only one list may be specified\n", argv0);
  335. return 1;
  336. }
  337. mode = CUT_MODE_F;
  338. opt_list = optarg;
  339. break;
  340. case 'd':
  341. if(optarg[0] != '\0' && optarg[1] != '\0')
  342. {
  343. fprintf(stderr,
  344. "%s: error: Option '-d' only accepts single characters, got \"%s\"\n",
  345. argv0,
  346. optarg);
  347. return 1;
  348. }
  349. delim = optarg[0];
  350. break;
  351. case 'n':
  352. opt_n = true;
  353. break;
  354. case 's':
  355. opt_s = true;
  356. break;
  357. case ':':
  358. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  359. return 1;
  360. case '?':
  361. if(!got_long_opt) fprintf(stderr, "%s: error: Unhandled option '-%c'\n", argv0, optopt);
  362. return 1;
  363. default:
  364. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  365. abort();
  366. }
  367. }
  368. argc -= optind;
  369. argv += optind;
  370. if(mode == CUT_MODE_NONE)
  371. {
  372. fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
  373. return 1;
  374. }
  375. if(parse_list(opt_list) < 0) return 1;
  376. #if 0
  377. fprintf(stderr, "[DEBUG] list: ");
  378. for(size_t i = 0; i < list_len; i++)
  379. {
  380. fputc(list[i] ? '1' : '0', stderr);
  381. }
  382. fputc('\n', stderr);
  383. #endif
  384. if(argc <= 0) return cut(stdin, "<stdin>");
  385. for(int i = 0; i < argc; i++)
  386. {
  387. FILE *in = fopen(argv[i], "r");
  388. if(in == NULL)
  389. {
  390. fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, argv[i], strerror(errno));
  391. return 1;
  392. }
  393. int ret = cut(in, argv[i]);
  394. if(fclose(in) < 0)
  395. {
  396. fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, argv[i], strerror(errno));
  397. return 1;
  398. }
  399. if(ret != 0) return 1;
  400. }
  401. if(fclose(stdin) != 0)
  402. {
  403. fprintf(stderr, "%s: error: Failed closing <stdin>: %s\n", argv0, strerror(errno));
  404. return 1;
  405. }
  406. if(fclose(stdout) != 0)
  407. {
  408. fprintf(stderr, "%s: error: Failed closing <stdout>: %s\n", argv0, strerror(errno));
  409. return 1;
  410. }
  411. return 0;
  412. }