logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git

cut.c (8448B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 202405L
#include "../lib/reallocarray.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <locale.h>
#include <stdbool.h>
#include <stdint.h> // size_t
#include <stdio.h>  // fprintf, fopen
#include <stdlib.h> // strtoul, free, abort
#include <string.h> // strerror
#include <unistd.h> // getopt
#include <wchar.h>
  16. #undef MIN
  17. #define MIN(a, b) (((a) < (b)) ? (a) : (b))
  18. enum cut_mode
  19. {
  20. CUT_MODE_NONE = 0,
  21. CUT_MODE_B = 1,
  22. CUT_MODE_C = 2,
  23. CUT_MODE_F = 3,
  24. };
  25. char delim = '\t';
  26. bool opt_n = false, opt_s = false;
  27. enum cut_mode mode = CUT_MODE_NONE;
  28. bool *list = NULL;
  29. size_t list_len = 0;
  30. bool nostop = false;
  31. static ssize_t
  32. parse_list_num(char **s)
  33. {
  34. char *endptr = NULL;
  35. errno = 0;
  36. size_t n = strtoul(*s, &endptr, 10);
  37. if(errno != 0)
  38. {
  39. fprintf(stderr, "cut: Error while parsing '%s' as a number: %s\n", *s, strerror(errno));
  40. return -1;
  41. }
  42. if(n < 1)
  43. {
  44. fprintf(stderr, "cut: Invalid number in list: %zu\n", n);
  45. return -1;
  46. }
  47. if(endptr != NULL && strchr(",-", *endptr) == NULL)
  48. {
  49. fprintf(stderr, "cut: Invalid character in list: %c\n", *endptr);
  50. return -1;
  51. }
  52. *s = endptr;
  53. return n;
  54. }
  55. static int
  56. parse_list(char *s)
  57. {
  58. while(true)
  59. {
  60. if(s == NULL || *s == '\0') break;
  61. if(*s == ',')
  62. {
  63. fprintf(stderr, "cut: Error: empty list element\n");
  64. return -1;
  65. }
  66. ssize_t min = 1;
  67. if(*s != '-')
  68. {
  69. min = parse_list_num(&s);
  70. if(min < 0) return -1;
  71. }
  72. // min-- as cut(1) is 1-indexed and max needs to be at least min+1
  73. ssize_t max = min--;
  74. if(s && *s == '-')
  75. {
  76. s++;
  77. if(!isdigit(*s))
  78. {
  79. nostop = true;
  80. }
  81. else
  82. {
  83. max = parse_list_num(&s);
  84. if(max < 0) return -1;
  85. if(max < min)
  86. {
  87. fprintf(stderr, "cut: Error: decreasing range: %zu-%zu\n", min, max);
  88. return -1;
  89. }
  90. }
  91. }
  92. // Needs to be after *s == '-'
  93. if(s && *s == ',') s++;
  94. if((size_t)max > list_len)
  95. {
  96. list = reallocarray(list, (size_t)max, sizeof(*list));
  97. if(list == NULL)
  98. {
  99. fprintf(stderr, "cut: Memory allocation error: %s\n", strerror(errno));
  100. return -1;
  101. }
  102. if((size_t)min > list_len)
  103. {
  104. memset(list + list_len, 0, min - list_len);
  105. }
  106. list_len = max;
  107. }
  108. memset(list + min, 1, max - min);
  109. }
  110. if(list_len == 0)
  111. {
  112. fprintf(stderr, "cut: Error: empty list\n");
  113. return -1;
  114. }
  115. return 0;
  116. }
  117. static int
  118. cut_b(FILE *in, const char *filename)
  119. {
  120. char *line = NULL;
  121. size_t line_len = 0;
  122. int err = 0;
  123. while(err == 0)
  124. {
  125. errno = 0;
  126. ssize_t nread = getline(&line, &line_len, in);
  127. if(nread < 0)
  128. {
  129. if(errno != 0)
  130. {
  131. fprintf(stderr, "cut: Error while reading file '%s': %s\n", filename, strerror(errno));
  132. err = 1;
  133. }
  134. break;
  135. }
  136. if(nread == 0)
  137. {
  138. fputc('\n', stdout);
  139. continue;
  140. }
  141. if(line[nread - 1] == '\n') line[nread--] = '\0';
  142. for(size_t i = 0; i < MIN(list_len, (size_t)nread); i++)
  143. if(list[i]) fputc(line[i], stdout);
  144. if(nostop && (size_t)nread > list_len) fwrite(line + list_len, nread - list_len, 1, stdout);
  145. fputc('\n', stdout);
  146. }
  147. if(line_len != 0) free(line);
  148. return err;
  149. }
  150. static int
  151. cut_c(FILE *in, const char *filename)
  152. {
  153. char *line = NULL;
  154. size_t line_len = 0;
  155. int err = 0;
  156. wchar_t *line_w = NULL;
  157. ssize_t line_wsz = 0;
  158. while(err == 0)
  159. {
  160. errno = 0;
  161. ssize_t nread = getline(&line, &line_len, in);
  162. if(nread < 0)
  163. {
  164. if(errno != 0)
  165. {
  166. fprintf(stderr, "cut: Error while reading file '%s': %s\n", filename, strerror(errno));
  167. err = 1;
  168. }
  169. break;
  170. }
  171. if(nread == 0)
  172. {
  173. fputc('\n', stdout);
  174. continue;
  175. }
  176. if(line[nread - 1] == '\n') line[nread--] = '\0';
  177. if(nread > line_wsz)
  178. {
  179. line_w = reallocarray(line_w, nread, sizeof(*line_w));
  180. if(line_w == NULL)
  181. {
  182. fprintf(stderr, "cut: Memory allocation error: %s\n", strerror(errno));
  183. return -1;
  184. }
  185. line_wsz = nread;
  186. }
  187. assert(line_wsz > 0);
  188. size_t wcread = mbstowcs(line_w, line, line_wsz);
  189. if(wcread == (size_t)-1)
  190. {
  191. fprintf(stderr,
  192. "cut: Error while parsing characters in file '%s': %s\n",
  193. filename,
  194. strerror(errno));
  195. err = 1;
  196. break;
  197. }
  198. //DEBUG fprintf(stderr, "cut: mbstowcs(_, _, %zu) => %zu\n", nread, wcread);
  199. size_t i = 0;
  200. for(; i < MIN(list_len, wcread); i++)
  201. if(list[i]) fputwc(line_w[i], stdout);
  202. if(nostop && wcread > list_len)
  203. {
  204. for(; i < wcread; i++)
  205. fputwc(line_w[i], stdout);
  206. }
  207. fputc('\n', stdout);
  208. }
  209. if(line_len != 0) free(line);
  210. return err;
  211. }
  212. static int
  213. cut_f(FILE *in, const char *filename)
  214. {
  215. char *line = NULL;
  216. size_t line_len = 0;
  217. int err = 0;
  218. while(err == 0)
  219. {
  220. errno = 0;
  221. ssize_t nread = getline(&line, &line_len, in);
  222. if(nread < 0)
  223. {
  224. if(errno != 0)
  225. {
  226. fprintf(stderr, "cut: Error while reading file '%s': %s\n", filename, strerror(errno));
  227. err = 1;
  228. }
  229. break;
  230. }
  231. if(nread == 0)
  232. {
  233. fputc('\n', stdout);
  234. continue;
  235. }
  236. if(line[nread - 1] == '\n') line[--nread] = '\0';
  237. size_t di = 0;
  238. for(; di < (size_t)nread; di++)
  239. if(line[di] == delim) break;
  240. if(di == (size_t)nread)
  241. {
  242. if(!opt_s) puts(line);
  243. continue;
  244. }
  245. bool need_sep = false;
  246. char *c = line;
  247. for(size_t pos = 0, i = 0; pos <= (size_t)nread; pos++)
  248. {
  249. if(pos < (size_t)nread && line[pos] != delim) continue;
  250. line[pos] = '\0';
  251. if(i >= list_len)
  252. {
  253. if(!nostop) break;
  254. if(need_sep) fputc(delim, stdout);
  255. fputs(c, stdout);
  256. need_sep = true;
  257. }
  258. else if(list[i])
  259. {
  260. if(need_sep) fputc(delim, stdout);
  261. fputs(c, stdout);
  262. need_sep = true;
  263. }
  264. i++;
  265. c = line + pos + 1;
  266. }
  267. fputc('\n', stdout);
  268. }
  269. if(line_len != 0) free(line);
  270. return err;
  271. }
  272. static int
  273. cut(FILE *in, const char *filename)
  274. {
  275. switch(mode)
  276. {
  277. case CUT_MODE_NONE:
  278. fprintf(stderr, "cut: No action (-b, -c, -f) specified\n");
  279. return 1;
  280. case CUT_MODE_B:
  281. return cut_b(in, filename);
  282. case CUT_MODE_C:
  283. return cut_c(in, filename);
  284. case CUT_MODE_F:
  285. return cut_f(in, filename);
  286. default:
  287. abort();
  288. }
  289. }
  290. int
  291. main(int argc, char *argv[])
  292. {
  293. char *opt_list = NULL;
  294. errno = 0;
  295. setlocale(LC_ALL, "");
  296. if(errno != 0)
  297. {
  298. fprintf(stderr, "cut: Warning: Failed to initialize locales: %s\n", strerror(errno));
  299. errno = 0;
  300. }
  301. int c = -1;
  302. while((c = getopt(argc, argv, ":b:c:d:f:ns")) != -1)
  303. {
  304. switch(c)
  305. {
  306. case 'b':
  307. if(opt_list != NULL)
  308. {
  309. fprintf(stderr, "cut: Only one list may be specified\n");
  310. return 1;
  311. }
  312. mode = CUT_MODE_B;
  313. opt_list = optarg;
  314. break;
  315. case 'c':
  316. if(opt_list != NULL)
  317. {
  318. fprintf(stderr, "cut: Only one list may be specified\n");
  319. return 1;
  320. }
  321. mode = CUT_MODE_C;
  322. opt_list = optarg;
  323. break;
  324. case 'f':
  325. if(opt_list != NULL)
  326. {
  327. fprintf(stderr, "cut: Only one list may be specified\n");
  328. return 1;
  329. }
  330. mode = CUT_MODE_F;
  331. opt_list = optarg;
  332. break;
  333. case 'd':
  334. if(optarg[0] != '\0' && optarg[1] != '\0')
  335. {
  336. fprintf(stderr, "cut: Option '-d' only accepts single characters, got \"%s\"\n", optarg);
  337. return 1;
  338. }
  339. delim = optarg[0];
  340. break;
  341. case 'n':
  342. opt_n = true;
  343. break;
  344. case 's':
  345. opt_s = true;
  346. break;
  347. case ':':
  348. fprintf(stderr, "cut: Option '-%c' requires an operand\n", optopt);
  349. return 1;
  350. default:
  351. fprintf(stderr, "cut: Unhandled option '-%c'\n", optopt);
  352. return 1;
  353. }
  354. }
  355. argc -= optind;
  356. argv += optind;
  357. if(mode == CUT_MODE_NONE)
  358. {
  359. fprintf(stderr, "cut: No action (-b, -c, -f) specified\n");
  360. return 1;
  361. }
  362. if(parse_list(opt_list) < 0) return 1;
  363. #if 0
  364. fprintf(stderr, "[DEBUG] list: ");
  365. for(size_t i = 0; i < list_len; i++)
  366. {
  367. fputc(list[i] ? '1' : '0', stderr);
  368. }
  369. fputc('\n', stderr);
  370. #endif
  371. if(argc <= 0) return cut(stdin, "<stdin>");
  372. for(int i = 0; i < argc; i++)
  373. {
  374. FILE *in = fopen(argv[i], "r");
  375. if(in == NULL)
  376. {
  377. fprintf(stderr, "cut: Failed opening file '%s': %s\n", argv[i], strerror(errno));
  378. return 1;
  379. }
  380. int ret = cut(in, argv[i]);
  381. if(fclose(in) < 0)
  382. {
  383. fprintf(stderr, "cut: Failed closing file '%s': %s\n", argv[i], strerror(errno));
  384. return 1;
  385. }
  386. if(ret != 0) return 1;
  387. }
  388. if(fclose(stdin) != 0)
  389. {
  390. fprintf(stderr, "cut: Error closing <stdin>: %s\n", strerror(errno));
  391. return 1;
  392. }
  393. if(fclose(stdout) != 0)
  394. {
  395. fprintf(stderr, "cut: Error closing <stdout>: %s\n", strerror(errno));
  396. return 1;
  397. }
  398. return 0;
  399. }