logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

uniq.c (6518B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../config.h"
  6. #include "../libutils/getopt_nolong.h"
  7. #include <ctype.h> // isblank
  8. #include <errno.h>
  9. #include <stdbool.h>
  10. #include <stdio.h> // getline
  11. #include <stdlib.h> // atoi
  12. #include <string.h> // strncmp
  13. #include <unistd.h> // getopt
  14. #ifdef HAS_GETOPT_LONG
  15. #include <getopt.h>
  16. #endif
  // Output policy applied to each run of adjacent lines whose compared parts
  // are equal. The command line may select at most one non-default policy.
  enum uniq_mode
  {
      UNIQ = 0,        // default: write the first line of every run
      COUNT = 1,       // -c: like UNIQ, prefixed with the size of the run
      ONLY_REPEAT = 2, // -d: write only runs made of more than one line
      NO_REPEAT = 3    // -u: write only runs made of exactly one line
  };
  // Program name used as the prefix of the option-parsing diagnostics in main().
  // Kept with external linkage; presumably the shared getopt helpers refer to
  // it as well (confirm against ../libutils).
  char const *argv0 = "uniq";
  25. int
  26. main(int argc, char *argv[])
  27. {
  28. enum uniq_mode mode = UNIQ;
  29. unsigned long field = 0, shift = 0;
  30. char *endptr = NULL;
  31. #ifdef HAS_GETOPT_LONG
  32. // Strictly for GNUisms compatibility so no long-only options
  33. // clang-format off
  34. static struct option opts[] = {
  35. {"count", no_argument, NULL, 'c'},
  36. {"repeated", no_argument, NULL, 'd'},
  37. {"skip-fields", required_argument, NULL, 'f'},
  38. {"skip-chars", required_argument, NULL, 's'},
  39. {"unique", no_argument, NULL, 'u'},
  40. {0, 0, 0, 0},
  41. };
  42. // clang-format on
  43. // Need + as first character to get POSIX-style option parsing
  44. for(int c = -1; (c = getopt_long(argc, argv, "+:cdf:s:u", opts, NULL)) != -1;)
  45. #else
  46. for(int c = -1; (c = getopt_nolong(argc, argv, ":cdf:s:u")) != -1;)
  47. #endif
  48. {
  49. switch(c)
  50. {
  51. case 'c':
  52. if(mode != UNIQ)
  53. {
  54. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  55. return 1;
  56. }
  57. mode = COUNT;
  58. break;
  59. case 'd':
  60. if(mode != UNIQ)
  61. {
  62. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  63. return 1;
  64. }
  65. mode = ONLY_REPEAT;
  66. break;
  67. case 'f':
  68. errno = 0;
  69. field = strtoul(optarg, &endptr, 0);
  70. if(errno != 0)
  71. {
  72. fprintf(stderr, "%s: error: Failed parsing '-f %s': %s\n", argv0, optarg, strerror(errno));
  73. return 1;
  74. }
  75. if(endptr != NULL && endptr[0] != 0)
  76. {
  77. fprintf(stderr,
  78. "%s: error: Non-numeric characters passed to '-f %s': %s\n",
  79. argv0,
  80. optarg,
  81. endptr);
  82. return 1;
  83. }
  84. break;
  85. case 's':
  86. errno = 0;
  87. shift = strtoul(optarg, &endptr, 0);
  88. if(errno != 0)
  89. {
  90. fprintf(stderr, "%s: error: Failed parsing '-f %s': %s\n", argv0, optarg, strerror(errno));
  91. return 1;
  92. }
  93. if(endptr != NULL && endptr[0] != 0)
  94. {
  95. fprintf(stderr,
  96. "%s: error: Non-numeric characters passed to '-f %s': %s\n",
  97. argv0,
  98. optarg,
  99. endptr);
  100. return 1;
  101. }
  102. break;
  103. case 'u':
  104. if(mode != UNIQ)
  105. {
  106. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  107. return 1;
  108. }
  109. mode = NO_REPEAT;
  110. break;
  111. case ':':
  112. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  113. return 1;
  114. case '?':
  115. GETOPT_UNKNOWN_OPT
  116. return 1;
  117. default:
  118. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  119. abort();
  120. }
  121. }
  122. argc -= optind;
  123. argv += optind;
  124. FILE *input = stdin;
  125. char *input_name = NULL;
  126. FILE *output = stdout;
  127. char *output_name = NULL;
  128. switch(argc)
  129. {
  130. case 0:
  131. break;
  132. case 1:
  133. if(strcmp(argv[0], "-") != 0)
  134. {
  135. input = fopen(argv[0], "r");
  136. input_name = argv[0];
  137. if(input == NULL)
  138. {
  139. fprintf(
  140. stderr, "uniq: error: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  141. return 1;
  142. }
  143. }
  144. break;
  145. case 2:
  146. if(strcmp(argv[0], "-") != 0)
  147. {
  148. input = fopen(argv[0], "r");
  149. input_name = argv[0];
  150. if(input == NULL)
  151. {
  152. fprintf(
  153. stderr, "uniq: error: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  154. return 1;
  155. }
  156. }
  157. if(strcmp(argv[1], "-") != 0)
  158. {
  159. output = fopen(argv[1], "w");
  160. output_name = argv[1];
  161. if(output == NULL)
  162. {
  163. fprintf(
  164. stderr, "uniq: error: Failed opening output file '%s': %s\n", argv[1], strerror(errno));
  165. if(fclose(input) != 0)
  166. fprintf(stderr,
  167. "uniq: error: Failed closing input file '%s': %s\n",
  168. input_name,
  169. strerror(errno));
  170. return 1;
  171. }
  172. }
  173. break;
  174. default:
  175. fprintf(stderr, "uniq: error: Invalid number of arguments (%d), expected [0..2]\n", argc);
  176. return 1;
  177. }
  178. char *first = NULL;
  179. ssize_t first_len = 0;
  180. size_t first_shift = 0;
  181. unsigned counter = 1;
  182. errno = 0;
  183. while(true)
  184. {
  185. char *cur = NULL;
  186. size_t cur_size = 0;
  187. ssize_t cur_len = getline(&cur, &cur_size, input);
  188. size_t cur_shift = shift;
  189. if(cur_len > 0 && cur[cur_len - 1] == '\n')
  190. {
  191. cur[cur_len - 1] = 0;
  192. cur_len--;
  193. }
  194. if(field != 0)
  195. {
  196. ssize_t field_shift = 0;
  197. for(unsigned long i = 0; i < field; i++)
  198. {
  199. while(field_shift < cur_len && isblank(cur[field_shift]))
  200. field_shift++;
  201. while(field_shift < cur_len && !isblank(cur[field_shift]))
  202. field_shift++;
  203. }
  204. cur_shift += field_shift;
  205. }
  206. if(cur_shift > (size_t)cur_len)
  207. {
  208. free(cur);
  209. cur_size = 0;
  210. cur = NULL;
  211. break;
  212. }
  213. //fprintf(stderr, "[debug] {cur_shift:%d} <%s>\n", cur_shift, cur+cur_shift);
  214. if(first != NULL)
  215. {
  216. if(cur != NULL && (cur_len - cur_shift == first_len - first_shift) &&
  217. strncmp(cur + cur_shift, first + first_shift, cur_len - cur_shift) == 0)
  218. {
  219. counter += 1;
  220. }
  221. else
  222. {
  223. switch(mode)
  224. {
  225. case UNIQ:
  226. fwrite(first, first_len, 1, output);
  227. fprintf(output, "\n");
  228. break;
  229. case ONLY_REPEAT:
  230. if(counter > 1)
  231. {
  232. fwrite(first, first_len, 1, output);
  233. fprintf(output, "\n");
  234. }
  235. break;
  236. case NO_REPEAT:
  237. if(counter == 1)
  238. {
  239. fwrite(first, first_len, 1, output);
  240. fprintf(output, "\n");
  241. }
  242. break;
  243. case COUNT:
  244. fprintf(output, "%d %s\n", counter, first);
  245. break;
  246. }
  247. counter = 1;
  248. free(first);
  249. }
  250. }
  251. if(cur_len < 0)
  252. {
  253. if(cur_size > 0) free(cur);
  254. break;
  255. }
  256. if(counter == 1)
  257. {
  258. first = cur;
  259. first_len = cur_len;
  260. first_shift = cur_shift;
  261. }
  262. }
  263. int ret = 0;
  264. if(errno != 0)
  265. {
  266. fprintf(stderr, "uniq: error: Failed reading: %s\n", strerror(errno));
  267. ret = 1;
  268. }
  269. if(input != stdin)
  270. {
  271. if(fclose(input) != 0)
  272. {
  273. fprintf(
  274. stderr, "uniq: error: Failed closing input file '%s': %s\n", input_name, strerror(errno));
  275. ret = 1;
  276. }
  277. }
  278. if(output != stdout)
  279. {
  280. if(fclose(output) != 0)
  281. {
  282. fprintf(stderr,
  283. "uniq: error: Failed closing output file '%s': %s\n",
  284. output_name,
  285. strerror(errno));
  286. ret = 1;
  287. }
  288. }
  289. return ret;
  290. }