logo

utils-std

Collection of commonly available Unix tools.
git clone https://anongit.hacktivis.me/git/utils-std.git/

uniq.c (5960B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../lib/getopt_nolong.h"
  6. #include <ctype.h> // isblank
  7. #include <errno.h>
  8. #include <stdbool.h>
  9. #include <stdio.h> // getline
  10. #include <stdlib.h> // atoi
  11. #include <string.h> // strncmp
  12. #include <unistd.h> // getopt
  // Output policy applied to each group of adjacent matching lines.
  enum uniq_mode
  {
    UNIQ = 0,        // default: print one line per group
    COUNT = 1,       // -c: print each group prefixed by its number of occurrences
    ONLY_REPEAT = 2, // -d: print only groups that occurred more than once
    NO_REPEAT = 3,   // -u: print only groups that occurred exactly once
  };
  // Program name, used as the prefix of the option-parsing diagnostics written to stderr.
  const char *argv0 = "uniq";
  21. int
  22. main(int argc, char *argv[])
  23. {
  24. enum uniq_mode mode = UNIQ;
  25. unsigned long field = 0, shift = 0;
  26. char *endptr = NULL;
  27. for(int c = -1; (c = getopt_nolong(argc, argv, ":cdf:s:u")) != -1;)
  28. {
  29. switch(c)
  30. {
  31. case 'c':
  32. if(mode != UNIQ)
  33. {
  34. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  35. return 1;
  36. }
  37. mode = COUNT;
  38. break;
  39. case 'd':
  40. if(mode != UNIQ)
  41. {
  42. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  43. return 1;
  44. }
  45. mode = ONLY_REPEAT;
  46. break;
  47. case 'f':
  48. errno = 0;
  49. field = strtoul(optarg, &endptr, 0);
  50. if(errno != 0)
  51. {
  52. fprintf(stderr, "%s: error: Failed parsing '-f %s': %s\n", argv0, optarg, strerror(errno));
  53. return 1;
  54. }
  55. if(endptr != NULL && endptr[0] != 0)
  56. {
  57. fprintf(stderr,
  58. "%s: error: Non-numeric characters passed to '-f %s': %s\n",
  59. argv0,
  60. optarg,
  61. endptr);
  62. return 1;
  63. }
  64. break;
  65. case 's':
  66. errno = 0;
  67. shift = strtoul(optarg, &endptr, 0);
  68. if(errno != 0)
  69. {
  70. fprintf(stderr, "%s: error: Failed parsing '-f %s': %s\n", argv0, optarg, strerror(errno));
  71. return 1;
  72. }
  73. if(endptr != NULL && endptr[0] != 0)
  74. {
  75. fprintf(stderr,
  76. "%s: error: Non-numeric characters passed to '-f %s': %s\n",
  77. argv0,
  78. optarg,
  79. endptr);
  80. return 1;
  81. }
  82. break;
  83. case 'u':
  84. if(mode != UNIQ)
  85. {
  86. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  87. return 1;
  88. }
  89. mode = NO_REPEAT;
  90. break;
  91. case ':':
  92. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  93. return 1;
  94. case '?':
  95. if(!got_long_opt) fprintf(stderr, "%s: error: Unhandled option '-%c'\n", argv0, optopt);
  96. return 1;
  97. default:
  98. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  99. abort();
  100. }
  101. }
  102. argc -= optind;
  103. argv += optind;
  104. FILE *input = stdin;
  105. char *input_name = NULL;
  106. FILE *output = stdout;
  107. char *output_name = NULL;
  108. switch(argc)
  109. {
  110. case 0:
  111. break;
  112. case 1:
  113. if(strcmp(argv[0], "-") != 0)
  114. {
  115. input = fopen(argv[0], "r");
  116. input_name = argv[0];
  117. if(input == NULL)
  118. {
  119. fprintf(
  120. stderr, "uniq: error: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  121. return 1;
  122. }
  123. }
  124. break;
  125. case 2:
  126. if(strcmp(argv[0], "-") != 0)
  127. {
  128. input = fopen(argv[0], "r");
  129. input_name = argv[0];
  130. if(input == NULL)
  131. {
  132. fprintf(
  133. stderr, "uniq: error: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  134. return 1;
  135. }
  136. }
  137. if(strcmp(argv[1], "-") != 0)
  138. {
  139. output = fopen(argv[1], "w");
  140. output_name = argv[1];
  141. if(output == NULL)
  142. {
  143. fprintf(
  144. stderr, "uniq: error: Failed opening output file '%s': %s\n", argv[1], strerror(errno));
  145. if(fclose(input) != 0)
  146. fprintf(stderr,
  147. "uniq: error: Failed closing input file '%s': %s\n",
  148. input_name,
  149. strerror(errno));
  150. return 1;
  151. }
  152. }
  153. break;
  154. default:
  155. fprintf(stderr, "uniq: error: Invalid number of arguments (%d), expected [0..2]\n", argc);
  156. return 1;
  157. }
  158. char *first = NULL;
  159. ssize_t first_len = 0;
  160. size_t first_shift = 0;
  161. unsigned counter = 1;
  162. errno = 0;
  163. while(true)
  164. {
  165. char *cur = NULL;
  166. size_t cur_size = 0;
  167. ssize_t cur_len = getline(&cur, &cur_size, input);
  168. size_t cur_shift = shift;
  169. if(cur_len > 0 && cur[cur_len - 1] == '\n')
  170. {
  171. cur[cur_len - 1] = 0;
  172. cur_len--;
  173. }
  174. if(field != 0)
  175. {
  176. ssize_t field_shift = 0;
  177. for(unsigned long i = 0; i < field; i++)
  178. {
  179. while(field_shift < cur_len && isblank(cur[field_shift]))
  180. field_shift++;
  181. while(field_shift < cur_len && !isblank(cur[field_shift]))
  182. field_shift++;
  183. }
  184. cur_shift += field_shift;
  185. }
  186. if(cur_shift > (size_t)cur_len)
  187. {
  188. free(cur);
  189. cur_size = 0;
  190. cur = NULL;
  191. break;
  192. }
  193. //fprintf(stderr, "[debug] {cur_shift:%d} <%s>\n", cur_shift, cur+cur_shift);
  194. if(first != NULL)
  195. {
  196. if(cur != NULL && (cur_len - cur_shift == first_len - first_shift) &&
  197. strncmp(cur + cur_shift, first + first_shift, cur_len - cur_shift) == 0)
  198. {
  199. counter += 1;
  200. }
  201. else
  202. {
  203. switch(mode)
  204. {
  205. case UNIQ:
  206. fwrite(first, first_len, 1, output);
  207. fprintf(output, "\n");
  208. break;
  209. case ONLY_REPEAT:
  210. if(counter > 1)
  211. {
  212. fwrite(first, first_len, 1, output);
  213. fprintf(output, "\n");
  214. }
  215. break;
  216. case NO_REPEAT:
  217. if(counter == 1)
  218. {
  219. fwrite(first, first_len, 1, output);
  220. fprintf(output, "\n");
  221. }
  222. break;
  223. case COUNT:
  224. fprintf(output, "%d %s\n", counter, first);
  225. break;
  226. }
  227. counter = 1;
  228. free(first);
  229. }
  230. }
  231. if(cur_len < 0)
  232. {
  233. if(cur_size > 0) free(cur);
  234. break;
  235. }
  236. if(counter == 1)
  237. {
  238. first = cur;
  239. first_len = cur_len;
  240. first_shift = cur_shift;
  241. }
  242. }
  243. int ret = 0;
  244. if(errno != 0)
  245. {
  246. fprintf(stderr, "uniq: error: Failed reading: %s\n", strerror(errno));
  247. ret = 1;
  248. }
  249. if(input != stdin)
  250. {
  251. if(fclose(input) != 0)
  252. {
  253. fprintf(
  254. stderr, "uniq: error: Failed closing input file '%s': %s\n", input_name, strerror(errno));
  255. ret = 1;
  256. }
  257. }
  258. if(output != stdout)
  259. {
  260. if(fclose(output) != 0)
  261. {
  262. fprintf(stderr,
  263. "uniq: error: Failed closing output file '%s': %s\n",
  264. output_name,
  265. strerror(errno));
  266. ret = 1;
  267. }
  268. }
  269. return ret;
  270. }