logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

uniq.c (5893B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include <ctype.h> // isblank
  6. #include <errno.h>
  7. #include <stdbool.h>
  8. #include <stdio.h> // getline
  9. #include <stdlib.h> // atoi
  10. #include <string.h> // strncmp
  11. #include <unistd.h> // getopt
  // Output policy of the program. The option parser accepts at most one of
  // -c, -d and -u; when none is given the mode stays at UNIQ.
  enum uniq_mode
  {
      UNIQ = 0,        // default: print the first line of every group of adjacent equal lines
      COUNT = 1,       // -c: like UNIQ, but each line is prefixed with the group's line count
      ONLY_REPEAT = 2, // -d: print only groups that contain more than one line
      NO_REPEAT = 3,   // -u: print only groups that contain exactly one line
  };
  19. const char *argv0 = "uniq";
  20. int
  21. main(int argc, char *argv[])
  22. {
  23. enum uniq_mode mode = UNIQ;
  24. unsigned long field = 0, shift = 0;
  25. char *endptr = NULL;
  26. for(int c = -1; (c = getopt(argc, argv, ":cdf:s:u")) != -1;)
  27. {
  28. switch(c)
  29. {
  30. case 'c':
  31. if(mode != UNIQ)
  32. {
  33. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  34. return 1;
  35. }
  36. mode = COUNT;
  37. break;
  38. case 'd':
  39. if(mode != UNIQ)
  40. {
  41. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  42. return 1;
  43. }
  44. mode = ONLY_REPEAT;
  45. break;
  46. case 'f':
  47. errno = 0;
  48. field = strtoul(optarg, &endptr, 0);
  49. if(errno != 0)
  50. {
  51. fprintf(stderr, "%s: error: Failed parsing '-f %s': %s\n", argv0, optarg, strerror(errno));
  52. return 1;
  53. }
  54. if(endptr != NULL && endptr[0] != 0)
  55. {
  56. fprintf(stderr,
  57. "%s: error: Non-numeric characters passed to '-f %s': %s\n",
  58. argv0,
  59. optarg,
  60. endptr);
  61. return 1;
  62. }
  63. break;
  64. case 's':
  65. errno = 0;
  66. shift = strtoul(optarg, &endptr, 0);
  67. if(errno != 0)
  68. {
  69. fprintf(stderr, "%s: error: Failed parsing '-f %s': %s\n", argv0, optarg, strerror(errno));
  70. return 1;
  71. }
  72. if(endptr != NULL && endptr[0] != 0)
  73. {
  74. fprintf(stderr,
  75. "%s: error: Non-numeric characters passed to '-f %s': %s\n",
  76. argv0,
  77. optarg,
  78. endptr);
  79. return 1;
  80. }
  81. break;
  82. case 'u':
  83. if(mode != UNIQ)
  84. {
  85. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  86. return 1;
  87. }
  88. mode = NO_REPEAT;
  89. break;
  90. case ':':
  91. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  92. return 1;
  93. case '?':
  94. fprintf(stderr, "%s: error: Unhandled option '-%c'\n", argv0, optopt);
  95. return 1;
  96. default:
  97. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  98. abort();
  99. }
  100. }
  101. argc -= optind;
  102. argv += optind;
  103. FILE *input = stdin;
  104. char *input_name = NULL;
  105. FILE *output = stdout;
  106. char *output_name = NULL;
  107. switch(argc)
  108. {
  109. case 0:
  110. break;
  111. case 1:
  112. if(strcmp(argv[0], "-") != 0)
  113. {
  114. input = fopen(argv[0], "r");
  115. input_name = argv[0];
  116. if(input == NULL)
  117. {
  118. fprintf(
  119. stderr, "uniq: error: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  120. return 1;
  121. }
  122. }
  123. break;
  124. case 2:
  125. if(strcmp(argv[0], "-") != 0)
  126. {
  127. input = fopen(argv[0], "r");
  128. input_name = argv[0];
  129. if(input == NULL)
  130. {
  131. fprintf(
  132. stderr, "uniq: error: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  133. return 1;
  134. }
  135. }
  136. if(strcmp(argv[1], "-") != 0)
  137. {
  138. output = fopen(argv[1], "w");
  139. output_name = argv[1];
  140. if(output == NULL)
  141. {
  142. fprintf(
  143. stderr, "uniq: error: Failed opening output file '%s': %s\n", argv[1], strerror(errno));
  144. if(fclose(input) != 0)
  145. fprintf(stderr,
  146. "uniq: error: Failed closing input file '%s': %s\n",
  147. input_name,
  148. strerror(errno));
  149. return 1;
  150. }
  151. }
  152. break;
  153. default:
  154. fprintf(stderr, "uniq: error: Invalid number of arguments (%d), expected [0..2]\n", argc);
  155. return 1;
  156. }
  157. char *first = NULL;
  158. ssize_t first_len = 0;
  159. size_t first_shift = 0;
  160. unsigned counter = 1;
  161. errno = 0;
  162. while(true)
  163. {
  164. char *cur = NULL;
  165. size_t cur_size = 0;
  166. ssize_t cur_len = getline(&cur, &cur_size, input);
  167. size_t cur_shift = shift;
  168. if(cur_len > 0 && cur[cur_len - 1] == '\n')
  169. {
  170. cur[cur_len - 1] = 0;
  171. cur_len--;
  172. }
  173. if(field != 0)
  174. {
  175. ssize_t field_shift = 0;
  176. for(unsigned long i = 0; i < field; i++)
  177. {
  178. while(field_shift < cur_len && isblank(cur[field_shift]))
  179. field_shift++;
  180. while(field_shift < cur_len && !isblank(cur[field_shift]))
  181. field_shift++;
  182. }
  183. cur_shift += field_shift;
  184. }
  185. if(cur_shift > cur_len)
  186. {
  187. free(cur);
  188. cur_size = 0;
  189. cur = NULL;
  190. break;
  191. }
  192. //fprintf(stderr, "[debug] {cur_shift:%d} <%s>\n", cur_shift, cur+cur_shift);
  193. if(first != NULL)
  194. {
  195. if(cur != NULL && (cur_len - cur_shift == first_len - first_shift) &&
  196. strncmp(cur + cur_shift, first + first_shift, cur_len - cur_shift) == 0)
  197. {
  198. counter += 1;
  199. }
  200. else
  201. {
  202. switch(mode)
  203. {
  204. case UNIQ:
  205. fwrite(first, first_len, 1, output);
  206. fprintf(output, "\n");
  207. break;
  208. case ONLY_REPEAT:
  209. if(counter > 1)
  210. {
  211. fwrite(first, first_len, 1, output);
  212. fprintf(output, "\n");
  213. }
  214. break;
  215. case NO_REPEAT:
  216. if(counter == 1)
  217. {
  218. fwrite(first, first_len, 1, output);
  219. fprintf(output, "\n");
  220. }
  221. break;
  222. case COUNT:
  223. fprintf(output, "%d %s\n", counter, first);
  224. break;
  225. }
  226. counter = 1;
  227. free(first);
  228. }
  229. }
  230. if(cur_len < 0)
  231. {
  232. if(cur_size > 0) free(cur);
  233. break;
  234. }
  235. if(counter == 1)
  236. {
  237. first = cur;
  238. first_len = cur_len;
  239. first_shift = cur_shift;
  240. }
  241. }
  242. int ret = 0;
  243. if(errno != 0)
  244. {
  245. fprintf(stderr, "uniq: error: Failed reading: %s\n", strerror(errno));
  246. ret = 1;
  247. }
  248. if(input != stdin)
  249. {
  250. if(fclose(input) != 0)
  251. {
  252. fprintf(
  253. stderr, "uniq: error: Failed closing input file '%s': %s\n", input_name, strerror(errno));
  254. ret = 1;
  255. }
  256. }
  257. if(output != stdout)
  258. {
  259. if(fclose(output) != 0)
  260. {
  261. fprintf(stderr,
  262. "uniq: error: Failed closing output file '%s': %s\n",
  263. output_name,
  264. strerror(errno));
  265. ret = 1;
  266. }
  267. }
  268. return ret;
  269. }