logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git

uniq.c (5129B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include <assert.h>
  6. #include <ctype.h> // isblank
  7. #include <errno.h>
  8. #include <stdbool.h>
  9. #include <stdio.h> // getline
  10. #include <stdlib.h> // atoi
  11. #include <string.h> // strncmp
  12. #include <unistd.h> // getopt
  13. enum uniq_mode
  14. {
  15. UNIQ, // default
  16. COUNT,
  17. ONLY_REPEAT,
  18. NO_REPEAT,
  19. };
  20. const char *argv0 = "uniq";
  21. int
  22. main(int argc, char *argv[])
  23. {
  24. enum uniq_mode mode = UNIQ;
  25. unsigned long field = 0, shift = 0;
  26. char *endptr = NULL;
  27. int c = -1;
  28. while((c = getopt(argc, argv, ":cdf:s:u")) != -1)
  29. {
  30. switch(c)
  31. {
  32. case 'c':
  33. if(mode != UNIQ)
  34. {
  35. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  36. return 1;
  37. }
  38. mode = COUNT;
  39. break;
  40. case 'd':
  41. if(mode != UNIQ)
  42. {
  43. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  44. return 1;
  45. }
  46. mode = ONLY_REPEAT;
  47. break;
  48. case 'f':
  49. errno = 0;
  50. field = strtoul(optarg, &endptr, 0);
  51. if(errno != 0)
  52. {
  53. fprintf(stderr, "%s: error: Failed parsing '-f %s': %s\n", argv0, optarg, strerror(errno));
  54. return 1;
  55. }
  56. if(endptr != NULL && endptr[0] != 0)
  57. {
  58. fprintf(stderr,
  59. "%s: error: Non-numeric characters passed to '-f %s': %s\n",
  60. argv0,
  61. optarg,
  62. endptr);
  63. return 1;
  64. }
  65. break;
  66. case 's':
  67. errno = 0;
  68. shift = strtoul(optarg, &endptr, 0);
  69. if(errno != 0)
  70. {
  71. fprintf(stderr, "%s: error: Failed parsing '-f %s': %s\n", argv0, optarg, strerror(errno));
  72. return 1;
  73. }
  74. if(endptr != NULL && endptr[0] != 0)
  75. {
  76. fprintf(stderr,
  77. "%s: error: Non-numeric characters passed to '-f %s': %s\n",
  78. argv0,
  79. optarg,
  80. endptr);
  81. return 1;
  82. }
  83. break;
  84. case 'u':
  85. if(mode != UNIQ)
  86. {
  87. fprintf(stderr, "%s: error: can only pass one of [-c|-d|-u]\n", argv0);
  88. return 1;
  89. }
  90. mode = NO_REPEAT;
  91. break;
  92. case ':':
  93. fprintf(stderr, "%s: error: Option '-%c' requires an operand\n", argv0, optopt);
  94. return 1;
  95. case '?':
  96. fprintf(stderr, "%s: error: Unhandled option '-%c'\n", argv0, optopt);
  97. return 1;
  98. default:
  99. fprintf(stderr, "%s: error: Unhandled getopt case '%c'\n", argv0, c);
  100. abort();
  101. }
  102. }
  103. argc -= optind;
  104. argv += optind;
  105. assert(errno == 0);
  106. FILE *input = stdin;
  107. FILE *output = stdout;
  108. switch(argc)
  109. {
  110. case 0:
  111. break;
  112. case 1:
  113. input = fopen(argv[0], "r");
  114. if(input == NULL)
  115. {
  116. fprintf(
  117. stderr, "uniq: error: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  118. return 1;
  119. }
  120. break;
  121. case 2:
  122. input = fopen(argv[0], "r");
  123. if(input == NULL)
  124. {
  125. fprintf(
  126. stderr, "uniq: error: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  127. return 1;
  128. }
  129. output = fopen(argv[1], "w");
  130. if(output == NULL)
  131. {
  132. fprintf(
  133. stderr, "uniq: error: Failed opening output file '%s': %s\n", argv[1], strerror(errno));
  134. return 1;
  135. }
  136. break;
  137. default:
  138. fprintf(stderr, "uniq: error: Invalid number of arguments (%d), expected [0..2]\n", argc);
  139. return 1;
  140. }
  141. assert(errno == 0);
  142. char *first = NULL;
  143. ssize_t first_len = 0;
  144. size_t first_shift = 0;
  145. unsigned counter = 1;
  146. errno = 0;
  147. while(true)
  148. {
  149. assert(errno == 0);
  150. char *cur = NULL;
  151. size_t cur_size = 0;
  152. ssize_t cur_len = getline(&cur, &cur_size, input);
  153. size_t cur_shift = shift;
  154. if(cur_len > 0 && cur[cur_len - 1] == '\n')
  155. {
  156. cur[cur_len - 1] = 0;
  157. cur_len--;
  158. }
  159. if(field != 0)
  160. {
  161. ssize_t field_shift = 0;
  162. for(unsigned long i = 0; i < field; i++)
  163. {
  164. while(field_shift < cur_len && isblank(cur[field_shift]))
  165. field_shift++;
  166. while(field_shift < cur_len && !isblank(cur[field_shift]))
  167. field_shift++;
  168. }
  169. cur_shift += field_shift;
  170. }
  171. if(cur_shift > cur_len)
  172. {
  173. free(cur);
  174. cur_size = 0;
  175. cur = NULL;
  176. break;
  177. }
  178. //fprintf(stderr, "[debug] {cur_shift:%d} <%s>\n", cur_shift, cur+cur_shift);
  179. if(first != NULL)
  180. {
  181. if(cur != NULL && (cur_len - cur_shift == first_len - first_shift) &&
  182. strncmp(cur + cur_shift, first + first_shift, cur_len - cur_shift) == 0)
  183. {
  184. counter += 1;
  185. }
  186. else
  187. {
  188. switch(mode)
  189. {
  190. case UNIQ:
  191. fwrite(first, first_len, 1, output);
  192. fprintf(output, "\n");
  193. break;
  194. case ONLY_REPEAT:
  195. if(counter > 1)
  196. {
  197. fwrite(first, first_len, 1, output);
  198. fprintf(output, "\n");
  199. }
  200. break;
  201. case NO_REPEAT:
  202. if(counter == 1)
  203. {
  204. fwrite(first, first_len, 1, output);
  205. fprintf(output, "\n");
  206. }
  207. break;
  208. case COUNT:
  209. fprintf(output, "%d %s\n", counter, first);
  210. break;
  211. }
  212. counter = 1;
  213. free(first);
  214. }
  215. }
  216. if(cur_len < 0)
  217. {
  218. if(cur_size > 0) free(cur);
  219. break;
  220. }
  221. if(counter == 1)
  222. {
  223. first = cur;
  224. first_len = cur_len;
  225. first_shift = cur_shift;
  226. }
  227. }
  228. if(errno != 0)
  229. {
  230. fprintf(stderr, "uniq: error: Failed reading: %s\n", strerror(errno));
  231. return 1;
  232. }
  233. return 0;
  234. }