logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git

uniq.c (4643B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include <assert.h>
  6. #include <ctype.h> // isblank
  7. #include <errno.h>
  8. #include <stdbool.h>
  9. #include <stdio.h> // getline
  10. #include <stdlib.h> // atoi
  11. #include <string.h> // strncmp
  12. #include <unistd.h> // getopt
  // Output policy for each group of adjacent matching lines.
  // main() accepts at most one of -c, -d, -u; UNIQ is used when none is given.
  enum uniq_mode
  {
  	UNIQ = 0,        // default: print one copy of every group
  	COUNT = 1,       // -c: print every group prefixed by its number of occurrences
  	ONLY_REPEAT = 2, // -d: print only groups that occurred more than once
  	NO_REPEAT = 3,   // -u: print only groups that occurred exactly once
  };
  20. int
  21. main(int argc, char *argv[])
  22. {
  23. enum uniq_mode mode = UNIQ;
  24. unsigned long field = 0, shift = 0;
  25. char *endptr = NULL;
  26. int c = -1;
  27. while((c = getopt(argc, argv, ":cdf:s:u")) != -1)
  28. {
  29. switch(c)
  30. {
  31. case 'c':
  32. if(mode != UNIQ)
  33. {
  34. fprintf(stderr, "uniq: Error: can only pass one of [-c|-d|-u]\n");
  35. return 1;
  36. }
  37. mode = COUNT;
  38. break;
  39. case 'd':
  40. if(mode != UNIQ)
  41. {
  42. fprintf(stderr, "uniq: Error: can only pass one of [-c|-d|-u]\n");
  43. return 1;
  44. }
  45. mode = ONLY_REPEAT;
  46. break;
  47. case 'f':
  48. errno = 0;
  49. field = strtoul(optarg, &endptr, 0);
  50. if(errno != 0)
  51. {
  52. fprintf(stderr, "uniq: Error: Failed parsing '-f %s': %s\n", optarg, strerror(errno));
  53. return 1;
  54. }
  55. if(endptr != NULL && endptr[0] != 0)
  56. {
  57. fprintf(
  58. stderr, "uniq: Error: Non-numeric characters passed to '-f %s': %s\n", optarg, endptr);
  59. return 1;
  60. }
  61. break;
  62. case 's':
  63. errno = 0;
  64. shift = strtoul(optarg, &endptr, 0);
  65. if(errno != 0)
  66. {
  67. fprintf(stderr, "uniq: Error: Failed parsing '-f %s': %s\n", optarg, strerror(errno));
  68. return 1;
  69. }
  70. if(endptr != NULL && endptr[0] != 0)
  71. {
  72. fprintf(
  73. stderr, "uniq: Error: Non-numeric characters passed to '-f %s': %s\n", optarg, endptr);
  74. return 1;
  75. }
  76. break;
  77. case 'u':
  78. if(mode != UNIQ)
  79. {
  80. fprintf(stderr, "uniq: Error: can only pass one of [-c|-d|-u]\n");
  81. return 1;
  82. }
  83. mode = NO_REPEAT;
  84. break;
  85. default:
  86. abort();
  87. }
  88. }
  89. argc -= optind;
  90. argv += optind;
  91. assert(errno == 0);
  92. FILE *input = stdin;
  93. FILE *output = stdout;
  94. switch(argc)
  95. {
  96. case 0:
  97. break;
  98. case 1:
  99. input = fopen(argv[0], "r");
  100. if(input == NULL)
  101. {
  102. fprintf(stderr, "uniq: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  103. return 1;
  104. }
  105. break;
  106. case 2:
  107. input = fopen(argv[0], "r");
  108. if(input == NULL)
  109. {
  110. fprintf(stderr, "uniq: Failed opening input file '%s': %s\n", argv[0], strerror(errno));
  111. return 1;
  112. }
  113. output = fopen(argv[1], "w");
  114. if(output == NULL)
  115. {
  116. fprintf(stderr, "uniq: Failed opening output file '%s': %s\n", argv[1], strerror(errno));
  117. return 1;
  118. }
  119. break;
  120. default:
  121. fprintf(stderr, "uniq: Invalid number of arguments (%d), expected [0..2]\n", argc);
  122. return 1;
  123. }
  124. assert(errno == 0);
  125. char *first = NULL;
  126. ssize_t first_len = 0;
  127. size_t first_shift = 0;
  128. unsigned counter = 1;
  129. errno = 0;
  130. while(true)
  131. {
  132. assert(errno == 0);
  133. char *cur = NULL;
  134. size_t cur_size = 0;
  135. ssize_t cur_len = getline(&cur, &cur_size, input);
  136. size_t cur_shift = shift;
  137. if(cur_len > 0 && cur[cur_len - 1] == '\n')
  138. {
  139. cur[cur_len - 1] = 0;
  140. cur_len--;
  141. }
  142. if(field != 0)
  143. {
  144. ssize_t field_shift = 0;
  145. for(unsigned long i = 0; i < field; i++)
  146. {
  147. while(field_shift < cur_len && isblank(cur[field_shift]))
  148. field_shift++;
  149. while(field_shift < cur_len && !isblank(cur[field_shift]))
  150. field_shift++;
  151. }
  152. cur_shift += field_shift;
  153. }
  154. if(cur_shift > cur_len)
  155. {
  156. free(cur);
  157. cur_size = 0;
  158. cur = NULL;
  159. break;
  160. }
  161. //fprintf(stderr, "[debug] {cur_shift:%d} <%s>\n", cur_shift, cur+cur_shift);
  162. if(first != NULL)
  163. {
  164. if(cur != NULL && (cur_len - cur_shift == first_len - first_shift) &&
  165. strncmp(cur + cur_shift, first + first_shift, cur_len - cur_shift) == 0)
  166. {
  167. counter += 1;
  168. }
  169. else
  170. {
  171. switch(mode)
  172. {
  173. case UNIQ:
  174. fwrite(first, first_len, 1, output);
  175. fprintf(output, "\n");
  176. break;
  177. case ONLY_REPEAT:
  178. if(counter > 1)
  179. {
  180. fwrite(first, first_len, 1, output);
  181. fprintf(output, "\n");
  182. }
  183. break;
  184. case NO_REPEAT:
  185. if(counter == 1)
  186. {
  187. fwrite(first, first_len, 1, output);
  188. fprintf(output, "\n");
  189. }
  190. break;
  191. case COUNT:
  192. fprintf(output, "%d %s\n", counter, first);
  193. break;
  194. }
  195. counter = 1;
  196. free(first);
  197. }
  198. }
  199. if(cur_len < 0)
  200. {
  201. if(cur_size > 0) free(cur);
  202. break;
  203. }
  204. if(counter == 1)
  205. {
  206. first = cur;
  207. first_len = cur_len;
  208. first_shift = cur_shift;
  209. }
  210. }
  211. if(errno != 0)
  212. {
  213. fprintf(stderr, "uniq: Read error: %s\n", strerror(errno));
  214. return 1;
  215. }
  216. return 0;
  217. }