logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git

wc.c (5412B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../lib/bitmasks.h"
  6. #include <assert.h>
  7. #include <ctype.h> // isspace
  8. #include <errno.h>
  9. #include <locale.h> // setlocale
  10. #include <stdbool.h>
  11. #include <stdint.h> // uint8_t
  12. #include <stdio.h> // fprintf, fopen
  13. #include <stdlib.h> // abort
  14. #include <string.h> // strchr, strerror
  15. #include <sys/stat.h>
  16. #include <unistd.h> // getopt
  17. #include <wchar.h>
  18. #include <wctype.h> // iswspace
  19. #ifdef HAS_GETOPT_LONG
  20. #include <getopt.h>
  21. #endif
  22. static const char *argv0 = "wc";
  23. static enum {
  24. WC_OPT_C = 1 << 0,
  25. WC_OPT_L = 1 << 1,
  26. WC_OPT_W = 1 << 2,
  27. WC_OPT_ALL = WC_OPT_C | WC_OPT_L | WC_OPT_W,
  28. } wc_opts = 0;
  29. off_t total_bytes = 0, total_lines = 0, total_words = 0;
  30. static void
  31. print_counts(off_t lines, off_t words, off_t bytes, const char *filename)
  32. {
  33. const char *fmt = "%ld";
  34. if(FIELD_MATCH(wc_opts, WC_OPT_L))
  35. {
  36. printf(fmt, lines);
  37. fmt = " %ld";
  38. }
  39. if(FIELD_MATCH(wc_opts, WC_OPT_W))
  40. {
  41. printf(fmt, words);
  42. fmt = " %ld";
  43. }
  44. if(FIELD_MATCH(wc_opts, WC_OPT_C))
  45. {
  46. printf(fmt, bytes);
  47. }
  48. if(filename != NULL) printf(" %s", filename);
  49. printf("\n");
  50. }
  51. static int
  52. wc_file_bytes(FILE *file, char *filename)
  53. {
  54. off_t bytes = 0, lines = 0, words = 0, wordlen = 0;
  55. while(true)
  56. {
  57. assert(errno == 0);
  58. int c = getc(file);
  59. if(c == EOF)
  60. {
  61. if(errno != 0)
  62. {
  63. fprintf(stderr,
  64. "%s: Failed reading from file '%s': %s\n",
  65. argv0,
  66. filename != NULL ? filename : "<stdin>",
  67. strerror(errno));
  68. return -1;
  69. }
  70. break;
  71. }
  72. bytes++;
  73. if(c == '\n') lines++;
  74. if(isspace(c))
  75. {
  76. if(wordlen > 0)
  77. {
  78. words++;
  79. wordlen = 0;
  80. }
  81. }
  82. else
  83. {
  84. wordlen++;
  85. }
  86. }
  87. if(wordlen > 0) words++;
  88. total_bytes += bytes, total_lines += lines, total_words += words;
  89. print_counts(lines, words, bytes, filename);
  90. return 0;
  91. }
  92. static int
  93. wc_file_chars(FILE *file, char *filename)
  94. {
  95. off_t chars = 0, lines = 0, words = 0, wordlen = 0;
  96. while(true)
  97. {
  98. assert(errno == 0);
  99. wint_t c = getwc(file);
  100. if(c == WEOF)
  101. {
  102. if(errno != 0)
  103. {
  104. fprintf(stderr,
  105. "%s: Failed reading from file '%s': %s\n",
  106. argv0,
  107. filename != NULL ? filename : "<stdin>",
  108. strerror(errno));
  109. return -1;
  110. }
  111. break;
  112. }
  113. chars++;
  114. if(c == '\n') lines++;
  115. if(iswspace(c))
  116. {
  117. if(wordlen > 0)
  118. {
  119. words++;
  120. wordlen = 0;
  121. }
  122. }
  123. else
  124. {
  125. wordlen++;
  126. }
  127. }
  128. if(wordlen > 0) words++;
  129. print_counts(lines, words, chars, filename);
  130. return 0;
  131. }
  132. static void
  133. usage(void)
  134. {
  135. fprintf(stderr, "Usage: wc [-c|-m] [-lw] [file...]\n");
  136. }
  137. int
  138. main(int argc, char *argv[])
  139. {
  140. errno = 0;
  141. setlocale(LC_ALL, "");
  142. if(errno != 0)
  143. {
  144. fprintf(stderr, "%s: Warning: Failed to initialize locales: %s\n", argv0, strerror(errno));
  145. errno = 0;
  146. }
  147. int (*wc_file)(FILE *, char *) = &wc_file_bytes;
  148. int c = -1;
  149. #ifdef HAS_GETOPT_LONG
  150. // Strictly for GNUisms compatibility so no long-only options
  151. // clang-format off
  152. static struct option opts[] = {
  153. {"bytes", no_argument, 0, 'c'},
  154. {"lines", no_argument, 0, 'l'},
  155. {"chars", no_argument, 0, 'm'},
  156. {"words", no_argument, 0, 'w'},
  157. {0, 0, 0, 0},
  158. };
  159. // clang-format on
  160. // Need + as first character to get POSIX-style option parsing
  161. while((c = getopt_long(argc, argv, "+:clmw", opts, NULL)) != -1)
  162. #else
  163. while((c = getopt(argc, argv, ":clmw")) != -1)
  164. #endif
  165. {
  166. switch(c)
  167. {
  168. case 'c':
  169. wc_opts |= WC_OPT_C;
  170. wc_file = wc_file_bytes;
  171. break;
  172. case 'l':
  173. wc_opts |= WC_OPT_L;
  174. break;
  175. case 'm':
  176. wc_opts |= WC_OPT_C;
  177. wc_file = wc_file_chars;
  178. break;
  179. case 'w':
  180. wc_opts |= WC_OPT_W;
  181. break;
  182. case ':':
  183. fprintf(stderr, "%s: Error: Missing operand for option: '-%c'\n", argv0, optopt);
  184. usage();
  185. return 1;
  186. case '?':
  187. fprintf(stderr, "%s: Error: Unrecognised option: '-%c'\n", argv0, optopt);
  188. usage();
  189. return 1;
  190. default:
  191. abort();
  192. }
  193. }
  194. if(wc_opts == 0) wc_opts = WC_OPT_ALL;
  195. assert(errno == 0);
  196. argc -= optind;
  197. argv += optind;
  198. if(argc < 1)
  199. {
  200. if(wc_file(stdin, NULL) < 0) return 1;
  201. }
  202. for(int i = 0; i < argc; i++)
  203. {
  204. char *path = argv[i];
  205. if(path[0] == '-' && path[1] == 0)
  206. {
  207. if(wc_file(stdin, NULL) < 0) return 1;
  208. continue;
  209. }
  210. // https://www.austingroupbugs.net/view.php?id=251
  211. if(strchr(path, '\n') != NULL)
  212. fprintf(
  213. stderr,
  214. "%s: Warning: Filename '%s' contains a newline while wc(1) uses newlines as separators\n",
  215. argv0,
  216. path);
  217. if(wc_opts == WC_OPT_C && wc_file == wc_file_bytes)
  218. {
  219. struct stat status;
  220. if(stat(path, &status) < 0)
  221. {
  222. fprintf(
  223. stderr, "%s: Failed getting status for file '%s': %s\n", argv0, path, strerror(errno));
  224. return 1;
  225. }
  226. printf("%ld %s\n", status.st_size, path);
  227. total_bytes += status.st_size;
  228. continue;
  229. }
  230. FILE *file = fopen(path, "r");
  231. if(file == NULL)
  232. {
  233. fprintf(stderr, "%s: Failed opening file '%s': %s\n", argv0, path, strerror(errno));
  234. return 1;
  235. }
  236. if(wc_file(file, path) < 0) return 1;
  237. if(fclose(file) < 0)
  238. {
  239. fprintf(stderr, "%s: Failed closing file '%s': %s\n", argv0, path, strerror(errno));
  240. return 1;
  241. }
  242. }
  243. if(argc > 1) print_counts(total_lines, total_words, total_bytes, "total");
  244. return 0;
  245. }