logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git

wc.c (6744B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../lib/bitmasks.h"
  6. #include <assert.h>
  7. #include <ctype.h> // isspace
  8. #include <errno.h>
  9. #include <fcntl.h> // posix_fadvise
  10. #include <locale.h> // setlocale
  11. #include <stdbool.h>
  12. #include <stdint.h> // uint8_t
  13. #include <stdio.h> // fprintf, fopen
  14. #include <stdlib.h> // abort
  15. #include <string.h> // strchr, strerror
  16. #include <sys/stat.h>
  17. #include <unistd.h> // getopt
  18. #include <wchar.h>
  19. #include <wctype.h> // iswspace
  20. #ifdef HAS_GETOPT_LONG
  21. #include <getopt.h>
  22. #endif
  23. #define WC_BUFSIZ 16320
  24. static char buf[WC_BUFSIZ] = "";
  25. static const char *argv0 = "wc";
  26. static enum {
  27. WC_OPT_C = 1 << 0,
  28. WC_OPT_L = 1 << 1,
  29. WC_OPT_W = 1 << 2,
  30. WC_OPT_ALL = WC_OPT_C | WC_OPT_L | WC_OPT_W,
  31. } wc_opts = 0;
  32. off_t total_bytes = 0, total_lines = 0, total_words = 0;
  33. static void
  34. print_counts(off_t lines, off_t words, off_t bytes, const char *filename)
  35. {
  36. const char *fmt = "%ld";
  37. if(FIELD_MATCH(wc_opts, WC_OPT_L))
  38. {
  39. printf(fmt, lines);
  40. fmt = " %ld";
  41. }
  42. if(FIELD_MATCH(wc_opts, WC_OPT_W))
  43. {
  44. printf(fmt, words);
  45. fmt = " %ld";
  46. }
  47. if(FIELD_MATCH(wc_opts, WC_OPT_C))
  48. {
  49. printf(fmt, bytes);
  50. }
  51. if(filename != NULL) printf(" %s", filename);
  52. printf("\n");
  53. }
  54. static int
  55. wc_file_bytes(int fd, char *filename)
  56. {
  57. off_t bytes = 0, lines = 0, words = 0, wordlen = 0;
  58. ssize_t nread = -1;
  59. if(FIELD_MATCH(wc_opts, WC_OPT_L) || FIELD_MATCH(wc_opts, WC_OPT_W))
  60. {
  61. while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
  62. {
  63. bytes += nread;
  64. for(ssize_t i = 0; i < nread; i++)
  65. {
  66. int c = buf[i];
  67. if(c == '\n') lines++;
  68. if(isspace(c))
  69. {
  70. if(wordlen > 0)
  71. {
  72. words++;
  73. wordlen = 0;
  74. }
  75. }
  76. else
  77. {
  78. wordlen++;
  79. }
  80. }
  81. }
  82. }
  83. else
  84. {
  85. struct stat status;
  86. if(fstat(fd, &status) < 0)
  87. {
  88. fprintf(stderr,
  89. "%s: error: Failed getting status for file '%s': %s\n",
  90. argv0,
  91. filename,
  92. strerror(errno));
  93. return 1;
  94. }
  95. if(S_ISREG(status.st_mode) && status.st_size != 0)
  96. {
  97. bytes += status.st_size;
  98. }
  99. else
  100. {
  101. while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
  102. bytes += nread;
  103. }
  104. }
  105. if(nread < 0 && errno != 0)
  106. {
  107. fprintf(stderr,
  108. "%s: error: Failed reading from file '%s': %s\n",
  109. argv0,
  110. filename != NULL ? filename : "<stdin>",
  111. strerror(errno));
  112. return -1;
  113. }
  114. if(wordlen > 0) words++;
  115. total_bytes += bytes, total_lines += lines, total_words += words;
  116. print_counts(lines, words, bytes, filename);
  117. return 0;
  118. }
  119. static int
  120. wc_file_chars(int fd, char *filename)
  121. {
  122. off_t chars = 0, lines = 0, words = 0, wordlen = 0;
  123. FILE *file = fdopen(fd, "r");
  124. if(file == NULL)
  125. {
  126. fprintf(stderr,
  127. "%s: error: Failed getting file stream for file '%s': %s\n",
  128. argv0,
  129. filename,
  130. strerror(errno));
  131. return -1;
  132. }
  133. if(setvbuf(file, buf, _IOFBF, WC_BUFSIZ) != 0)
  134. {
  135. fprintf(stderr,
  136. "%s: warning: Failed setting a new buffer for <stdin>: %s\n",
  137. argv0,
  138. strerror(errno));
  139. errno = 0;
  140. }
  141. while(true)
  142. {
  143. assert(errno == 0);
  144. wint_t c = getwc(file);
  145. if(c == WEOF)
  146. {
  147. if(errno != 0)
  148. {
  149. fprintf(stderr,
  150. "%s: error: Failed reading from file '%s': %s\n",
  151. argv0,
  152. filename != NULL ? filename : "<stdin>",
  153. strerror(errno));
  154. return -1;
  155. }
  156. break;
  157. }
  158. chars++;
  159. if(c == '\n') lines++;
  160. if(iswspace(c))
  161. {
  162. if(wordlen > 0)
  163. {
  164. words++;
  165. wordlen = 0;
  166. }
  167. }
  168. else
  169. {
  170. wordlen++;
  171. }
  172. }
  173. if(wordlen > 0) words++;
  174. print_counts(lines, words, chars, filename);
  175. return 0;
  176. }
  177. static void
  178. usage(void)
  179. {
  180. fprintf(stderr, "Usage: wc [-c|-m] [-lw] [file...]\n");
  181. }
  182. int
  183. main(int argc, char *argv[])
  184. {
  185. errno = 0;
  186. setlocale(LC_ALL, "");
  187. if(errno != 0)
  188. {
  189. fprintf(stderr, "%s: warning: Failed to initialize locales: %s\n", argv0, strerror(errno));
  190. errno = 0;
  191. }
  192. int (*wc_file)(int, char *) = &wc_file_bytes;
  193. int c = -1;
  194. #ifdef HAS_GETOPT_LONG
  195. // Strictly for GNUisms compatibility so no long-only options
  196. // clang-format off
  197. static struct option opts[] = {
  198. {"bytes", no_argument, 0, 'c'},
  199. {"lines", no_argument, 0, 'l'},
  200. {"chars", no_argument, 0, 'm'},
  201. {"words", no_argument, 0, 'w'},
  202. {0, 0, 0, 0},
  203. };
  204. // clang-format on
  205. // Need + as first character to get POSIX-style option parsing
  206. while((c = getopt_long(argc, argv, "+:clmw", opts, NULL)) != -1)
  207. #else
  208. while((c = getopt(argc, argv, ":clmw")) != -1)
  209. #endif
  210. {
  211. switch(c)
  212. {
  213. case 'c':
  214. wc_opts |= WC_OPT_C;
  215. wc_file = wc_file_bytes;
  216. break;
  217. case 'l':
  218. wc_opts |= WC_OPT_L;
  219. break;
  220. case 'm':
  221. wc_opts |= WC_OPT_C;
  222. wc_file = wc_file_chars;
  223. break;
  224. case 'w':
  225. wc_opts |= WC_OPT_W;
  226. break;
  227. case ':':
  228. fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
  229. usage();
  230. return 1;
  231. case '?':
  232. fprintf(stderr, "%s: error: Unrecognised option: '-%c'\n", argv0, optopt);
  233. usage();
  234. return 1;
  235. default:
  236. abort();
  237. }
  238. }
  239. if(wc_opts == 0) wc_opts = WC_OPT_ALL;
  240. assert(errno == 0);
  241. argc -= optind;
  242. argv += optind;
  243. if((errno = posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
  244. {
  245. if(errno != ESPIPE)
  246. {
  247. fprintf(stderr,
  248. "%s: warning: Failure from posix_fadvise sequential for <stdin>: %s\n",
  249. argv0,
  250. strerror(errno));
  251. }
  252. errno = 0;
  253. }
  254. if(argc < 1)
  255. {
  256. if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
  257. }
  258. for(int i = 0; i < argc; i++)
  259. {
  260. char *path = argv[i];
  261. if(path[0] == '-' && path[1] == 0)
  262. {
  263. if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
  264. continue;
  265. }
  266. // https://www.austingroupbugs.net/view.php?id=251
  267. if(strchr(path, '\n') != NULL)
  268. fprintf(
  269. stderr,
  270. "%s: warning: Filename '%s' contains a newline while wc(1) uses newlines as separators\n",
  271. argv0,
  272. path);
  273. int arg_fd = open(path, O_RDONLY | O_NOCTTY);
  274. if(arg_fd < 0)
  275. {
  276. fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, path, strerror(errno));
  277. return 1;
  278. }
  279. if((errno = posix_fadvise(arg_fd, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
  280. {
  281. fprintf(stderr,
  282. "%s: warning: Failure from posix_fadvise sequential for file '%s': %s\n",
  283. argv0,
  284. path,
  285. strerror(errno));
  286. errno = 0;
  287. }
  288. if(wc_file(arg_fd, path) < 0) return 1;
  289. if(close(arg_fd) < 0)
  290. {
  291. fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, path, strerror(errno));
  292. return 1;
  293. }
  294. }
  295. if(argc > 1) print_counts(total_lines, total_words, total_bytes, "total");
  296. return 0;
  297. }