logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

wc.c (6551B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../lib/bitmasks.h"
  6. #include <ctype.h> // isspace
  7. #include <errno.h>
  8. #include <fcntl.h> // posix_fadvise
  9. #include <locale.h> // setlocale
  10. #include <stdbool.h>
  11. #include <stdint.h> // uint8_t
  12. #include <stdio.h> // fprintf, fopen
  13. #include <stdlib.h> // abort
  14. #include <string.h> // strchr, strerror
  15. #include <sys/stat.h>
  16. #include <unistd.h> // getopt
  17. #include <wchar.h>
  18. #include <wctype.h> // iswspace
  19. #ifdef HAS_GETOPT_LONG
  20. #include <getopt.h>
  21. #endif
  22. #define WC_BUFSIZ 16320
  23. static char buf[WC_BUFSIZ] = "";
  24. static const char *argv0 = "wc";
  25. static enum {
  26. WC_OPT_C = 1 << 0,
  27. WC_OPT_L = 1 << 1,
  28. WC_OPT_W = 1 << 2,
  29. WC_OPT_ALL = WC_OPT_C | WC_OPT_L | WC_OPT_W,
  30. } wc_opts = 0;
  31. off_t total_bytes = 0, total_lines = 0, total_words = 0;
  32. static void
  33. print_counts(off_t lines, off_t words, off_t bytes, const char *filename)
  34. {
  35. const char *fmt = "%ld";
  36. if(FIELD_MATCH(wc_opts, WC_OPT_L))
  37. {
  38. printf(fmt, lines);
  39. fmt = " %ld";
  40. }
  41. if(FIELD_MATCH(wc_opts, WC_OPT_W))
  42. {
  43. printf(fmt, words);
  44. fmt = " %ld";
  45. }
  46. if(FIELD_MATCH(wc_opts, WC_OPT_C))
  47. {
  48. printf(fmt, bytes);
  49. }
  50. if(filename != NULL) printf(" %s", filename);
  51. printf("\n");
  52. }
  53. static int
  54. wc_file_bytes(int fd, char *filename)
  55. {
  56. off_t bytes = 0, lines = 0, words = 0, wordlen = 0;
  57. ssize_t nread = -1;
  58. if(FIELD_MATCH(wc_opts, WC_OPT_L) || FIELD_MATCH(wc_opts, WC_OPT_W))
  59. {
  60. while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
  61. {
  62. bytes += nread;
  63. for(ssize_t i = 0; i < nread; i++)
  64. {
  65. int c = buf[i];
  66. if(c == '\n') lines++;
  67. if(isspace(c))
  68. {
  69. if(wordlen > 0)
  70. {
  71. words++;
  72. wordlen = 0;
  73. }
  74. }
  75. else
  76. {
  77. wordlen++;
  78. }
  79. }
  80. }
  81. }
  82. else
  83. {
  84. struct stat status;
  85. if(fstat(fd, &status) < 0)
  86. {
  87. fprintf(stderr,
  88. "%s: error: Failed getting status for file '%s': %s\n",
  89. argv0,
  90. filename,
  91. strerror(errno));
  92. return 1;
  93. }
  94. if(S_ISREG(status.st_mode) && status.st_size != 0)
  95. {
  96. bytes += status.st_size;
  97. }
  98. else
  99. {
  100. while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
  101. bytes += nread;
  102. }
  103. }
  104. if(nread < 0 && errno != 0)
  105. {
  106. fprintf(stderr,
  107. "%s: error: Failed reading from file '%s': %s\n",
  108. argv0,
  109. filename != NULL ? filename : "<stdin>",
  110. strerror(errno));
  111. return -1;
  112. }
  113. if(wordlen > 0) words++;
  114. total_bytes += bytes, total_lines += lines, total_words += words;
  115. print_counts(lines, words, bytes, filename);
  116. return 0;
  117. }
  118. static int
  119. wc_file_chars(int fd, char *filename)
  120. {
  121. off_t chars = 0, lines = 0, words = 0, wordlen = 0;
  122. FILE *file = fdopen(fd, "r");
  123. if(file == NULL)
  124. {
  125. fprintf(stderr,
  126. "%s: error: Failed getting file stream for file '%s': %s\n",
  127. argv0,
  128. filename,
  129. strerror(errno));
  130. return -1;
  131. }
  132. if(setvbuf(file, buf, _IOFBF, WC_BUFSIZ) != 0)
  133. {
  134. fprintf(stderr,
  135. "%s: warning: Failed setting a new buffer for <stdin>: %s\n",
  136. argv0,
  137. strerror(errno));
  138. errno = 0;
  139. }
  140. while(true)
  141. {
  142. wint_t c = getwc(file);
  143. if(c == WEOF)
  144. {
  145. if(errno != 0)
  146. {
  147. fprintf(stderr,
  148. "%s: error: Failed reading from file '%s': %s\n",
  149. argv0,
  150. filename != NULL ? filename : "<stdin>",
  151. strerror(errno));
  152. return -1;
  153. }
  154. break;
  155. }
  156. chars++;
  157. if(c == '\n') lines++;
  158. if(iswspace(c))
  159. {
  160. if(wordlen > 0)
  161. {
  162. words++;
  163. wordlen = 0;
  164. }
  165. }
  166. else
  167. {
  168. wordlen++;
  169. }
  170. }
  171. if(wordlen > 0) words++;
  172. print_counts(lines, words, chars, filename);
  173. return 0;
  174. }
  175. static void
  176. usage(void)
  177. {
  178. fprintf(stderr, "Usage: wc [-c|-m] [-lw] [file...]\n");
  179. }
  180. int
  181. main(int argc, char *argv[])
  182. {
  183. setlocale(LC_ALL, "");
  184. errno = 0;
  185. int (*wc_file)(int, char *) = &wc_file_bytes;
  186. int c = -1;
  187. #ifdef HAS_GETOPT_LONG
  188. // Strictly for GNUisms compatibility so no long-only options
  189. // clang-format off
  190. static struct option opts[] = {
  191. {"bytes", no_argument, 0, 'c'},
  192. {"lines", no_argument, 0, 'l'},
  193. {"chars", no_argument, 0, 'm'},
  194. {"words", no_argument, 0, 'w'},
  195. {0, 0, 0, 0},
  196. };
  197. // clang-format on
  198. // Need + as first character to get POSIX-style option parsing
  199. while((c = getopt_long(argc, argv, "+:clmw", opts, NULL)) != -1)
  200. #else
  201. while((c = getopt(argc, argv, ":clmw")) != -1)
  202. #endif
  203. {
  204. switch(c)
  205. {
  206. case 'c':
  207. wc_opts |= WC_OPT_C;
  208. wc_file = wc_file_bytes;
  209. break;
  210. case 'l':
  211. wc_opts |= WC_OPT_L;
  212. break;
  213. case 'm':
  214. wc_opts |= WC_OPT_C;
  215. wc_file = wc_file_chars;
  216. break;
  217. case 'w':
  218. wc_opts |= WC_OPT_W;
  219. break;
  220. case ':':
  221. fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
  222. usage();
  223. return 1;
  224. case '?':
  225. fprintf(stderr, "%s: error: Unrecognised option: '-%c'\n", argv0, optopt);
  226. usage();
  227. return 1;
  228. default:
  229. abort();
  230. }
  231. }
  232. if(wc_opts == 0) wc_opts = WC_OPT_ALL;
  233. argc -= optind;
  234. argv += optind;
  235. if((errno = posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
  236. {
  237. if(errno != ESPIPE)
  238. {
  239. fprintf(stderr,
  240. "%s: warning: Failure from posix_fadvise sequential for <stdin>: %s\n",
  241. argv0,
  242. strerror(errno));
  243. }
  244. errno = 0;
  245. }
  246. if(argc < 1)
  247. {
  248. if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
  249. }
  250. for(int i = 0; i < argc; i++)
  251. {
  252. char *path = argv[i];
  253. if(path[0] == '-' && path[1] == 0)
  254. {
  255. if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
  256. continue;
  257. }
  258. // https://www.austingroupbugs.net/view.php?id=251
  259. if(strchr(path, '\n') != NULL)
  260. fprintf(
  261. stderr,
  262. "%s: warning: Filename '%s' contains a newline while wc(1) uses newlines as separators\n",
  263. argv0,
  264. path);
  265. int arg_fd = open(path, O_RDONLY | O_NOCTTY);
  266. if(arg_fd < 0)
  267. {
  268. fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, path, strerror(errno));
  269. return 1;
  270. }
  271. if((errno = posix_fadvise(arg_fd, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
  272. {
  273. fprintf(stderr,
  274. "%s: warning: Failure from posix_fadvise sequential for file '%s': %s\n",
  275. argv0,
  276. path,
  277. strerror(errno));
  278. errno = 0;
  279. }
  280. if(wc_file(arg_fd, path) < 0) return 1;
  281. if(close(arg_fd) < 0)
  282. {
  283. fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, path, strerror(errno));
  284. return 1;
  285. }
  286. }
  287. if(argc > 1) print_counts(total_lines, total_words, total_bytes, "total");
  288. return 0;
  289. }