logo

utils-std

Collection of commonly available Unix tools. Source: git clone https://anongit.hacktivis.me/git/utils-std.git/

wc.c (6971B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _POSIX_C_SOURCE 200809L
  5. #include "../config.h" // HAS_*
  6. #include "../lib/bitmasks.h"
  7. #include "../libutils/getopt_nolong.h"
  8. #include <ctype.h> // isspace
  9. #include <errno.h>
  10. #include <fcntl.h> // posix_fadvise
  11. #include <locale.h> // setlocale
  12. #include <stdbool.h>
  13. #include <stdint.h> // uint8_t
  14. #include <stdio.h> // fprintf
  15. #include <stdlib.h> // abort
  16. #include <string.h> // strchr, strerror
  17. #include <sys/stat.h>
  18. #include <unistd.h> // getopt
  19. #include <wchar.h>
  20. #include <wctype.h> // iswspace
  21. #ifdef HAS_GETOPT_LONG
  22. #include <getopt.h>
  23. #endif
  24. #define WC_BUFSIZ 16320
  25. static char buf[WC_BUFSIZ] = "";
  26. const char *argv0 = "wc";
  27. static enum {
  28. WC_OPT_C = 1 << 0,
  29. WC_OPT_L = 1 << 1,
  30. WC_OPT_W = 1 << 2,
  31. WC_OPT_ALL = WC_OPT_C | WC_OPT_L | WC_OPT_W,
  32. } wc_opts = 0;
  33. off_t total_bytes = 0, total_lines = 0, total_words = 0;
  34. static void
  35. print_counts(off_t lines, off_t words, off_t bytes, const char *filename)
  36. {
  37. const char *fmt = "%ld";
  38. if(FIELD_MATCH(wc_opts, WC_OPT_L))
  39. {
  40. printf(fmt, lines);
  41. fmt = " %ld";
  42. }
  43. if(FIELD_MATCH(wc_opts, WC_OPT_W))
  44. {
  45. printf(fmt, words);
  46. fmt = " %ld";
  47. }
  48. if(FIELD_MATCH(wc_opts, WC_OPT_C))
  49. {
  50. printf(fmt, bytes);
  51. }
  52. if(filename != NULL) printf(" %s", filename);
  53. printf("\n");
  54. }
  55. static int
  56. wc_file_bytes(int fd, char *filename)
  57. {
  58. off_t bytes = 0, lines = 0, words = 0, wordlen = 0;
  59. ssize_t nread = -1;
  60. if(FIELD_MATCH(wc_opts, WC_OPT_W))
  61. {
  62. while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
  63. {
  64. bytes += nread;
  65. for(ssize_t i = 0; i < nread; i++)
  66. {
  67. int c = buf[i];
  68. if(c == '\n') lines++;
  69. if(isspace(c))
  70. {
  71. if(wordlen > 0)
  72. {
  73. words++;
  74. wordlen = 0;
  75. }
  76. }
  77. else
  78. {
  79. wordlen++;
  80. }
  81. }
  82. }
  83. }
  84. else if(FIELD_MATCH(wc_opts, WC_OPT_L))
  85. {
  86. while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
  87. {
  88. bytes += nread;
  89. for(ssize_t i = 0; i < nread; i++)
  90. if(buf[i] == '\n') lines++;
  91. }
  92. }
  93. else
  94. {
  95. struct stat status;
  96. if(fstat(fd, &status) < 0)
  97. {
  98. fprintf(stderr,
  99. "%s: error: Failed getting status for file '%s': %s\n",
  100. argv0,
  101. filename,
  102. strerror(errno));
  103. return 1;
  104. }
  105. if(S_ISREG(status.st_mode) && status.st_size != 0)
  106. {
  107. bytes += status.st_size;
  108. }
  109. else
  110. {
  111. while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
  112. bytes += nread;
  113. }
  114. }
  115. if(nread < 0 && errno != 0)
  116. {
  117. fprintf(stderr,
  118. "%s: error: Failed reading from file '%s': %s\n",
  119. argv0,
  120. filename != NULL ? filename : "<stdin>",
  121. strerror(errno));
  122. return -1;
  123. }
  124. if(wordlen > 0) words++;
  125. total_bytes += bytes, total_lines += lines, total_words += words;
  126. print_counts(lines, words, bytes, filename);
  127. return 0;
  128. }
  129. static int
  130. wc_file_chars(int fd, char *filename)
  131. {
  132. off_t chars = 0, lines = 0, words = 0, wordlen = 0;
  133. FILE *file = fdopen(fd, "r");
  134. if(file == NULL)
  135. {
  136. fprintf(stderr,
  137. "%s: error: Failed getting file stream for file '%s': %s\n",
  138. argv0,
  139. filename,
  140. strerror(errno));
  141. return -1;
  142. }
  143. if(setvbuf(file, buf, _IOFBF, WC_BUFSIZ) != 0)
  144. {
  145. fprintf(stderr,
  146. "%s: warning: Failed setting a new buffer for <stdin>: %s\n",
  147. argv0,
  148. strerror(errno));
  149. errno = 0;
  150. }
  151. while(true)
  152. {
  153. errno = 0;
  154. wint_t c = getwc(file);
  155. if(c == WEOF)
  156. {
  157. if(errno != 0)
  158. {
  159. fprintf(stderr,
  160. "%s: error: Failed reading from file '%s': %s\n",
  161. argv0,
  162. filename != NULL ? filename : "<stdin>",
  163. strerror(errno));
  164. fclose(file);
  165. return -1;
  166. }
  167. break;
  168. }
  169. chars++;
  170. if(c == '\n') lines++;
  171. if(iswspace(c))
  172. {
  173. if(wordlen > 0)
  174. {
  175. words++;
  176. wordlen = 0;
  177. }
  178. }
  179. else
  180. {
  181. wordlen++;
  182. }
  183. }
  184. if(wordlen > 0) words++;
  185. print_counts(lines, words, chars, filename);
  186. fclose(file);
  187. return 0;
  188. }
  189. static void
  190. usage(void)
  191. {
  192. fprintf(stderr, "Usage: wc [-c|-m] [-lw] [file...]\n");
  193. }
  194. int
  195. main(int argc, char *argv[])
  196. {
  197. char *lc_all = setlocale(LC_ALL, "");
  198. if(lc_all == NULL)
  199. {
  200. fprintf(stderr,
  201. "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n",
  202. argv0,
  203. strerror(errno));
  204. }
  205. errno = 0;
  206. int (*wc_file)(int, char *) = &wc_file_bytes;
  207. #ifdef HAS_GETOPT_LONG
  208. // Strictly for GNUisms compatibility so no long-only options
  209. // clang-format off
  210. static struct option opts[] = {
  211. {"bytes", no_argument, NULL, 'c'},
  212. {"lines", no_argument, NULL, 'l'},
  213. {"chars", no_argument, NULL, 'm'},
  214. {"words", no_argument, NULL, 'w'},
  215. {0, 0, 0, 0},
  216. };
  217. // clang-format on
  218. // Need + as first character to get POSIX-style option parsing
  219. for(int c = -1; (c = getopt_long(argc, argv, "+:clmw", opts, NULL)) != -1;)
  220. #else
  221. for(int c = -1; (c = getopt_nolong(argc, argv, ":clmw")) != -1;)
  222. #endif
  223. {
  224. switch(c)
  225. {
  226. case 'c':
  227. wc_opts |= WC_OPT_C;
  228. wc_file = wc_file_bytes;
  229. break;
  230. case 'l':
  231. wc_opts |= WC_OPT_L;
  232. break;
  233. case 'm':
  234. wc_opts |= WC_OPT_C;
  235. wc_file = wc_file_chars;
  236. break;
  237. case 'w':
  238. wc_opts |= WC_OPT_W;
  239. break;
  240. case ':':
  241. fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
  242. usage();
  243. return 1;
  244. case '?':
  245. GETOPT_UNKNOWN_OPT
  246. usage();
  247. return 1;
  248. default:
  249. abort();
  250. }
  251. }
  252. if(wc_opts == 0) wc_opts = WC_OPT_ALL;
  253. argc -= optind;
  254. argv += optind;
  255. if((errno = posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
  256. {
  257. if(errno != ESPIPE)
  258. {
  259. fprintf(stderr,
  260. "%s: warning: Failure from posix_fadvise sequential for <stdin>: %s\n",
  261. argv0,
  262. strerror(errno));
  263. }
  264. errno = 0;
  265. }
  266. if(argc < 1)
  267. {
  268. if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
  269. }
  270. for(int i = 0; i < argc; i++)
  271. {
  272. char *path = argv[i];
  273. if(path[0] == '-' && path[1] == 0)
  274. {
  275. if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
  276. continue;
  277. }
  278. // https://www.austingroupbugs.net/view.php?id=251
  279. if(strchr(path, '\n') != NULL)
  280. fprintf(
  281. stderr,
  282. "%s: warning: Filename '%s' contains a newline while wc(1) uses newlines as separators\n",
  283. argv0,
  284. path);
  285. int arg_fd = open(path, O_RDONLY | O_NOCTTY);
  286. if(arg_fd < 0)
  287. {
  288. fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, path, strerror(errno));
  289. return 1;
  290. }
  291. if((errno = posix_fadvise(arg_fd, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
  292. {
  293. fprintf(stderr,
  294. "%s: warning: Failure from posix_fadvise sequential for file '%s': %s\n",
  295. argv0,
  296. path,
  297. strerror(errno));
  298. errno = 0;
  299. }
  300. if(wc_file(arg_fd, path) < 0) return 1;
  301. if(close(arg_fd) < 0)
  302. {
  303. fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, path, strerror(errno));
  304. return 1;
  305. }
  306. }
  307. if(argc > 1) print_counts(total_lines, total_words, total_bytes, "total");
  308. return 0;
  309. }