wc.c (6551B)
- // utils-std: Collection of commonly available Unix tools
- // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
- // SPDX-License-Identifier: MPL-2.0
- #define _POSIX_C_SOURCE 200809L
- #include "../lib/bitmasks.h"
- #include <ctype.h> // isspace
- #include <errno.h>
- #include <fcntl.h> // posix_fadvise
- #include <locale.h> // setlocale
- #include <stdbool.h>
- #include <stdint.h> // uint8_t
- #include <stdio.h> // fprintf, fopen
- #include <stdlib.h> // abort
- #include <string.h> // strchr, strerror
- #include <sys/stat.h>
- #include <unistd.h> // getopt
- #include <wchar.h>
- #include <wctype.h> // iswspace
- #ifdef HAS_GETOPT_LONG
- #include <getopt.h>
- #endif
// Size in bytes of the I/O buffer shared by the counting functions
#define WC_BUFSIZ 16320

// Single buffer handed to read(2) and setvbuf(3) by the counting functions
static char buf[WC_BUFSIZ] = {'\0'};

// Program name used as prefix in diagnostics
static const char *argv0 = "wc";

// Bitmask of the columns to print; 0 means no option was given yet
static enum {
	WC_OPT_C   = 0x1, // bytes (-c) or characters (-m)
	WC_OPT_L   = 0x2, // lines (-l)
	WC_OPT_W   = 0x4, // words (-w)
	WC_OPT_ALL = WC_OPT_C | WC_OPT_L | WC_OPT_W,
} wc_opts = 0;

// Running sums over all processed operands, printed as the "total" line
off_t total_bytes = 0;
off_t total_lines = 0;
off_t total_words = 0;
- static void
- print_counts(off_t lines, off_t words, off_t bytes, const char *filename)
- {
- const char *fmt = "%ld";
- if(FIELD_MATCH(wc_opts, WC_OPT_L))
- {
- printf(fmt, lines);
- fmt = " %ld";
- }
- if(FIELD_MATCH(wc_opts, WC_OPT_W))
- {
- printf(fmt, words);
- fmt = " %ld";
- }
- if(FIELD_MATCH(wc_opts, WC_OPT_C))
- {
- printf(fmt, bytes);
- }
- if(filename != NULL) printf(" %s", filename);
- printf("\n");
- }
- static int
- wc_file_bytes(int fd, char *filename)
- {
- off_t bytes = 0, lines = 0, words = 0, wordlen = 0;
- ssize_t nread = -1;
- if(FIELD_MATCH(wc_opts, WC_OPT_L) || FIELD_MATCH(wc_opts, WC_OPT_W))
- {
- while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
- {
- bytes += nread;
- for(ssize_t i = 0; i < nread; i++)
- {
- int c = buf[i];
- if(c == '\n') lines++;
- if(isspace(c))
- {
- if(wordlen > 0)
- {
- words++;
- wordlen = 0;
- }
- }
- else
- {
- wordlen++;
- }
- }
- }
- }
- else
- {
- struct stat status;
- if(fstat(fd, &status) < 0)
- {
- fprintf(stderr,
- "%s: error: Failed getting status for file '%s': %s\n",
- argv0,
- filename,
- strerror(errno));
- return 1;
- }
- if(S_ISREG(status.st_mode) && status.st_size != 0)
- {
- bytes += status.st_size;
- }
- else
- {
- while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
- bytes += nread;
- }
- }
- if(nread < 0 && errno != 0)
- {
- fprintf(stderr,
- "%s: error: Failed reading from file '%s': %s\n",
- argv0,
- filename != NULL ? filename : "<stdin>",
- strerror(errno));
- return -1;
- }
- if(wordlen > 0) words++;
- total_bytes += bytes, total_lines += lines, total_words += words;
- print_counts(lines, words, bytes, filename);
- return 0;
- }
- static int
- wc_file_chars(int fd, char *filename)
- {
- off_t chars = 0, lines = 0, words = 0, wordlen = 0;
- FILE *file = fdopen(fd, "r");
- if(file == NULL)
- {
- fprintf(stderr,
- "%s: error: Failed getting file stream for file '%s': %s\n",
- argv0,
- filename,
- strerror(errno));
- return -1;
- }
- if(setvbuf(file, buf, _IOFBF, WC_BUFSIZ) != 0)
- {
- fprintf(stderr,
- "%s: warning: Failed setting a new buffer for <stdin>: %s\n",
- argv0,
- strerror(errno));
- errno = 0;
- }
- while(true)
- {
- wint_t c = getwc(file);
- if(c == WEOF)
- {
- if(errno != 0)
- {
- fprintf(stderr,
- "%s: error: Failed reading from file '%s': %s\n",
- argv0,
- filename != NULL ? filename : "<stdin>",
- strerror(errno));
- return -1;
- }
- break;
- }
- chars++;
- if(c == '\n') lines++;
- if(iswspace(c))
- {
- if(wordlen > 0)
- {
- words++;
- wordlen = 0;
- }
- }
- else
- {
- wordlen++;
- }
- }
- if(wordlen > 0) words++;
- print_counts(lines, words, chars, filename);
- return 0;
- }
// Writes the one-line command synopsis to stderr
static void
usage(void)
{
	static const char synopsis[] = "Usage: wc [-c|-m] [-lw] [file...]\n";

	fputs(synopsis, stderr);
}
- int
- main(int argc, char *argv[])
- {
- setlocale(LC_ALL, "");
- errno = 0;
- int (*wc_file)(int, char *) = &wc_file_bytes;
- int c = -1;
- #ifdef HAS_GETOPT_LONG
- // Strictly for GNUisms compatibility so no long-only options
- // clang-format off
- static struct option opts[] = {
- {"bytes", no_argument, 0, 'c'},
- {"lines", no_argument, 0, 'l'},
- {"chars", no_argument, 0, 'm'},
- {"words", no_argument, 0, 'w'},
- {0, 0, 0, 0},
- };
- // clang-format on
- // Need + as first character to get POSIX-style option parsing
- while((c = getopt_long(argc, argv, "+:clmw", opts, NULL)) != -1)
- #else
- while((c = getopt(argc, argv, ":clmw")) != -1)
- #endif
- {
- switch(c)
- {
- case 'c':
- wc_opts |= WC_OPT_C;
- wc_file = wc_file_bytes;
- break;
- case 'l':
- wc_opts |= WC_OPT_L;
- break;
- case 'm':
- wc_opts |= WC_OPT_C;
- wc_file = wc_file_chars;
- break;
- case 'w':
- wc_opts |= WC_OPT_W;
- break;
- case ':':
- fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
- usage();
- return 1;
- case '?':
- fprintf(stderr, "%s: error: Unrecognised option: '-%c'\n", argv0, optopt);
- usage();
- return 1;
- default:
- abort();
- }
- }
- if(wc_opts == 0) wc_opts = WC_OPT_ALL;
- argc -= optind;
- argv += optind;
- if((errno = posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
- {
- if(errno != ESPIPE)
- {
- fprintf(stderr,
- "%s: warning: Failure from posix_fadvise sequential for <stdin>: %s\n",
- argv0,
- strerror(errno));
- }
- errno = 0;
- }
- if(argc < 1)
- {
- if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
- }
- for(int i = 0; i < argc; i++)
- {
- char *path = argv[i];
- if(path[0] == '-' && path[1] == 0)
- {
- if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
- continue;
- }
- // https://www.austingroupbugs.net/view.php?id=251
- if(strchr(path, '\n') != NULL)
- fprintf(
- stderr,
- "%s: warning: Filename '%s' contains a newline while wc(1) uses newlines as separators\n",
- argv0,
- path);
- int arg_fd = open(path, O_RDONLY | O_NOCTTY);
- if(arg_fd < 0)
- {
- fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, path, strerror(errno));
- return 1;
- }
- if((errno = posix_fadvise(arg_fd, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
- {
- fprintf(stderr,
- "%s: warning: Failure from posix_fadvise sequential for file '%s': %s\n",
- argv0,
- path,
- strerror(errno));
- errno = 0;
- }
- if(wc_file(arg_fd, path) < 0) return 1;
- if(close(arg_fd) < 0)
- {
- fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, path, strerror(errno));
- return 1;
- }
- }
- if(argc > 1) print_counts(total_lines, total_words, total_bytes, "total");
- return 0;
- }