commit: 4c21c817b83edddf81a3b34f42bba156062de792
parent 7b5f49a107df7eb8b0c8550de63da2fe993608c1
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Tue, 5 Nov 2024 20:37:15 +0100
cmd/wc: use fd for higher processing speed in bytes mode
As the benchmarks below show, there is still room for improvement, but roughly:
'-c' got a 3× speedup and '-l' got a 2× speedup.
$ uname -a
Linux cloudchaser 6.6.38-gentoo #2 SMP PREEMPT_DYNAMIC Thu Aug 29 05:05:27 2024 x86_64
$ grep -e 'model name' /proc/cpuinfo | head -n 1
model name : AMD Ryzen 5 PRO 3500U w/ Radeon Vega Mobile Gfx
$ qfile -v /usr/bin/wc /opt/lanodan/bin/wc
sys-apps/coreutils-9.5: /usr/bin/wc
sys-apps/utils-std-9999: /opt/lanodan/bin/wc
$ timeout 10 /usr/bin/yes | pv -trb > /dev/null
12.2GiB 0:00:10 [1.22GiB/s]
$ timeout 10 /usr/bin/yes | pv -trb | /opt/lanodan/bin/cat > /dev/null
6.16GiB 0:00:09 [ 630MiB/s]
$ timeout 10 /usr/bin/yes | pv -trb | /usr/bin/wc -c > /dev/null
7.71GiB 0:00:09 [ 789MiB/s]
$ timeout 10 /usr/bin/yes | pv -trb | /usr/bin/wc -l > /dev/null
7.53GiB 0:00:09 [ 770MiB/s]
$ timeout 10 /usr/bin/yes | pv -trb | /opt/lanodan/bin/wc -c > /dev/null
2.40GiB 0:00:10 [ 245MiB/s]
$ timeout 10 /usr/bin/yes | pv -trb | /opt/lanodan/bin/wc -l > /dev/null
2.38GiB 0:00:09 [ 243MiB/s]
$ timeout 10 /usr/bin/yes | pv -trb | ./cmd/wc -c > /dev/null
7.44GiB 0:00:09 [ 761MiB/s]
$ timeout 10 /usr/bin/yes | pv -trb | ./cmd/wc -l > /dev/null
5.48GiB 0:00:09 [ 560MiB/s]
Diffstat:
M | cmd/wc.c | 124 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------- |
1 file changed, 88 insertions(+), 36 deletions(-)
diff --git a/cmd/wc.c b/cmd/wc.c
@@ -8,6 +8,7 @@
#include <assert.h>
#include <ctype.h> // isspace
#include <errno.h>
+#include <fcntl.h> // posix_fadvise
#include <locale.h> // setlocale
#include <stdbool.h>
#include <stdint.h> // uint8_t
@@ -22,6 +23,9 @@
#include <getopt.h>
#endif
+#define WC_BUFSIZ 16320
+static char buf[WC_BUFSIZ] = "";
+
static const char *argv0 = "wc";
static enum {
@@ -57,43 +61,49 @@ print_counts(off_t lines, off_t words, off_t bytes, const char *filename)
}
static int
-wc_file_bytes(FILE *file, char *filename)
+wc_file_bytes(int fd, char *filename)
{
off_t bytes = 0, lines = 0, words = 0, wordlen = 0;
- while(true)
- {
- assert(errno == 0);
- int c = getc(file);
- if(c == EOF)
- {
- if(errno != 0)
- {
- fprintf(stderr,
- "%s: error: Failed reading from file '%s': %s\n",
- argv0,
- filename != NULL ? filename : "<stdin>",
- strerror(errno));
- return -1;
- }
- break;
- }
- bytes++;
+ int lw = FIELD_MATCH(wc_opts, WC_OPT_L) || FIELD_MATCH(wc_opts, WC_OPT_W);
- if(c == '\n') lines++;
+ ssize_t nread = -1;
+ while((nread = read(fd, buf, WC_BUFSIZ)) > 0)
+ {
+ bytes += nread;
- if(isspace(c))
+ if(lw)
{
- if(wordlen > 0)
+ for(ssize_t i = 0; i < nread; i++)
{
- words++;
- wordlen = 0;
+ int c = buf[i];
+
+ if(c == '\n') lines++;
+
+ if(isspace(c))
+ {
+ if(wordlen > 0)
+ {
+ words++;
+ wordlen = 0;
+ }
+ }
+ else
+ {
+ wordlen++;
+ }
}
}
- else
- {
- wordlen++;
- }
+ }
+
+ if(nread < 0 && errno != 0)
+ {
+ fprintf(stderr,
+ "%s: error: Failed reading from file '%s': %s\n",
+ argv0,
+ filename != NULL ? filename : "<stdin>",
+ strerror(errno));
+ return -1;
}
if(wordlen > 0) words++;
@@ -106,10 +116,30 @@ wc_file_bytes(FILE *file, char *filename)
}
static int
-wc_file_chars(FILE *file, char *filename)
+wc_file_chars(int fd, char *filename)
{
off_t chars = 0, lines = 0, words = 0, wordlen = 0;
+ FILE *file = fdopen(fd, "r");
+ if(file == NULL)
+ {
+ fprintf(stderr,
+ "%s: error: Failed getting file stream for file '%s': %s\n",
+ argv0,
+ filename,
+ strerror(errno));
+ return -1;
+ }
+
+ if(setvbuf(file, buf, _IOFBF, WC_BUFSIZ) != 0)
+ {
+ fprintf(stderr,
+ "%s: warning: Failed setting a new buffer for <stdin>: %s\n",
+ argv0,
+ strerror(errno));
+ errno = 0;
+ }
+
while(true)
{
assert(errno == 0);
@@ -168,7 +198,7 @@ main(int argc, char *argv[])
fprintf(stderr, "%s: warning: Failed to initialize locales: %s\n", argv0, strerror(errno));
errno = 0;
}
- int (*wc_file)(FILE *, char *) = &wc_file_bytes;
+ int (*wc_file)(int, char *) = &wc_file_bytes;
int c = -1;
#ifdef HAS_GETOPT_LONG
@@ -224,9 +254,21 @@ main(int argc, char *argv[])
argc -= optind;
argv += optind;
+ if((errno = posix_fadvise(STDIN_FILENO, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
+ {
+ if(errno != ESPIPE)
+ {
+ fprintf(stderr,
+ "%s: warning: Failure from posix_fadvise sequential for <stdin>: %s\n",
+ argv0,
+ strerror(errno));
+ }
+ errno = 0;
+ }
+
if(argc < 1)
{
- if(wc_file(stdin, NULL) < 0) return 1;
+ if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
}
for(int i = 0; i < argc; i++)
@@ -234,7 +276,7 @@ main(int argc, char *argv[])
char *path = argv[i];
if(path[0] == '-' && path[1] == 0)
{
- if(wc_file(stdin, NULL) < 0) return 1;
+ if(wc_file(STDIN_FILENO, NULL) < 0) return 1;
continue;
}
@@ -265,16 +307,26 @@ main(int argc, char *argv[])
continue;
}
- FILE *file = fopen(path, "r");
- if(file == NULL)
+ int arg_fd = open(path, O_RDONLY | O_NOCTTY);
+ if(arg_fd < 0)
{
fprintf(stderr, "%s: error: Failed opening file '%s': %s\n", argv0, path, strerror(errno));
return 1;
}
- if(wc_file(file, path) < 0) return 1;
+ if((errno = posix_fadvise(arg_fd, 0, 0, POSIX_FADV_SEQUENTIAL)) != 0)
+ {
+ fprintf(stderr,
+ "%s: warning: Failure from posix_fadvise sequential for file '%s': %s\n",
+ argv0,
+ path,
+ strerror(errno));
+ errno = 0;
+ }
+
+ if(wc_file(arg_fd, path) < 0) return 1;
- if(fclose(file) < 0)
+ if(close(arg_fd) < 0)
{
fprintf(stderr, "%s: error: Failed closing file '%s': %s\n", argv0, path, strerror(errno));
return 1;