logo

utils-std

Collection of commonly available Unix tools
git clone https://anongit.hacktivis.me/git/utils-std.git
commit: e8e83b222ec147ea0a4672030b2ed8d02d84c8da
parent 6eff20f7c6043602f65be89ae05061da48756fa0
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Fri, 15 Nov 2024 09:41:52 +0100

cmd/yes: pre-fill a buffer for the write-loop

Interestingly, it seems faster than GNU coreutils' implementation of yes(1):

  $ timeout 30 /usr/bin/yes | pv -trb > /dev/null
  41.6GiB 0:00:30 [1.39GiB/s]
  $ timeout 30 ./cmd/yes | pv -trb > /dev/null
   102GiB 0:00:30 [3.43GiB/s]

And definitely faster than before this commit:

  $ timeout 30 /opt/lanodan/bin/yes | pv -trb > /dev/null
  4.15GiB 0:00:30 [ 141MiB/s]

This is likely due to using a buffer of PAGESIZE bytes (4096 on this system) instead of 1024.

This also allows much better measurements of ./cmd/wc than those done in
commit 4c21c817b83edddf81a3b34f42bba156062de792:

  $ timeout 10 ./cmd/yes | pv -trb | /opt/lanodan/bin/cat > /dev/null
  7.57GiB 0:00:09 [ 775MiB/s]
  $ timeout 10 ./cmd/yes | pv -trb | ./cmd/wc -l > /dev/null
  7.98GiB 0:00:09 [ 817MiB/s]
  $ timeout 10 ./cmd/yes | pv -trb | ./cmd/wc -c > /dev/null
  25.0GiB 0:00:09 [2.50GiB/s]

Diffstat:

M cmd/yes.c | 54 +++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 15 deletions(-)

diff --git a/cmd/yes.c b/cmd/yes.c @@ -4,50 +4,74 @@ #define _POSIX_C_SOURCE 200809L #include <assert.h> -#include <stdbool.h> -#include <stdio.h> // fwrite, perror, ferror +#include <errno.h> +#include <limits.h> // PAGESIZE +#include <stdio.h> // perror #include <string.h> // strlen +#include <unistd.h> // write int main(int argc, char *argv[]) { - size_t arg_len = 0; - - assert(argc >= 1); + char *args = NULL; + size_t args_len = 0; if(argc == 1) { - argv[0][0] = 'y'; - argv[0][1] = '\n'; - argv[0][2] = '\0'; - arg_len = 2; + char yes[3] = {'y', '\n', '\0'}; + args = yes; + args_len = 2; } else { argv++; argc--; + args = *argv; + for(int i = 0; i < argc; i++) { size_t len = strlen(argv[i]); argv[i][len] = ' '; - arg_len += len + 1; // str + space + args_len += len + 1; // str + space } - if(arg_len == 0) + if(args_len == 0) { argv[0][0] = '\n'; argv[0][1] = '\0'; - arg_len = 1; + args_len = 1; } else - argv[0][arg_len - 1] = '\n'; + argv[0][args_len - 1] = '\n'; + } + + size_t buflen = args_len; + char *buf = args; + + // Large buffer to avoid write-speed being syscall-bound + // PAGESIZE was chosen because yes(1) is likely going to be piped + if(args_len < PAGESIZE) + { + static char page_buf[PAGESIZE] = ""; + buf = page_buf; + + size_t i = 0; + do + { + memcpy(buf + i, args, args_len); + i += args_len; + } while(i < (PAGESIZE - args_len)); + + buflen = i; } - while(fwrite(*argv, arg_len, 1, stdout) == 1) + assert(errno == 0); + + while(write(STDOUT_FILENO, buf, buflen) >= 1) ; - if(ferror(stdout)) + if(errno != 0) { perror("yes: error: Failed writing"); return 1;