commit: e8e83b222ec147ea0a4672030b2ed8d02d84c8da
parent 6eff20f7c6043602f65be89ae05061da48756fa0
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Fri, 15 Nov 2024 09:41:52 +0100
cmd/yes: pre-fill a buffer for the write-loop
Interestingly, it seems faster than GNU coreutils' implementation of yes(1):
  $ timeout 30 /usr/bin/yes | pv -trb > /dev/null
  41.6GiB 0:00:30 [1.39GiB/s]
  $ timeout 30 ./cmd/yes | pv -trb > /dev/null
   102GiB 0:00:30 [3.43GiB/s]
And definitely faster than before this commit:
  $ timeout 30 /opt/lanodan/bin/yes | pv -trb > /dev/null
  4.15GiB 0:00:30 [ 141MiB/s]
This is likely due to using PAGESIZE (4096 on this system) instead of 1024.
It also allows much better measurements of ./cmd/wc than those done in
commit 4c21c817b83edddf81a3b34f42bba156062de792:
  $ timeout 10 ./cmd/yes | pv -trb | /opt/lanodan/bin/cat > /dev/null
  7.57GiB 0:00:09 [ 775MiB/s]
  $ timeout 10 ./cmd/yes | pv -trb | ./cmd/wc -l > /dev/null
  7.98GiB 0:00:09 [ 817MiB/s]
  $ timeout 10 ./cmd/yes | pv -trb | ./cmd/wc -c > /dev/null
  25.0GiB 0:00:09 [2.50GiB/s]
Diffstat:
| M | cmd/yes.c | 54 | +++++++++++++++++++++++++++++++++++++++--------------- | 
1 file changed, 39 insertions(+), 15 deletions(-)
diff --git a/cmd/yes.c b/cmd/yes.c
@@ -4,50 +4,74 @@
 
 #define _POSIX_C_SOURCE 200809L
 #include <assert.h>
-#include <stdbool.h>
-#include <stdio.h>  // fwrite, perror, ferror
+#include <errno.h>
+#include <limits.h> // PAGESIZE
+#include <stdio.h>  // perror
 #include <string.h> // strlen
+#include <unistd.h> // write
 
 int
 main(int argc, char *argv[])
 {
-	size_t arg_len = 0;
-
-	assert(argc >= 1);
+	char *args = NULL;
+	size_t args_len = 0;
 
 	if(argc == 1)
 	{
-		argv[0][0] = 'y';
-		argv[0][1] = '\n';
-		argv[0][2] = '\0';
-		arg_len = 2;
+		char yes[3] = {'y', '\n', '\0'};
+		args = yes;
+		args_len = 2;
 	}
 	else
 	{
 		argv++;
 		argc--;
 
+		args = *argv;
+
 		for(int i = 0; i < argc; i++)
 		{
 			size_t len = strlen(argv[i]);
 			argv[i][len] = ' ';
-			arg_len += len + 1; // str + space
+			args_len += len + 1; // str + space
 		}
 
-		if(arg_len == 0)
+		if(args_len == 0)
 		{
 			argv[0][0] = '\n';
 			argv[0][1] = '\0';
-			arg_len = 1;
+			args_len = 1;
 		}
 		else
-			argv[0][arg_len - 1] = '\n';
+			argv[0][args_len - 1] = '\n';
+	}
+
+	size_t buflen = args_len;
+	char *buf = args;
+
+	// Large buffer to avoid write-speed being syscall-bound
+	// PAGESIZE was chosen because yes(1) is likely going to be piped
+	if(args_len < PAGESIZE)
+	{
+		static char page_buf[PAGESIZE] = "";
+		buf = page_buf;
+
+		size_t i = 0;
+		do
+		{
+			memcpy(buf + i, args, args_len);
+			i += args_len;
+		} while(i < (PAGESIZE - args_len));
+
+		buflen = i;
 	}
 
-	while(fwrite(*argv, arg_len, 1, stdout) == 1)
+	assert(errno == 0);
+
+	while(write(STDOUT_FILENO, buf, buflen) >= 1)
 		;
 
-	if(ferror(stdout))
+	if(errno != 0)
 	{
 		perror("yes: error: Failed writing");
 		return 1;