commit: 958f574988efd83006fa746ec77c38c673e58579
parent b11ba1dcaf9fdef7463b4e1150510eaf877c27b7
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Mon, 6 Jan 2025 09:26:51 +0100
cmd/shuf: new
Diffstat:
A | cmd/shuf.1 | 50 | ++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | cmd/shuf.c | 180 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
M | coreutils.txt | 2 | +- |
3 files changed, 231 insertions(+), 1 deletion(-)
diff --git a/cmd/shuf.1 b/cmd/shuf.1
@@ -0,0 +1,50 @@
+.\" utils-std: Collection of commonly available Unix tools
+.\" Copyright 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+.\" SPDX-License-Identifier: MPL-2.0
+.Dd 2025-01-06
+.Dt SHUF 1
+.Os
+.Sh NAME
+.Nm shuf
+.Nd print contents shuffled
+.Sh SYNOPSIS
+.Nm
+.Op Fl z
+.Op Fl n Ar num
+.Op Ar file...
+.Nm
+.Fl e
+.Op Fl z
+.Op Fl n Ar num
+.Op Ar string...
+.Sh DESCRIPTION
+.Nm
+reads each
+.Ar file
+in sequence and writes their lines to the standard output in a shuffled order.
+If no
+.Ar file
+is given,
+.Nm
+reads from the standard input.
+.Sh OPTIONS
+.Bl -tag -width _n_num
+.It Fl e
+Use each
+.Ar string
+as an input line.
+.It Fl n Ar num
+Output at most
+.Ar num
+lines.
+.It Fl z
+Use NUL as the line delimiter instead of newline.
+.El
+.Sh EXIT STATUS
+.Ex -std
+.Sh HISTORY
+An
+.Nm
+utility appeared in GNU coreutils 6.0.
+.Sh AUTHORS
+.An Haelwenn (lanodan) Monnier Aq Mt contact+utils@hacktivis.me
diff --git a/cmd/shuf.c b/cmd/shuf.c
@@ -0,0 +1,180 @@
+// utils-std: Collection of commonly available Unix tools
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
+
+#define _POSIX_C_SOURCE 200809L
+
+#include <errno.h>
+#include <limits.h> // INT_MAX
+#include <stdbool.h>
+#include <stdio.h> // getdelim, fprintf
+#include <stdlib.h> // free, malloc, srand, rand, exit
+#include <string.h> // strerror, memcpy
+#include <time.h> // time
+#include <unistd.h> // getopt
+
+// Not a full shuffle: lines wait in a fixed pool of LINES_LEN (512) slots, so if there are more than 512 lines then the last lines are never going to be printed first.
+// But this allows a bounded number of lines to be held in memory.
+
+// FIXME: handle newline-less lines
+
+const char *argv0 = "shuf"; // program name, used as prefix of error messages
+
+#define LINES_LEN 512 // number of slots in the pool of pending lines
+static char *lines[LINES_LEN]; // pending lines, a slot is NULL while it is empty
+static char delim = '\n'; // record delimiter, set to '\0' by the -z option
+
+char *line = NULL; // buffer handed to getdelim(), reused for every record
+size_t line_len = 0; // capacity of `line`, maintained by getdelim()
+
+int wrote = 0; // number of lines written to stdout so far, checked against write_limit
+int write_limit = 0; // value given to the -n option, 0 means no limit
+
+// Feeds every delim-terminated record of `in` through the pool of pending lines.
+//
+// Each record is copied to the heap as a NUL-terminated string and stored in a
+// random slot of `lines`; when that slot was already occupied, its previous
+// occupant is printed on stdout first. Records still in the pool when the input
+// ends are not printed here, the caller has to flush `lines`.
+//
+// in:    stream to read from, must be open for reading
+// fname: name of the stream, only used in error messages
+//
+// Returns 0 once the end of `in` is reached, 1 after reporting a read or
+// allocation error on stderr. Calls exit(0) as soon as `write_limit` lines
+// (when non-zero) have been printed.
+//
+// Limitation: records are kept as C strings, so in newline mode a record is
+// cut at its first embedded NUL byte when printed.
+static int
+shuf(FILE *in, const char *fname)
+{
+	for(int ln = 0;; ln++)
+	{
+		errno = 0;
+		ssize_t nread = getdelim(&line, &line_len, delim, in);
+		if(nread < 0)
+		{
+			// -1 means either end-of-file or failure, only the error indicator tells them apart
+			if(!ferror(in)) return 0;
+
+			fprintf(stderr,
+			        "%s: error: Failed reading line %d from file \"%s\": %s\n",
+			        argv0,
+			        ln,
+			        fname,
+			        strerror(errno));
+			return 1;
+		}
+
+		size_t len = (size_t)nread;
+
+		// The last record of a file may lack its delimiter: add it so the record
+		// cannot get glued to whichever record is printed after it.
+		bool add_delim = (len == 0 || line[len - 1] != delim);
+		size_t dup_size = len + (add_delim ? 1 : 0) + 1; // +1 for the NUL terminator
+
+		errno = 0;
+		char *dup = malloc(dup_size);
+		if(!dup)
+		{
+			fprintf(
+			    stderr,
+			    "%s: error: Failed to allocate %zu bytes of memory for line %d from file \"%s\": %s\n",
+			    argv0,
+			    dup_size,
+			    ln,
+			    fname,
+			    strerror(errno));
+			return 1;
+		}
+		memcpy(dup, line, len);
+		if(add_delim) dup[len++] = delim;
+		dup[len] = '\0'; // fputs() needs a terminated string
+
+		int p = rand() % LINES_LEN;
+		if(lines[p] != NULL)
+		{
+			fputs(lines[p], stdout);
+			// With -z the delimiter is the NUL at which fputs() stops, so print it explicitly
+			if(delim == '\0') fputc('\0', stdout);
+
+			free(lines[p]);
+			wrote++;
+		}
+
+		lines[p] = dup;
+
+		// Stop once the -n limit is reached (0 means unlimited)
+		if(write_limit != 0 && wrote >= write_limit) exit(0);
+	}
+}
+
+// Prints the command synopsis (both the file and the -e forms) on stderr.
+static void
+usage(void)
+{
+	fputs("Usage: shuf [-z] [-n num] [file...]\n"
+	      "       shuf -e [-z] [-n num] [string...]\n",
+	      stderr);
+}
+
+// Entry point: shuf [-z] [-n num] [file...]  or  shuf -e [-z] [-n num] [string...]
+//
+// -e     treat each remaining argument as an input line instead of a file name
+// -n num print at most num lines
+// -z     use NUL instead of newline as record delimiter
+//
+// Without -e every file (or stdin when none is given) is passed through shuf(),
+// then whatever is left in the `lines` pool is printed.
+//
+// Returns 0 on success, 1 on usage, read, allocation or write errors.
+int
+main(int argc, char *argv[])
+{
+	bool e_flag = false;
+	bool n_flag = false;
+	srand((unsigned)time(NULL));
+
+	// 'n' takes an argument, hence "n:"; the leading ':' makes getopt report a missing one as ':'
+	for(int c = -1; (c = getopt(argc, argv, ":en:z")) != -1;)
+	{
+		switch(c)
+		{
+		case 'e':
+			e_flag = true;
+			break;
+		case 'n':
+		{
+			errno = 0;
+			char *end = NULL;
+			long n = strtol(optarg, &end, 10);
+			if(end == optarg || *end != '\0' || errno != 0 || n < 0 || n > INT_MAX)
+			{
+				fprintf(stderr, "%s: error: Invalid number of lines for option '-n': '%s'\n", argv0, optarg);
+				usage();
+				return 1;
+			}
+			write_limit = (int)n;
+			n_flag = true;
+			break;
+		}
+		case 'z':
+			delim = '\0';
+			break;
+		case ':':
+			fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
+			usage();
+			return 1;
+		case '?':
+			fprintf(stderr, "%s: error: Unrecognised option: '-%c'\n", argv0, optopt);
+			usage();
+			return 1;
+		default:
+			usage();
+			return 1;
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	// write_limit uses 0 for "no limit", so an explicit "-n 0" has to be handled here: nothing to print
+	if(n_flag && write_limit == 0) return 0;
+
+	if(e_flag)
+	{
+		// Fisher-Yates shuffle: position i receives a random element of argv[0..i]
+		for(int i = argc - 1; i > 0; i--)
+		{
+			int j = rand() % (i + 1);
+
+			char *tmp = argv[i];
+			argv[i] = argv[j];
+			argv[j] = tmp;
+		}
+
+		int limit = argc;
+		if(write_limit != 0 && write_limit < argc) limit = write_limit;
+
+		for(int i = 0; i < limit; i++)
+		{
+			printf("%s%c", argv[i], delim);
+		}
+	}
+	else
+	{
+		for(int i = 0; i < LINES_LEN; i++)
+			lines[i] = NULL;
+
+		if(argc <= 0)
+		{
+			if(shuf(stdin, "<stdin>") != 0) return 1;
+		}
+		else
+		{
+			for(int i = 0; i < argc; i++)
+			{
+				errno = 0;
+				FILE *in = fopen(argv[i], "r");
+				if(in == NULL)
+				{
+					fprintf(stderr,
+					        "%s: error: Failed opening file \"%s\": %s\n",
+					        argv0,
+					        argv[i],
+					        strerror(errno));
+					return 1;
+				}
+
+				int err = shuf(in, argv[i]);
+				fclose(in);
+				if(err != 0) return 1;
+			}
+		}
+
+		// inserts are random so iterating on it is fine
+		for(int i = 0; i < LINES_LEN; i++)
+		{
+			if(lines[i] == NULL) continue;
+
+			// Keep honouring -n for the lines which were still waiting in the pool
+			if(write_limit == 0 || wrote < write_limit)
+			{
+				fputs(lines[i], stdout);
+				// With -z the delimiter is the NUL at which fputs() stops, so print it explicitly
+				if(delim == '\0') fputc('\0', stdout);
+				wrote++;
+			}
+
+			free(lines[i]);
+			lines[i] = NULL;
+		}
+
+		free(line);
+		line = NULL;
+		line_len = 0;
+	}
+
+	// Output is buffered, so write failures may only show up now
+	if(fflush(stdout) != 0 || ferror(stdout))
+	{
+		fprintf(stderr, "%s: error: Failed writing to standard output\n", argv0);
+		return 1;
+	}
+
+	return 0;
+}
diff --git a/coreutils.txt b/coreutils.txt
@@ -74,7 +74,7 @@ sha256sum: Done
sha384sum: No
sha512sum: Done
shred: ?
-shuf: Todo
+shuf: Done
sleep: Done
sort: ?
split: Done