logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git/
commit: 958f574988efd83006fa746ec77c38c673e58579
parent b11ba1dcaf9fdef7463b4e1150510eaf877c27b7
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Mon,  6 Jan 2025 09:26:51 +0100

cmd/shuf: new

Diffstat:

A cmd/shuf.1    |  50 ++++++++++++++++++++++++++++++++++++++++++++++++++
A cmd/shuf.c    | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M coreutils.txt |   2 +-
3 files changed, 231 insertions(+), 1 deletion(-)

diff --git a/cmd/shuf.1 b/cmd/shuf.1 @@ -0,0 +1,50 @@ +.\" utils-std: Collection of commonly available Unix tools +.\" Copyright 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +.\" SPDX-License-Identifier: MPL-2.0 +.Dd 2025-01-06 +.Dt SHUF 1 +.Os +.Sh NAME +.Nm shuf +.Nd print contents shuffled +.Sh SYNOPSIS +.Nm +.Op Fl z +.Op Fl n Ar num +.Op Ar file... +.Nm +.Fl e +.Op Fl z +.Op Fl n Ar num +.Op Ar string... +.Sh DESCRIPTION +.Nm +reads each +.Ar file +in sequence and writes its lines on the standard output in a shuffled order. +If no +.Ar file +is given, +.Nm +reads from the standard input. +.Sh OPTIONS +.Bl -tag -width _n_num +.It Fl e +Use each +.Ar string +as an input line. +.It Fl n Ar num +Output at most +.Ar num +lines. +.It Fl z +Use NUL as line delimiter, not newline. +.El +.Sh EXIT STATUS +.Ex -std +.Sh HISTORY +An +.Nm +utility appeared in GNU coreutils 6.0. +.Sh AUTHORS +.An Haelwenn (lanodan) Monnier Aq Mt contact+utils@hacktivis.me diff --git a/cmd/shuf.c b/cmd/shuf.c @@ -0,0 +1,180 @@ +// utils-std: Collection of commonly available Unix tools +// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +// SPDX-License-Identifier: MPL-2.0 + +#define _POSIX_C_SOURCE 200809L + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> // getdelim, fprintf +#include <stdlib.h> // free, malloc, srand, rand, exit +#include <string.h> // strerror, memcpy +#include <time.h> // time +#include <unistd.h> // getopt + +// Not a full shuffle: if there are more than 512 lines, then the last lines are never going to be printed first. +// But this allows bounded memory usage. 
// Records read from the inputs are parked in a fixed pool of LINES_LEN slots.
// Each new record is dropped into a randomly chosen slot; whatever record was
// already sitting there gets printed first. Once all inputs are consumed the
// remaining slots are printed in slot order.

const char *argv0 = "shuf";

#define LINES_LEN 512
// Pool of pending records; NULL marks a free slot (static storage starts zeroed).
static char *lines[LINES_LEN];
// Byte length of each pending record, trailing delimiter included.
static size_t lines_size[LINES_LEN];
static char delim = '\n';

// Read buffer shared by every input; getdelim() grows it on demand.
char *line = NULL;
size_t line_len = 0;

// Number of records written so far, and the -n cap (0 means "no limit").
int wrote = 0;
int write_limit = 0;

// Returns true once the -n cap is set and has been reached.
static bool
limit_reached(void)
{
	return write_limit != 0 && wrote >= write_limit;
}

// Writes the record held in slot p (if any) to stdout and releases the slot.
static void
emit_slot(int p)
{
	if(lines[p] == NULL) return;

	// fwrite, not fputs: records are not NUL-terminated strings, and with -z
	// the delimiter that must be written *is* a NUL byte.
	fwrite(lines[p], 1, lines_size[p], stdout);

	free(lines[p]);
	lines[p] = NULL;
	lines_size[p] = 0;
	wrote++;
}

// Reads delim-separated records from `in` and feeds them through the pool.
//
// fname is only used in diagnostics.
// Returns 0 on end of input or once the -n limit has been reached,
// 1 after printing a diagnostic on read or allocation failure.
static int
shuf(FILE *in, const char *fname)
{
	for(int ln = 1;; ln++)
	{
		errno = 0;
		ssize_t nread = getdelim(&line, &line_len, delim, in);
		if(nread < 0)
		{
			// -1 means either end of input (errno untouched) or a failure
			if(errno == 0) return 0;

			fprintf(stderr,
			        "%s: error: Failed reading line %d from file \"%s\": %s\n",
			        argv0,
			        ln,
			        fname,
			        strerror(errno));
			return 1;
		}

		size_t len = (size_t)nread;

		// The last record of an input may lack its delimiter; add one so it
		// is not glued to whichever record happens to be printed after it.
		bool add_delim = (len == 0 || line[len - 1] != delim);
		size_t size    = len + (add_delim ? 1 : 0);

		errno     = 0;
		char *dup = malloc(size);
		if(!dup)
		{
			fprintf(
			    stderr,
			    "%s: error: Failed to allocate %zu bytes of memory for line %d from file \"%s\": %s\n",
			    argv0,
			    size,
			    ln,
			    fname,
			    strerror(errno));
			return 1;
		}
		memcpy(dup, line, len);
		if(add_delim) dup[len] = delim;

		// Evict (print) the previous occupant of a random slot, then take its place
		int p = rand() % LINES_LEN;
		emit_slot(p);
		lines[p]      = dup;
		lines_size[p] = size;

		if(limit_reached()) return 0;
	}
}

// Prints the command synopsis on stderr.
static void
usage(void)
{
	fputs("Usage: shuf [-z] [-n num] [file...]\n"
	      "       shuf -e [-z] [-n num] [string...]\n",
	      stderr);
}

// Flushes stdout and reports whether everything written so far went through.
// Returns 0 on success, 1 (after a diagnostic) otherwise.
static int
finish_output(void)
{
	if(fflush(stdout) != 0 || ferror(stdout))
	{
		fprintf(stderr, "%s: error: Failed writing to standard output\n", argv0);
		return 1;
	}

	return 0;
}

// Entry point: parses -e / -n num / -z, then shuffles either the operands
// themselves (-e) or the records of the given files (stdin when none given).
// Returns 0 on success, 1 on usage, read, allocation or write errors.
int
main(int argc, char *argv[])
{
	bool e_flag = false;
	srand((unsigned)time(NULL));

	// "n:" so that getopt actually hands the operand of -n over in optarg
	for(int c = -1; (c = getopt(argc, argv, ":en:z")) != -1;)
	{
		switch(c)
		{
		case 'e':
			e_flag = true;
			break;
		case 'n':
		{
			// Largest value an int can hold, computed without <limits.h>
			const long int_max = (long)(~0U >> 1);

			errno     = 0;
			char *end = NULL;
			long num  = strtol(optarg, &end, 10);
			if(end == optarg || *end != '\0' || errno != 0 || num < 0 || num > int_max)
			{
				fprintf(stderr, "%s: error: Invalid number for option '-n': \"%s\"\n", argv0, optarg);
				usage();
				return 1;
			}
			write_limit = (int)num;
			break;
		}
		case 'z':
			delim = '\0';
			break;
		case ':':
			fprintf(stderr, "%s: error: Missing operand for option: '-%c'\n", argv0, optopt);
			usage();
			return 1;
		case '?':
			fprintf(stderr, "%s: error: Unrecognised option: '-%c'\n", argv0, optopt);
			usage();
			return 1;
		default:
			usage();
			return 1;
		}
	}

	argc -= optind;
	argv += optind;

	if(e_flag)
	{
		// Fisher-Yates shuffle: walking down from the end, swap each element
		// with a randomly chosen one at or below its own position.
		for(int i = argc - 1; i > 0; i--)
		{
			int j = rand() % (i + 1);

			char *tmp = argv[i];
			argv[i]   = argv[j];
			argv[j]   = tmp;
		}

		int limit = argc;
		if(write_limit != 0 && write_limit < argc) limit = write_limit;

		for(int i = 0; i < limit; i++)
		{
			printf("%s%c", argv[i], delim);
		}

		return finish_output();
	}

	int ret = 0;

	if(argc <= 0)
	{
		ret = shuf(stdin, "<stdin>");
	}
	else
	{
		for(int i = 0; i < argc && ret == 0 && !limit_reached(); i++)
		{
			FILE *in = fopen(argv[i], "r");
			if(in == NULL)
			{
				fprintf(stderr,
				        "%s: error: Failed opening file \"%s\": %s\n",
				        argv0,
				        argv[i],
				        strerror(errno));
				ret = 1;
				break;
			}

			ret = shuf(in, argv[i]);
			fclose(in);
		}
	}

	// Slots were filled at random positions, so draining them in order is fine.
	// After an error, or once the -n limit is hit, pending records are dropped.
	for(int i = 0; i < LINES_LEN; i++)
	{
		if(ret == 0 && !limit_reached())
		{
			emit_slot(i);
		}
		else
		{
			free(lines[i]);
			lines[i]      = NULL;
			lines_size[i] = 0;
		}
	}

	free(line);
	line     = NULL;
	line_len = 0;

	if(ret != 0) return ret;

	return finish_output();
}

/* Remainder of the commit shown on this page (not part of cmd/shuf.c):
diff --git a/coreutils.txt b/coreutils.txt @@ -74,7 +74,7 @@ sha256sum: Done sha384sum: No sha512sum: Done shred: ? -shuf: Todo +shuf: Done sleep: Done sort: ? split: Done
*/