commit: fdd47a4aef8b85e440c2c0bbee1a72947521ab9e
parent a9e81d3a107310775ac4d37b44bff45e61c442b6
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Thu, 15 Aug 2024 21:43:31 +0200
cmd/split: new
Diffstat:
7 files changed, 291 insertions(+), 3 deletions(-)
diff --git a/Makefile b/Makefile
@@ -150,6 +150,10 @@ cmd/head: cmd/head.c lib/truncation.c lib/truncation.h Makefile
$(RM) -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno}
$(CC) -std=c99 $(CFLAGS) -o $@ cmd/head.c lib/truncation.c $(LDFLAGS) $(LDSTATIC)
+cmd/split: cmd/split.c lib/truncation.c lib/truncation.h lib/fs.c lib/fs.h Makefile
+ $(RM) -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno}
+ $(CC) -std=c99 $(CFLAGS) -o $@ cmd/split.c lib/truncation.c lib/fs.c $(LDFLAGS) $(LDSTATIC)
+
cmd/tr: cmd/tr.c lib/tr_str.c lib/tr_str.h Makefile
$(RM) -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno}
$(CC) -std=c99 $(CFLAGS) -o $@ cmd/tr.c lib/tr_str.c $(LDFLAGS) $(LDSTATIC)
diff --git a/cmd/split.c b/cmd/split.c
@@ -0,0 +1,212 @@
+// utils-std: Collection of commonly available Unix tools
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
+
+#define _POSIX_C_SOURCE 200809L
+
+#include "../lib/fs.h" // auto_file_copy
+#include "../lib/truncation.h" // apply_size_suffix
+
+#include <errno.h>
+#include <fcntl.h> // open
+#include <limits.h> // NAME_MAX
+#include <stdio.h> // fprintf
+#include <stdlib.h> // strtoul
+#include <string.h> // strerror
+#include <sys/stat.h> // fstat
+#include <unistd.h> // getopt
+
+char *argv0 = "split";
+
+static char *error_opt_b_l = "split: Options -b and -l are mutually exclusive\n";
+
+int
+main(int argc, char *argv[])
+{
+ char *name = "x";
+ size_t suffix_len = 3, bytes = 0, lines = 0;
+
+ int c = -1;
+ while((c = getopt(argc, argv, ":a:b:l:")) != -1)
+ {
+ char *endptr = NULL;
+
+ switch(c)
+ {
+ case 'a':
+ suffix_len = strtoul(optarg, &endptr, 0);
+ if(suffix_len == 0)
+ {
+ fprintf(stderr, "split: Error while parsing '-a %s': %s\n", optarg, strerror(errno));
+ return 1;
+ }
+ if(endptr != NULL && *endptr != '\0')
+ {
+ fprintf(stderr, "split: Invalid trailing characters in '-a %s': %s\n", optarg, endptr);
+ return 1;
+ }
+ break;
+ case 'b':
+ {
+ if(lines != 0)
+ {
+ fputs(error_opt_b_l, stderr);
+ return 1;
+ }
+
+ long opt_b = strtoul(optarg, &endptr, 0);
+ if(opt_b == 0)
+ {
+ fprintf(stderr, "split: Error while parsing '-b %s': %s\n", optarg, strerror(errno));
+ return 1;
+ }
+
+ if(endptr != NULL && *endptr != 0)
+ if(apply_size_suffix(&opt_b, endptr) != 0) return 1;
+
+ bytes = (size_t)opt_b;
+ lines = 0;
+ break;
+ }
+ case 'l':
+ if(bytes != 0)
+ {
+ fputs(error_opt_b_l, stderr);
+ return 1;
+ }
+
+ lines = strtoul(optarg, &endptr, 0);
+ if(lines == 0)
+ {
+ fprintf(stderr, "split: Error while parsing '-l %s': %s\n", optarg, strerror(errno));
+ return 1;
+ }
+ if(endptr != NULL && *endptr != '\0')
+ {
+ fprintf(stderr, "split: Invalid trailing characters in '-l %s': %s\n", optarg, endptr);
+ return 1;
+ }
+ break;
+ case ':':
+ fprintf(stderr, "split: Option '-%c' requires an operand\n", optopt);
+ return 1;
+ default:
+ fprintf(stderr, "split: Unhandled option '-%c'\n", optopt);
+ return 1;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if(lines == 0 && bytes == 0) lines = 1000;
+
+ char *name_in = NULL;
+
+ if(argc > 2 || argc < 0)
+ {
+ fprintf(stderr, "split: Expected 0, 1, or 2 arguments, got %d\n", argc);
+ return 1;
+ }
+ else if(argc >= 1)
+ {
+ name_in = argv[0];
+
+ if(argc == 2) name = argv[1];
+ }
+
+ size_t name_len = strlen(name);
+ if(name_len + suffix_len > NAME_MAX)
+ {
+ fprintf(stderr,
+ "split: Error: name(%zd bytes) + suffix_length(%zd bytes) > NAME_MAX(%d bytes)\n",
+ name_len,
+ suffix_len,
+ NAME_MAX);
+ return 1;
+ }
+
+ if(bytes != 0)
+ {
+ int fd_in = STDIN_FILENO;
+ if(name_in != NULL)
+ {
+ fd_in = open(name_in, O_RDONLY | O_NOCTTY);
+ if(fd_in < 0)
+ {
+ fprintf(stderr, "split: Failed opening '%s' file: %s\n", name_in, strerror(errno));
+ return 1;
+ }
+ }
+
+ struct stat fd_in_stat;
+ if(fstat(fd_in, &fd_in_stat) != 0)
+ {
+ fprintf(stderr, "split: Failed getting status from file '%s': %s", name_in, strerror(errno));
+ close(fd_in);
+ return 1;
+ }
+
+ int err = 0;
+ off_t wrote = 0;
+ int split_id = 0;
+ while(wrote < fd_in_stat.st_size)
+ {
+ char name_out[NAME_MAX] = "";
+ memcpy(name_out, name, name_len);
+ memset(name_out + name_len, 'a', suffix_len);
+
+ int id_n = split_id++;
+ size_t id_p = name_len + suffix_len;
+ do
+ {
+ name_out[id_p--] = 'a' + (id_n % 26);
+ id_n /= 26;
+ } while(id_n > 0 && id_p > name_len);
+
+ if(id_p <= name_len)
+ {
+ fprintf(stderr,
+ "split: Failed representing %d into suffix of length %zu\n",
+ split_id,
+ suffix_len);
+ return 1;
+ }
+
+ int fd_out = open(name_out, O_WRONLY | O_NOCTTY | O_CREAT, 0644);
+ if(fd_out < 0)
+ {
+ fprintf(stderr, "split: Failed opening '%s' file: %s\n", name_out, strerror(errno));
+ return 1;
+ }
+
+ int ret = auto_file_copy(fd_in, fd_out, bytes, 0);
+ if(ret < 0)
+ {
+ fprintf(stderr,
+ "split: Error while copying from file '%s' to file '%s': %s\n",
+ name_in ? name_in : "<stdin>",
+ name_out,
+ strerror(errno));
+ close(fd_out);
+ err = 1;
+ break;
+ }
+
+ wrote += ret;
+
+ if(close(fd_out) < 0)
+ {
+ fprintf(stderr, "split: Failing closing file '%s': %s\n", name_out, strerror(errno));
+ err = 1;
+ break;
+ }
+ }
+
+ if(name_in != NULL) close(fd_in);
+
+ return err;
+ }
+
+ return 0;
+}
diff --git a/coreutils.txt b/coreutils.txt
@@ -77,7 +77,7 @@ shred: ?
shuf: Todo
sleep: Done
sort: ?
-split: No. Considered obsolete
+split: Done
stat: Todo
stdbuf: No
stty: Todo
diff --git a/lsb_commands.txt b/lsb_commands.txt
@@ -111,7 +111,7 @@ sh: out of scope
shutdown: No
sleep: Done
sort: ?
-split: No. Considered obsolete
+split: Done
strings: Done
strip: out of scope
stty: Todo
diff --git a/posix_utilities.txt b/posix_utilities.txt
@@ -115,7 +115,7 @@ sed: no
sh: no, external
sleep: done
sort
-split
+split: done
strings: done
strip: no, toolchain POSIX2_SW_DEV
stty
diff --git a/test-cmd/Kyuafile b/test-cmd/Kyuafile
@@ -32,6 +32,7 @@ tap_test_program{name="realpath.sh", required_files=basedir.."/cmd/realpath", ti
tap_test_program{name="seq.sh", required_files=basedir.."/cmd/seq", timeout=1}
tap_test_program{name="sha1sum.sh", required_files=basedir.."/cmd/sha1sum", timeout=1}
tap_test_program{name="sleep.sh", required_files=basedir.."/cmd/sleep", timeout=1}
+tap_test_program{name="split.sh", required_files=basedir.."/cmd/split", timeout=1}
tap_test_program{name="strings.sh", required_files=basedir.."/cmd/strings", timeout=1}
tap_test_program{name="test.sh", required_files=basedir.."/cmd/test", timeout=2}
tap_test_program{name="true.sh", required_files=basedir.."/cmd/true", timeout=1}
diff --git a/test-cmd/split.sh b/test-cmd/split.sh
@@ -0,0 +1,71 @@
+#!/bin/sh
+# SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+# SPDX-License-Identifier: MPL-2.0
+
+WD="$(dirname "$0")/../"
+WD="$(realpath "$WD")"
+target="$WD/cmd/split"
+
+t()
+{
+ count=$((count+1))
+ name="$1"
+
+ shift
+
+ if "$@"; then
+ printf 'ok %s - %s\n' "$count" "$name"
+ else
+ printf '# Command failed: %s\n' "$*"
+ printf 'not ok %s - %s\n' "$count" "$name"
+ err=1
+ fi
+}
+
+t_end()
+{
+ rm split_test_*
+
+ cd "${oldpwd?}"
+ rm -r "${tempdir?}"
+
+ if [ $count -ne $plans ]
+ then
+ printf 'Error: Ran %d instead of the planned %d tests\n' "$count" "$plans" >&2
+ err=1
+ fi
+
+ exit $err
+}
+
+check_splits()
+{
+ for i in split_test_*
+ do
+ size="$(wc -c $i | cut -d' ' -f1)"
+ if [ "$size" != "32" ]
+ then
+ printf "# Expected 32 bytes but '%s' is %d bytes\n" "$i" "$size"
+ return 1
+ fi
+ done
+}
+
+err=0
+plans=3
+count=0
+
+printf '1..%d\n' "$plans"
+
+oldpwd="$PWD"
+tempdir="$(mktemp -d)"
+cd "$tempdir" || exit 1
+
+trap t_end EXIT
+
+t split "$target" -b 32 "$WD/test-cmd/inputs/all_bytes" split_test_
+
+t check_splits check_splits
+
+cat split_test_* > split_test
+t compare cmp split_test "$WD/test-cmd/inputs/all_bytes"