logo

utils-std

Collection of commonly available Unix tools
commit: fdd47a4aef8b85e440c2c0bbee1a72947521ab9e
parent a9e81d3a107310775ac4d37b44bff45e61c442b6
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Thu, 15 Aug 2024 21:43:31 +0200

cmd/split: new

Diffstat:

M Makefile | 4 ++++
A cmd/split.c | 212 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M coreutils.txt | 2 +-
M lsb_commands.txt | 2 +-
M posix_utilities.txt | 2 +-
M test-cmd/Kyuafile | 1 +
A test-cmd/split.sh | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 291 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile @@ -150,6 +150,10 @@ cmd/head: cmd/head.c lib/truncation.c lib/truncation.h Makefile $(RM) -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno} $(CC) -std=c99 $(CFLAGS) -o $@ cmd/head.c lib/truncation.c $(LDFLAGS) $(LDSTATIC) +cmd/split: cmd/split.c lib/truncation.c lib/truncation.h lib/fs.c lib/fs.h Makefile + $(RM) -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno} + $(CC) -std=c99 $(CFLAGS) -o $@ cmd/split.c lib/truncation.c lib/fs.c $(LDFLAGS) $(LDSTATIC) + cmd/tr: cmd/tr.c lib/tr_str.c lib/tr_str.h Makefile $(RM) -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno} $(CC) -std=c99 $(CFLAGS) -o $@ cmd/tr.c lib/tr_str.c $(LDFLAGS) $(LDSTATIC) diff --git a/cmd/split.c b/cmd/split.c @@ -0,0 +1,212 @@ +// utils-std: Collection of commonly available Unix tools +// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +// SPDX-License-Identifier: MPL-2.0 + +#define _POSIX_C_SOURCE 200809L + +#include "../lib/fs.h" // auto_file_copy +#include "../lib/truncation.h" // apply_size_suffix + +#include <errno.h> +#include <fcntl.h> // open +#include <limits.h> // NAME_MAX +#include <stdio.h> // fprintf +#include <stdlib.h> // strtoul +#include <string.h> // strerror +#include <sys/stat.h> // fstat +#include <unistd.h> // getopt + +char *argv0 = "split"; + +static char *error_opt_b_l = "split: Options -b and -l are mutually exclusive\n"; + +int +main(int argc, char *argv[]) +{ + char *name = "x"; + size_t suffix_len = 3, bytes = 0, lines = 0; + + int c = -1; + while((c = getopt(argc, argv, ":a:b:l:")) != -1) + { + char *endptr = NULL; + + switch(c) + { + case 'a': + suffix_len = strtoul(optarg, &endptr, 0); + if(suffix_len == 0) + { + fprintf(stderr, "split: Error while parsing '-a %s': %s\n", optarg, strerror(errno)); + return 1; + } + if(endptr != NULL && *endptr != '\0') + { + fprintf(stderr, "split: Invalid trailing characters in '-a %s': %s\n", optarg, endptr); + return 1; + } + break; + case 'b': + { + if(lines != 0) + { + fputs(error_opt_b_l, 
stderr); + return 1; + } + + long opt_b = strtoul(optarg, &endptr, 0); + if(opt_b == 0) + { + fprintf(stderr, "split: Error while parsing '-b %s': %s\n", optarg, strerror(errno)); + return 1; + } + + if(endptr != NULL && *endptr != 0) + if(apply_size_suffix(&opt_b, endptr) != 0) return 1; + + bytes = (size_t)opt_b; + lines = 0; + break; + } + case 'l': + if(bytes != 0) + { + fputs(error_opt_b_l, stderr); + return 1; + } + + lines = strtoul(optarg, &endptr, 0); + if(lines == 0) + { + fprintf(stderr, "split: Error while parsing '-l %s': %s\n", optarg, strerror(errno)); + return 1; + } + if(endptr != NULL && *endptr != '\0') + { + fprintf(stderr, "split: Invalid trailing characters in '-l %s': %s\n", optarg, endptr); + return 1; + } + break; + case ':': + fprintf(stderr, "split: Option '-%c' requires an operand\n", optopt); + return 1; + default: + fprintf(stderr, "split: Unhandled option '-%c'\n", optopt); + return 1; + } + } + + argc -= optind; + argv += optind; + + if(lines == 0 && bytes == 0) lines = 1000; + + char *name_in = NULL; + + if(argc > 2 || argc < 0) + { + fprintf(stderr, "split: Expected 0, 1, or 2 arguments, got %d\n", argc); + return 1; + } + else if(argc >= 1) + { + name_in = argv[0]; + + if(argc == 2) name = argv[1]; + } + + size_t name_len = strlen(name); + if(name_len + suffix_len > NAME_MAX) + { + fprintf(stderr, + "split: Error: name(%zd bytes) + suffix_length(%zd bytes) > NAME_MAX(%d bytes)\n", + name_len, + suffix_len, + NAME_MAX); + return 1; + } + + if(bytes != 0) + { + int fd_in = STDIN_FILENO; + if(name_in != NULL) + { + fd_in = open(name_in, O_RDONLY | O_NOCTTY); + if(fd_in < 0) + { + fprintf(stderr, "split: Failed opening '%s' file: %s\n", name_in, strerror(errno)); + return 1; + } + } + + struct stat fd_in_stat; + if(fstat(fd_in, &fd_in_stat) != 0) + { + fprintf(stderr, "split: Failed getting status from file '%s': %s", name_in, strerror(errno)); + close(fd_in); + return 1; + } + + int err = 0; + off_t wrote = 0; + int split_id = 0; + 
while(wrote < fd_in_stat.st_size) + { + char name_out[NAME_MAX] = ""; + memcpy(name_out, name, name_len); + memset(name_out + name_len, 'a', suffix_len); + + int id_n = split_id++; + size_t id_p = name_len + suffix_len; + do + { + name_out[id_p--] = 'a' + (id_n % 26); + id_n /= 26; + } while(id_n > 0 && id_p > name_len); + + if(id_p <= name_len) + { + fprintf(stderr, + "split: Failed representing %d into suffix of length %zu\n", + split_id, + suffix_len); + return 1; + } + + int fd_out = open(name_out, O_WRONLY | O_NOCTTY | O_CREAT, 0644); + if(fd_out < 0) + { + fprintf(stderr, "split: Failed opening '%s' file: %s\n", name_out, strerror(errno)); + return 1; + } + + int ret = auto_file_copy(fd_in, fd_out, bytes, 0); + if(ret < 0) + { + fprintf(stderr, + "split: Error while copying from file '%s' to file '%s': %s\n", + name_in ? name_in : "<stdin>", + name_out, + strerror(errno)); + close(fd_out); + err = 1; + break; + } + + wrote += ret; + + if(close(fd_out) < 0) + { + fprintf(stderr, "split: Failing closing file '%s': %s\n", name_out, strerror(errno)); + err = 1; + break; + } + } + + if(name_in != NULL) close(fd_in); + + return err; + } + + return 0; +} diff --git a/coreutils.txt b/coreutils.txt @@ -77,7 +77,7 @@ shred: ? shuf: Todo sleep: Done sort: ? -split: No. Considered obsolete +split: Done stat: Todo stdbuf: No stty: Todo diff --git a/lsb_commands.txt b/lsb_commands.txt @@ -111,7 +111,7 @@ sh: out of scope shutdown: No sleep: Done sort: ? -split: No. 
Considered obsolete +split: Done strings: Done strip: out of scope stty: Todo diff --git a/posix_utilities.txt b/posix_utilities.txt @@ -115,7 +115,7 @@ sed: no sh: no, external sleep: done sort -split +split: done strings: done strip: no, toolchain POSIX2_SW_DEV stty diff --git a/test-cmd/Kyuafile b/test-cmd/Kyuafile @@ -32,6 +32,7 @@ tap_test_program{name="realpath.sh", required_files=basedir.."/cmd/realpath", ti tap_test_program{name="seq.sh", required_files=basedir.."/cmd/seq", timeout=1} tap_test_program{name="sha1sum.sh", required_files=basedir.."/cmd/sha1sum", timeout=1} tap_test_program{name="sleep.sh", required_files=basedir.."/cmd/sleep", timeout=1} +tap_test_program{name="split.sh", required_files=basedir.."/cmd/split", timeout=1} tap_test_program{name="strings.sh", required_files=basedir.."/cmd/strings", timeout=1} tap_test_program{name="test.sh", required_files=basedir.."/cmd/test", timeout=2} tap_test_program{name="true.sh", required_files=basedir.."/cmd/true", timeout=1} diff --git a/test-cmd/split.sh b/test-cmd/split.sh @@ -0,0 +1,71 @@ +#!/bin/sh +# SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +# SPDX-License-Identifier: MPL-2.0 + +WD="$(dirname "$0")/../" +WD="$(realpath "$WD")" +target="$WD/cmd/split" + +t() +{ + count=$((count+1)) + name="$1" + + shift + + if "$@"; then + printf 'ok %s - %s\n' "$count" "$name" + else + printf '# Command failed: %s\n' "$*" + printf 'not ok %s - %s\n' "$count" "$name" + err=1 + fi +} + +t_end() +{ + rm split_test_* + + cd "${oldpwd?}" + rm -r "${tempdir?}" + + if [ $count -ne $plans ] + then + printf 'Error: Ran %d instead of the planned %d tests\n' "$count" "$plans" >&2 + err=1 + fi + + exit $err +} + +check_splits() +{ + for i in split_test_* + do + size="$(wc -c $i | cut -d' ' -f1)" + if [ "$size" != "32" ] + then + printf "# Expected 32 bytes but '%s' is %d bytes\n" "$i" "$size" + return 1 + fi + done +} + +err=0 +plans=3 +count=0 + +printf '1..%d\n' "$plans" + 
+oldpwd="$PWD" +tempdir="$(mktemp -d)" +cd "$tempdir" || exit 1 + +trap t_end EXIT + +t split "$target" -b 32 "$WD/test-cmd/inputs/all_bytes" split_test_ + +t check_splits check_splits + +cat split_test_* > split_test +t compare cmp split_test "$WD/test-cmd/inputs/all_bytes"