commit: 8aec98a517fdbbb0fc303e64ad5c7f61babdfd1b
parent 7c50cbab04faffa818b127dd3b55047dc01cfc4f
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Fri, 16 Jan 2026 23:50:47 +0100
cmd/cut: implement -n option
Fixes: https://todo.sr.ht/~lanodan/utils-std/4
Diffstat:
3 files changed, 71 insertions(+), 5 deletions(-)
diff --git a/cmd/cut.1 b/cmd/cut.1
@@ -74,7 +74,8 @@ if an empty string is passed, then NULL is used as separator.
Cut fields based on
.Ar list .
.It Fl n
-Do not split codepoints. (Currently unsupported in this implementation)
+Do not split multi-byte characters.
+A character is printed in full if its last byte is part of the selection, otherwise the whole character is dropped.
.It Fl s
Suppress lines with no delimiter characters,
otherwise whole delimiter-less lines are printed as-is.
@@ -84,8 +85,6 @@ Use NULL as line separator instead of newline.
.Sh EXIT STATUS
.Ex -std
.Sh STANDARDS
-Except for the lack of support for
-.Fl n ,
.Nm
should be compliant with the
IEEE Std 1003.1-2024 (“POSIX.1”)
diff --git a/cmd/cut.c b/cmd/cut.c
@@ -193,6 +193,64 @@ cut_b(FILE *in, const char *filename)
}
+/*
+ * cut -b -n: print the selected bytes of each record read from `in` without
+ * splitting multi-byte characters.
+ *
+ * A character is written in full when its last byte is selected and is
+ * dropped otherwise.  This is the net effect of POSIX.1-2024, which moves
+ * `low` back to the first byte of its character and `high` back to the last
+ * byte of the preceding character.
+ *
+ * `filename` is only used in error messages.
+ * Returns 0 on success, 1 when reading from `in` failed.
+ */
static int
+cut_b_n(FILE *in, const char *filename)
+{
+	char *line = NULL;
+	size_t line_len = 0;
+	int err = 0;
+
+	while(err == 0)
+	{
+		/* getdelim() returns -1 both on EOF and on error, errno tells them apart */
+		errno = 0;
+		ssize_t sread = getdelim(&line, &line_len, line_delim, in);
+		if(sread < 0)
+		{
+			if(errno != 0)
+			{
+				fprintf(
+				    stderr, "%s: error: Failed reading file '%s': %s\n", argv0, filename, strerror(errno));
+				err = 1;
+			}
+			break;
+		}
+		size_t nread = (size_t)sread;
+
+		if(nread == 0)
+		{
+			fputc(line_delim, stdout);
+			continue;
+		}
+
+		/* Strip the record delimiter (not necessarily a newline), it is written back below */
+		if(line[nread - 1] == line_delim) line[--nread] = '\0';
+
+		/* Explicit conversion state: the hidden one used with NULL is undefined after an error */
+		mbstate_t mbs;
+		memset(&mbs, 0, sizeof(mbs));
+
+		size_t i = 0;
+		while(i < nread && i < list_len)
+		{
+			/* Only look at what is left of this record, never past its end */
+			size_t isz = mbrlen(line + i, nread - i, &mbs);
+
+			/* NUL bytes, invalid and truncated sequences are handled as single bytes */
+			if(isz == 0 || isz == (size_t)-1 || isz == (size_t)-2)
+			{
+				memset(&mbs, 0, sizeof(mbs));
+				isz = 1;
+			}
+
+			/*
+			 * A character is selected when its last byte is.
+			 * Positions past the end of list are selected only by an open-ended range.
+			 */
+			size_t last = i + isz - 1;
+			if(last < list_len ? list[last] : nostop) fwrite(line + i, isz, 1, stdout);
+
+			/* Always step over the whole character, printed or not */
+			i += isz;
+		}
+
+		/* Open-ended range: resume after the last character handled so no byte is written twice */
+		if(nostop && nread > i) fwrite(line + i, nread - i, 1, stdout);
+
+		fputc(line_delim, stdout);
+	}
+
+	free(line);
+
+	return err;
+}
+
+static int
cut_c(FILE *in, const char *filename)
{
char *line = NULL;
@@ -360,7 +418,7 @@ cut(FILE *in, const char *filename)
fprintf(stderr, "%s: error: No action (-b, -c, -f) specified\n", argv0);
return 1;
case CUT_MODE_B:
- return cut_b(in, filename);
+ return opt_n ? cut_b_n(in, filename) : cut_b(in, filename);
case CUT_MODE_C:
return cut_c(in, filename);
case CUT_MODE_F:
diff --git a/test-cmd/cut.sh b/test-cmd/cut.sh
@@ -2,9 +2,9 @@
# SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
# SPDX-License-Identifier: MPL-2.0
+plans=18
WD="$(dirname "$0")/../"
target="${WD}/cmd/cut"
-plans=15
. "${WD}/test-cmd/tap.sh"
. "${WD}/test-cmd/init_env.sh"
@@ -33,6 +33,15 @@ t 'bytes:-10' "-b -10 ${WD}/test-cmd/inputs/alnum" '0123456789
'
t --exit=1 'bytes:,' "-b , ${WD}/test-cmd/inputs/alnum" 'cut: error: Empty list element
'
+# The cafés tests assume a UTF-8 environment
+t --input='cafés' 'bytes:cafés[4]' '-b 4' '
+'
+# Per POSIX, high is only ever decremented, so a character whose last byte is not selected gets dropped
+t --input='cafés' 'bytes_n:cafés[4]' '-b 4 -n' '
+'
+t --input='cafés' 'bytes_n:cafés[5]' '-b 5 -n' 'é
+'
+
# Example taken from POSIX cut(1)
t --input='abcdefghi' 'chars:6,2,4-7,1' '-c 6,2,4-7,1' 'abdefg
'