commit: 861cea5493bbadb8b9be3eb8b15b059cf42c2250
parent 2f5aaa2783f48d2e7eba4116fc939b5d7a2c9391
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Sun, 9 Mar 2025 18:27:26 +0100
cmd/printf: add support for \e and \c escape codes
Diffstat:
4 files changed, 96 insertions(+), 11 deletions(-)
diff --git a/cmd/printf.1 b/cmd/printf.1
@@ -29,7 +29,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd July 1, 2020
+.Dd March 9, 2025
.Dt PRINTF 1
.Os
.Sh NAME
@@ -85,6 +85,8 @@ are as follows:
Write a <bell> character.
.It Cm \eb
Write a <backspace> character.
+.It Cm \ee
+Write an <escape> character.
.It Cm \ef
Write a <form-feed> character.
.It Cm \en
@@ -95,10 +97,42 @@ Write a <carriage return> character.
Write a <tab> character.
.It Cm \ev
Write a <vertical tab> character.
-.It Cm \e\'
+.It Cm \e\(aq
Write a <single quote> character.
.It Cm \e\e
Write a backslash character.
+.It Cm \ec Ns Ar char
+Write a control character, where:
+.Bl -bullet -compact
+.It
+.Cm @
+is 0x00 / NUL,
+.It
+.Cm A-Z
+/
+.Cm a-z
+correspond to 0x01-0x1A,
+.It
+.Cm \(lB
+is 0x1B / ESC / Escape,
+.It
+.Cm \(rs
+is 0x1C / FS / Field-Separator,
+.It
+.Cm \(rB
+is 0x1D / GS / Group-Separator,
+.It
+.Cm ^
+is 0x1E / RS / Record-Separator,
+.\" not sure how to tell mandoc to include the underscore as Cm argument
+.It
+.Cm _
+is 0x1F / US / Unit-Separator,
+.It
+.Cm \&?
+is 0x7F / DEL / Delete.
+.El
+Note that the values correspond to circumflex escapes (i.e. ^A == \ecA).
.It Cm \ex Ns Ar hex-num
Write a byte whose
value is the 1- or 2-digits
@@ -368,12 +402,18 @@ code of the first character is not supported.
.Sh STANDARDS
The
.Nm
-command is expected to be compatible with the
-.St -p1003.2
-specification.
+utility should be compliant with the
+IEEE Std 1003.1-2024 (“POSIX.1”) specification.
+.Pp
The
+.Cm \ee ,
+.Cm \ec Ns Ar char ,
+and
.Cm \ex Ns Ar hex-num
-backslash-escape is an extension.
+backslash-escapes are extensions
+inspired by
+.Xr sh 1 Ns 's
+dollar-single-quote($'…') escapes.
.Sh HISTORY
The
.Nm
diff --git a/cmd/printf.c b/cmd/printf.c
@@ -500,6 +500,13 @@ escape(char *fmt, int percent, size_t *len)
char *save, *store, c;
int value;
+ /*
+ * Required by POSIX.1-2024 for printf: \\ \a \b \f \n \r \t \v \000
+ *
+ * <https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04>
+ * As inspiration, required by POSIX.1-2024 for dollar-single-quote($'…'):
+ * \" \' \\ \a \b \e \f \n \r \t \v \c0 \x00 \000
+ */
for(save = store = fmt; ((c = *fmt) != 0); ++fmt, ++store)
{
if(c != '\\')
@@ -531,7 +538,30 @@ escape(char *fmt, int percent, size_t *len)
*len = store - save;
return (1);
}
- *store = 'c';
+
+ /* Assumes ASCII */
+ if(fmt[1] == '?')
+ {
+ fmt++;
+ *store = '\177';
+ }
+ else if(fmt[1] >= 'a' && fmt[1] <= 'z')
+ {
+ fmt++;
+ *store = (fmt[0] - 'a') + 1;
+ }
+ else if(fmt[1] >= '@' && fmt[1] <= '_')
+ {
+ fmt++;
+ *store = (fmt[0] - '@');
+ }
+ else
+ {
+ *store = 'c';
+ }
+ break;
+ case 'e': /* escape */
+ *store = '\033';
break;
case 'f': /* form-feed */
*store = '\f';
@@ -548,7 +578,6 @@ escape(char *fmt, int percent, size_t *len)
case 'v': /* vertical tab */
*store = '\v';
break;
- /* octal constant */
case 'x': /* hex */
c = 2;
fmt++;
@@ -565,6 +594,7 @@ escape(char *fmt, int percent, size_t *len)
--fmt;
*store = (char)value;
break;
+ /* octal constant */
case '0':
case '1':
case '2':
diff --git a/test-cmd/inputs/all_ascii b/test-cmd/inputs/all_ascii
Binary files differ.
diff --git a/test-cmd/printf.sh b/test-cmd/printf.sh
@@ -2,9 +2,10 @@
# SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
# SPDX-License-Identifier: MPL-2.0
-target="$(dirname "$0")/../cmd/printf"
-plans=4
-. "$(dirname "$0")/tap.sh"
+WD="$(dirname "$0")/../"
+target="${WD}/cmd/printf"
+plans=6
+. "${WD}/test-cmd/tap.sh"
t esc '\b\t\n' '
'
@@ -15,3 +16,17 @@ t hex '\x7B\x7d' '{}'
t repeat_fmt '%s\n foo bar' 'foo
bar
'
+
+var_c_upper=$(cat <<'EOF'
+\c@\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ\c[\c\\c]\c^\c_ !"#$%%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?
+EOF
+)
+
+t_file esc_c_upper "${WD}/test-cmd/inputs/all_ascii" "${var_c_upper}"
+
+var_c_lower=$(cat <<'EOF'
+\c@\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz\c[\c\\c]\c^\c_ !"#$%%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?
+EOF
+)
+
+t_file esc_c_lower "${WD}/test-cmd/inputs/all_ascii" "${var_c_lower}"