commit: 280bfc2fc6c34d325f70eead7d0e95b25f7fdc02
parent 8afdb73377e618b0259a35cda81868d99ad92149
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date: Mon, 26 May 2025 08:36:32 +0200
cmd/printf: rewrite, fixing documented bugs and getting rid of VLA
Diffstat:
4 files changed, 497 insertions(+), 708 deletions(-)
diff --git a/cmd/printf.1 b/cmd/printf.1
@@ -29,7 +29,7 @@
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
-.Dd March 9, 2025
+.Dd May 26, 2025
.Dt PRINTF 1
.Os
.Sh NAME
@@ -74,8 +74,8 @@ The format string is reused as often as necessary to satisfy the
Any extra format specifications are evaluated with zero or the null
string.
.Pp
-Character escape sequences are in backslash notation as defined in the
-.St -ansiC ,
+Character escape sequences are in backslash notation as defined in
+.St -isoC-2011 ,
with extensions.
The characters and their meanings
are as follows:
@@ -101,8 +101,14 @@ Write a <vertical tab> character.
Write a <single quote> character.
.It Cm \e\e
Write a backslash character.
-.It Cm \ec Ns Ar char
-Write a control character, where:
+.It Cm \ec | Cm \ec Ns Ar char
+In a
+.Cm %b
+argument, truncate the string at this point and make
+.Nm printf
+exit.
+.Pp
+Otherwise, in the format string, write a control character, where:
.Bl -bullet -compact
.It
.Cm @
@@ -422,41 +428,3 @@ command appeared in
It is modeled
after the standard library function,
.Xr printf 3 .
-.Sh CAVEATS
-ANSI hexadecimal character constants were deliberately not provided.
-.Pp
-Trying to print a dash ("-") as the first character causes
-.Nm
-to interpret the dash as a program argument.
-.Nm --
-must be used before
-.Ar format .
-.Pp
-If the locale contains multibyte characters
-(such as UTF-8),
-the
-.Cm c
-format and
-.Cm b
-and
-.Cm s
-formats with a precision
-may not operate as expected.
-.Sh BUGS
-Since the floating point numbers are translated from ASCII
-to floating-point and then back again, floating-point precision may be lost.
-(By default, the number is translated to an IEEE-754 double-precision
-value before being printed.
-The
-.Cm L
-modifier may produce additional precision, depending on the hardware platform.)
-.Pp
-The escape sequence \e000 is the string terminator.
-When present in the argument for the
-.Cm b
-format, the argument will be truncated at the \e000 character.
-.Pp
-Multibyte characters are not recognized in format strings (this is only
-a problem if
-.Ql %
-can appear inside a multibyte character).
diff --git a/cmd/printf.c b/cmd/printf.c
@@ -1,550 +1,79 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
- * Copyright 2014 Garrett D'Amore <garrett@damore.org>
- * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 1989, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * Important: This file is used both as a standalone program /usr/bin/printf
- * and as a builtin for /bin/sh (#define SHELL).
- */
+// utils-std: Collection of commonly available Unix tools
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
#define _POSIX_C_SOURCE 200809L
-
-#include "../lib/err.h"
-#include "../lib/getopt_nolong.h"
-
-#include <assert.h>
-#include <ctype.h>
#include <errno.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <locale.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <wchar.h>
-
-#define PF(f, func) \
- do \
- { \
- if(havewidth) \
- if(haveprec) \
- (void)printf(f, fieldwidth, precision, func); \
- else \
- (void)printf(f, fieldwidth, func); \
- else if(haveprec) \
- (void)printf(f, precision, func); \
- else \
- (void)printf(f, func); \
- } while(0)
-
-static int asciicode(void);
-static char *printf_doformat(char *, int *);
-static int escape(char *, int, size_t *);
-static int getchr(void);
-static int getfloating(long double *, int);
-static int getint(int *);
-static int getnum(intmax_t *, uintmax_t *, int);
-static const char *getstr(void);
-static char *mknum(char *, char);
-static void usage(void);
-
-static const char digits[] = "0123456789";
-
-static char end_fmt[1];
-
-static int myargc;
-static char **myargv;
-static char **gargv;
-static char **maxargv;
-
-const char *argv0 = "printf";
+#include <stdio.h> // printf
+#include <stdlib.h> // strtoul, strtod
+#include <string.h> // strlen, memchr
-int
-main(int argc, char *argv[])
+// [1-9]
+static int
+isndigit(int c)
{
- size_t len;
- int end, rval;
- char *format, *fmt, *start;
-
- char *lc_all = setlocale(LC_ALL, "");
- if(lc_all == NULL)
- {
- fprintf(stderr,
- "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n",
- argv0,
- strerror(errno));
- }
-
- for(int c = -1; (c = getopt_nolong(argc, argv, "")) != -1;)
- {
- switch(c)
- {
- case '?':
- default:
- usage();
- return (1);
- }
- }
-
- argc -= optind;
- argv += optind;
-
- if(argc < 1)
- {
- usage();
- return (1);
- }
-
- /*
- * Basic algorithm is to scan the format string for conversion
- * specifications -- once one is found, find out if the field
- * width or precision is a '*'; if it is, gather up value. Note,
- * format strings are reused as necessary to use up the provided
- * arguments, arguments of zero/null string are provided to use
- * up the format string.
- */
- fmt = format = *argv;
- escape(fmt, 1, &len); /* backslash interpretation */
- rval = end = 0;
- gargv = ++argv;
-
- for(;;)
- {
- maxargv = gargv;
-
- myargv = gargv;
- for(myargc = 0; gargv[myargc]; myargc++)
- /* nop */;
- start = fmt;
- while(fmt < format + len)
- {
- if(fmt[0] == '%')
- {
- fwrite(start, 1, fmt - start, stdout);
- if(fmt[1] == '%')
- {
- /* %% prints a % */
- putchar('%');
- fmt += 2;
- }
- else
- {
- fmt = printf_doformat(fmt, &rval);
- if(fmt == NULL || fmt == end_fmt)
- {
- return (fmt == NULL ? 1 : rval);
- }
- end = 0;
- }
- start = fmt;
- }
- else
- fmt++;
- if(gargv > maxargv) maxargv = gargv;
- }
- gargv = maxargv;
-
- if(end == 1)
- {
- utils_warnx("missing format character");
- return (1);
- }
- fwrite(start, 1, fmt - start, stdout);
- if(!*gargv)
- {
- return (rval);
- }
- /* Restart at the beginning of the format string. */
- fmt = format;
- end = 1;
- }
- /* NOTREACHED */
+ return c >= '1' && c <= '9';
}
-static char *
-printf_doformat(char *fmt, int *rval)
+// digits [0-9]
+static int
+isdigit(int c)
{
- static const char skip1[] = "#'-+ 0";
- int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
- char convch, nextch;
- char start[strlen(fmt) + 1];
- char **fargv;
- char *dptr;
- int l;
-
- dptr = start;
- *dptr++ = '%';
- *dptr = 0;
-
- fmt++;
-
- /* look for "n$" field index specifier */
- l = strspn(fmt, digits);
- if((l > 0) && (fmt[l] == '$'))
- {
- int idx = atoi(fmt);
- if(idx <= myargc)
- {
- gargv = &myargv[idx - 1];
- }
- else
- {
- gargv = &myargv[myargc];
- }
- if(gargv > maxargv) maxargv = gargv;
- fmt += l + 1;
-
- /* save format argument */
- fargv = gargv;
- }
- else
- {
- fargv = NULL;
- }
-
- /* skip to field width */
- while(*fmt && strchr(skip1, *fmt) != NULL)
- {
- *dptr++ = *fmt++;
- *dptr = 0;
- }
-
- if(*fmt == '*')
- {
-
- fmt++;
- l = strspn(fmt, digits);
- if((l > 0) && (fmt[l] == '$'))
- {
- int idx = atoi(fmt);
- if(fargv == NULL)
- {
- utils_warnx("incomplete use of n$");
- return (NULL);
- }
- if(idx <= myargc)
- {
- gargv = &myargv[idx - 1];
- }
- else
- {
- gargv = &myargv[myargc];
- }
- fmt += l + 1;
- }
- else if(fargv != NULL)
- {
- utils_warnx("incomplete use of n$");
- return (NULL);
- }
-
- if(getint(&fieldwidth)) return (NULL);
- if(gargv > maxargv) maxargv = gargv;
- havewidth = 1;
-
- *dptr++ = '*';
- *dptr = 0;
- }
- else
- {
- havewidth = 0;
-
- /* skip to possible '.', get following precision */
- while(isdigit(*fmt))
- {
- *dptr++ = *fmt++;
- *dptr = 0;
- }
- }
-
- if(*fmt == '.')
- {
- /* precision present? */
- fmt++;
- *dptr++ = '.';
-
- if(*fmt == '*')
- {
-
- fmt++;
- l = strspn(fmt, digits);
- if((l > 0) && (fmt[l] == '$'))
- {
- int idx = atoi(fmt);
- if(fargv == NULL)
- {
- utils_warnx("incomplete use of n$");
- return (NULL);
- }
- if(idx <= myargc)
- {
- gargv = &myargv[idx - 1];
- }
- else
- {
- gargv = &myargv[myargc];
- }
- fmt += l + 1;
- }
- else if(fargv != NULL)
- {
- utils_warnx("incomplete use of n$");
- return (NULL);
- }
-
- if(getint(&precision)) return (NULL);
- if(gargv > maxargv) maxargv = gargv;
- haveprec = 1;
- *dptr++ = '*';
- *dptr = 0;
- }
- else
- {
- haveprec = 0;
-
- /* skip to conversion char */
- while(isdigit(*fmt))
- {
- *dptr++ = *fmt++;
- *dptr = 0;
- }
- }
- }
- else
- haveprec = 0;
- if(!*fmt)
- {
- utils_warnx("missing format character");
- return (NULL);
- }
- *dptr++ = *fmt;
- *dptr = 0;
-
- /*
- * Look for a length modifier. POSIX doesn't have these, so
- * we only support them for floating-point conversions, which
- * are extensions. This is useful because the L modifier can
- * be used to gain extra range and precision, while omitting
- * it is more likely to produce consistent results on different
- * architectures. This is not so important for integers
- * because overflow is the only bad thing that can happen to
- * them, but consider the command printf %a 1.1
- */
- if(*fmt == 'L')
- {
- mod_ldbl = 1;
- fmt++;
- if(!strchr("aAeEfFgG", *fmt))
- {
- utils_warnx("bad modifier L for %%%c", *fmt);
- return (NULL);
- }
- }
- else
- {
- mod_ldbl = 0;
- }
-
- /* save the current arg offset, and set to the format arg */
- if(fargv != NULL)
- {
- gargv = fargv;
- }
-
- convch = *fmt;
- nextch = *++fmt;
-
- *fmt = '\0';
- switch(convch)
- {
- case 'b':
- {
- size_t len;
- char *p;
- int getout;
-
- /* Convert "b" to "s" for output. */
- start[strlen(start) - 1] = 's';
- if((p = strdup(getstr())) == NULL)
- {
- utils_warnx("%s", strerror(ENOMEM));
- return (NULL);
- }
- getout = escape(p, 0, &len);
- PF(start, p);
- /* Restore format for next loop. */
-
- free(p);
- if(getout) return (end_fmt);
- break;
- }
- case 'c':
- {
- char p;
-
- p = getchr();
- if(p != '\0') PF(start, p);
- break;
- }
- case 's':
- {
- const char *p;
-
- p = getstr();
- PF(start, p);
- break;
- }
- case 'd':
- case 'i':
- case 'o':
- case 'u':
- case 'x':
- case 'X':
- {
- char *f;
- intmax_t val;
- uintmax_t uval;
- int signedconv;
-
- signedconv = (convch == 'd' || convch == 'i');
- if((f = mknum(start, convch)) == NULL) return (NULL);
- if(getnum(&val, &uval, signedconv)) *rval = 1;
- if(signedconv)
- PF(f, val);
- else
- PF(f, uval);
- break;
- }
- case 'e':
- case 'E':
- case 'f':
- case 'F':
- case 'g':
- case 'G':
- case 'a':
- case 'A':
- {
- long double p;
-
- if(getfloating(&p, mod_ldbl)) *rval = 1;
- if(mod_ldbl)
- PF(start, p);
- else
- PF(start, (double)p);
- break;
- }
- default:
- utils_warnx("illegal format character '%c'", convch);
- return (NULL);
- }
- *fmt = nextch;
- /* return the gargv to the next element */
- return (fmt);
+ return c >= '0' && c <= '9';
}
-static char *
-mknum(char *str, char ch)
+// hex digits [0-9A-Fa-f]
+static int
+isxdigit(int c)
{
- static char *copy;
- static size_t copy_size;
- char *newcopy;
- size_t len, newlen;
-
- len = strlen(str) + 2;
- if(len > copy_size)
- {
- newlen = len + 1023;
- assert(newlen != 0);
- if((newcopy = realloc(copy, newlen)) == NULL)
- {
- utils_warnx("%s", strerror(ENOMEM));
- return (NULL);
- }
- copy = newcopy;
- copy_size = newlen;
- }
-
- memmove(copy, str, len - 3);
- copy[len - 3] = 'j';
- copy[len - 2] = ch;
- copy[len - 1] = '\0';
- return (copy);
+ return isdigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
}
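+// len is read as the input length and updated to the output length; needed because escapes can yield NUL bytes
+// returns 1 when '\c' ended the string early (only when percent is 0), otherwise 0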
static int
-escape(char *fmt, int percent, size_t *len)
+escape(char *fmt, size_t *len, int percent)
{
- char *save, *store, c;
+ char *start = fmt;
+ char *store;
+ char c = '\0';
int value;
/*
- * Required by POSIX.1-2024 for printf: \\ \a \b \f \n \r \t \v \000
+ * Required by POSIX.1-2024 for printf(1): \\ \a \b \c \f \n \r \t \v \000
*
- * <https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04>
* As inspiration, required by POSIX.1-2024 for dollar-single-quote($'…'):
- * \" \' \\ \a \b \e \f \n\ r\ t\ \v \c0 \x00 \000
+ * \" \' \\ \a \b \e \f \n \r \t \v \c0 \x00 \000
+ * <https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04>
*/
- for(save = store = fmt; ((c = *fmt) != 0); ++fmt, ++store)
+ for(store = fmt; ((c = *fmt) != '\0') && fmt < (start + *len); ++fmt, ++store)
{
if(c != '\\')
{
*store = c;
continue;
}
+
switch(*++fmt)
{
- case '\0': /* EOS, user error */
- *store = '\\';
- *++store = '\0';
- *len = store - save;
- return (0);
- case '\\': /* backslash */
+ case '\\': /* backslash; POSIX */
case '\'': /* single quote */
+ default:
*store = *fmt;
break;
- case 'a': /* bell/alert */
+ case 'a': /* bell/alert; POSIX */
*store = '\a';
break;
- case 'b': /* backspace */
+ case 'b': /* backspace; POSIX */
*store = '\b';
break;
case 'c':
if(!percent)
{
+ /* clear; POSIX */
*store = '\0';
- *len = store - save;
- return (1);
+ *len = (size_t)(store - start);
+ return 1;
}
/* Assumes ASCII */
@@ -571,19 +100,19 @@ escape(char *fmt, int percent, size_t *len)
case 'e': /* escape */
*store = '\033';
break;
- case 'f': /* form-feed */
+ case 'f': /* form-feed; POSIX */
*store = '\f';
break;
- case 'n': /* newline */
+ case 'n': /* newline; POSIX */
*store = '\n';
break;
- case 'r': /* carriage-return */
+ case 'r': /* carriage-return; POSIX */
*store = '\r';
break;
- case 't': /* horizontal tab */
+ case 't': /* horizontal tab; POSIX */
*store = '\t';
break;
- case 'v': /* vertical tab */
+ case 'v': /* vertical tab; POSIX */
*store = '\v';
break;
case 'x': /* hex */
@@ -602,7 +131,7 @@ escape(char *fmt, int percent, size_t *len)
--fmt;
*store = (char)value;
break;
- /* octal constant */
+ /* octal; POSIX */
case '0':
case '1':
case '2':
@@ -618,170 +147,428 @@ escape(char *fmt, int percent, size_t *len)
value += *fmt - '0';
}
--fmt;
- if(percent && value == '%')
- {
- *store++ = '%';
- *store = '%';
- }
- else
- *store = (char)value;
- break;
- default:
- *store = *fmt;
+ *store = (percent && value == '%') ? '%' : (char)value;
break;
}
}
- *store = '\0';
- *len = store - save;
- return (0);
-}
-static int
-getchr(void)
-{
- if(!gargv || !*gargv) return ('\0');
- return ((int)**gargv++);
+ *store = '\0';
+ *len = (size_t)(store - start);
+ return 0;
}
-static const char *
-getstr(void)
+static void
+usage(void)
{
- if(!gargv || !*gargv) return ("");
- return (*gargv++);
+ (void)fputs("usage: printf format [arguments...]\n", stderr);
}
-static int
-getint(int *ip)
+int
+main(int argc, char *argv[])
{
- intmax_t val;
- uintmax_t uval;
- int rval;
+ argc--;
+ argv++;
- if(getnum(&val, &uval, 1)) return (1);
- rval = 0;
- if(val < INT_MIN || val > INT_MAX)
+ if(argc < 1)
{
- utils_warnx("%s: %s", *gargv, strerror(ERANGE));
- rval = 1;
+ usage();
+ return 1;
}
- *ip = (int)val;
- return (rval);
-}
-static int
-getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
-{
- char *ep;
- int rval;
+ char *fmt = argv[0];
+ size_t fmtlen = strlen(fmt);
- if(!gargv || !*gargv)
- {
- *ip = *uip = 0;
- return (0);
- }
- if(**gargv == '"' || **gargv == '\'')
- {
- if(signedconv)
- *ip = asciicode();
- else
- *uip = asciicode();
- return (0);
- }
- rval = 0;
- errno = 0;
- if(signedconv)
- *ip = strtoimax(*gargv, &ep, 0);
- else
- *uip = strtoumax(*gargv, &ep, 0);
- if(ep == *gargv)
- {
- utils_warnx("%s: expected numeric value", *gargv);
- rval = 1;
- }
- else if(*ep != '\0')
- {
- utils_warnx("%s: not completely converted", *gargv);
- rval = 1;
- }
- if(errno == ERANGE)
- {
- utils_warnx("%s: %s", *gargv, strerror(ERANGE));
- rval = 1;
- }
- ++gargv;
- return (rval);
-}
+ if(escape(fmt, &fmtlen, 1) != 0) return 1;
-static int
-getfloating(long double *dp, int mod_ldbl)
-{
- char *ep;
- int rval;
+ argc--;
+ argv++;
- if(!*gargv)
- {
- *dp = 0.0;
- return (0);
- }
- if(**gargv == '"' || **gargv == '\'')
- {
- *dp = asciicode();
- return (0);
- }
- rval = 0;
- errno = 0;
- if(mod_ldbl)
- *dp = strtold(*gargv, &ep);
- else
- *dp = strtod(*gargv, &ep);
- if(ep == *gargv)
- {
- utils_warnx("%s: expected numeric value", *gargv);
- rval = 1;
- }
- else if(*ep != '\0')
- {
- utils_warnx("%s: not completely converted", *gargv);
- rval = 1;
- }
- if(errno == ERANGE)
+ // To keep argv intact for '%n$' format conversion specifiers
+ char **fmt_argv = argv;
+ unsigned int fmt_argn = 0;
+
+ if(!strchr(fmt, '%'))
{
- utils_warnx("%s: %s", *gargv, strerror(ERANGE));
- rval = 1;
+ fwrite(fmt, 1, fmtlen, stdout);
+ return 0;
}
- ++gargv;
- return (rval);
-}
-static int
-asciicode(void)
-{
- int ch;
- wchar_t wch;
- mbstate_t mbs;
-
- ch = (unsigned char)**gargv;
- if(ch == '\'' || ch == '"')
+ do
{
- memset(&mbs, 0, sizeof(mbs));
- switch(mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs))
+ for(char *fmt_idx = fmt; fmt_idx < (fmt + fmtlen); fmt_idx++)
{
- case(size_t)-2:
- case(size_t)-1:
- wch = (unsigned char)gargv[0][1];
- break;
- case 0:
- wch = 0;
- break;
- }
- ch = wch;
- }
- ++gargv;
- return (ch);
-}
+ // Field width provided for consistency with C printf
+ int fwidth = 0;
+ /* "negative precision is taken as if the precision were omitted." — POSIX.1-2008 fprintf() */
+ int precision = -1;
+
+#define FMT_FLAGS "'-+ #0"
+#define FMT_BUF_SIZ sizeof("%" FMT_FLAGS "*.*d")
+ int fmt_bufi = 0;
+ static char fmt_buf[FMT_BUF_SIZ];
+ fmt_buf[fmt_bufi++] = '%';
+
+ char *fmt_arg = NULL;
+ if(*fmt_idx != '%')
+ {
+ char *p = strchr(fmt_idx, '%');
+ if(!p) p = fmt + fmtlen;
-static void
-usage(void)
-{
- (void)fprintf(stderr, "usage: printf format [arguments ...]\n");
+ fwrite(fmt_idx, 1, p - fmt_idx, stdout);
+ fmt_idx = (p - 1);
+ continue;
+ }
+
+ fmt_idx++;
+ if(!(fmt_idx < (fmt + fmtlen))) return 0;
+
+ // handle '%n$' if present
+ if(isndigit(*fmt_idx) && argc > 0)
+ {
+ errno = 0;
+ char *num_end = NULL;
+ unsigned int num = strtoul(fmt_idx, &num_end, 10);
+ if(errno == 0 && num != 0 && num_end && *num_end == '$')
+ {
+ fmt_arg = argv[(num - 1) % argc];
+ fmt_idx = num_end + 1;
+ fmt_buf[fmt_bufi++] = '*';
+ }
+ }
+
+ /* flags */
+ while(fmt_bufi < sizeof(FMT_FLAGS) && fmt_idx + 1 < (fmt + fmtlen) &&
+ strchr(FMT_FLAGS, *fmt_idx) != NULL)
+ {
+ if(memchr(fmt_buf, *fmt_idx, fmt_bufi + 1))
+ {
+ fprintf(stderr,
+ "fprintf: error: (format position %d) flag '%c' already set\n",
+ (int)(fmt_idx - fmt),
+ *fmt_idx);
+ return 1;
+ }
+
+ fmt_buf[fmt_bufi++] = *fmt_idx;
+ fmt_idx++;
+ }
+
+ // Field width from argument
+ fmt_buf[fmt_bufi++] = '*';
+ if(fmt_idx[0] == '*')
+ {
+ if(argc <= 0)
+ {
+ fprintf(stderr,
+ "fprintf: error: (format position %d) field-width argument without format "
+ "arguments\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+
+ char *fwidth_arg = NULL;
+ fmt_idx++;
+ if(isndigit(*fmt_idx))
+ {
+ if(!fmt_arg)
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) field-width positional argument usage "
+ "('*n$') also needs format data to be positional (via '%%n$')\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+
+ errno = 0;
+ char *num_end = NULL;
+ unsigned int num = strtoul(fmt_idx, &num_end, 10);
+ if(errno != 0)
+ {
+ fprintf(
+ stderr,
+ "printf: error: (format position %d) Failed parsing field-width as a number: %s\n",
+ (int)(fmt_idx - fmt),
+ strerror(errno));
+ return 1;
+ }
+ if(!num_end || *num_end != '$')
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) Expected to find '$' after field-width "
+ "digits\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+
+ fwidth_arg = fmt_argv[(num - 1) % argc];
+ fmt_idx = num_end + 1;
+ }
+ else
+ {
+ fmt_arg = fmt_argv[fmt_argn++ % argc];
+ fwidth_arg = fmt_argv[fmt_argn++ % argc];
+ }
+
+ errno = 0;
+ fwidth = strtoul(fwidth_arg, NULL, 0);
+ if(errno != 0)
+ {
+ fprintf(stderr,
+ "printf: error: Failed parsing argument (%s) as a number for field width: %s\n",
+ fwidth_arg,
+ strerror(errno));
+ return 1;
+ }
+ }
+ else if(isdigit(fmt_idx[0]))
+ {
+ errno = 0;
+ char *num_end = NULL;
+ fwidth = strtoul(fmt_idx, &num_end, 10);
+ if(errno != 0)
+ {
+ fprintf(
+ stderr,
+ "printf: error: (format position %d) Failed parsing field-width as a number: %s\n",
+ (int)(fmt_idx - fmt),
+ strerror(errno));
+ return 1;
+ }
+ if(!num_end)
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) No remaining characters after field-width "
+ "digits\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+ if(*num_end == '$')
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) Unexpectedly found '$' after '*'-less "
+ "field-width digits\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+ fmt_idx = num_end;
+ }
+
+ /* precision */
+ fmt_buf[fmt_bufi++] = '.';
+ fmt_buf[fmt_bufi++] = '*';
+ if(*fmt_idx == '.')
+ {
+ fmt_idx++;
+
+ if(*fmt_idx == '*')
+ {
+ fmt_idx++;
+
+ if(argc <= 0)
+ {
+ fprintf(stderr,
+ "fprintf: error: (format position %d) precision argument without format "
+ "arguments\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+
+ char *prec_arg = NULL;
+ fmt_idx++;
+ if(isndigit(*fmt_idx))
+ {
+ if(!fmt_arg)
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) precision positional argument usage "
+ "('.*n$') also needs format data to be positional (via '%%n$')\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+
+ errno = 0;
+ char *num_end = NULL;
+ unsigned int num = strtoul(fmt_idx, &num_end, 10);
+ if(errno != 0)
+ {
+ fprintf(
+ stderr,
+ "printf: error: (format position %d) Failed parsing precision as a number: %s\n",
+ (int)(fmt_idx - fmt),
+ strerror(errno));
+ return 1;
+ }
+ if(!num_end || *num_end != '$')
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) Expected to find '$' after precision "
+ "digits\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+
+ prec_arg = fmt_argv[(num - 1) % argc];
+
+ fmt_idx = num_end + 1;
+ }
+ else
+ {
+ prec_arg = fmt_argv[fmt_argn++ % argc];
+ }
+
+ errno = 0;
+ precision = strtoul(prec_arg, NULL, 0);
+ if(errno != 0)
+ {
+ fprintf(stderr,
+ "printf: error: Failed parsing argument (%s) as a number for precision: %s\n",
+ prec_arg,
+ strerror(errno));
+ return 1;
+ }
+ }
+ else if(isdigit(fmt_idx[0]))
+ {
+ errno = 0;
+ char *num_end = NULL;
+ precision = strtoul(fmt_idx, &num_end, 10);
+ if(errno != 0)
+ {
+ fprintf(
+ stderr,
+ "printf: error: (format position %d) Failed parsing precision as a number: %s\n",
+ (int)(fmt_idx - fmt),
+ strerror(errno));
+ return 1;
+ }
+ if(!num_end)
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) No remaining characters after precision's "
+ "digits\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+ if(*num_end == '$')
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) Unexpectedly found '$' after '*'-less "
+ "precision's digits\n",
+ (int)(fmt_idx - fmt));
+ return 1;
+ }
+ fmt_idx = num_end;
+ }
+ else
+ {
+ fprintf(stderr,
+ "printf: error: (format position %d) Unknown precision format (char: '%c')\n",
+ (int)(fmt_idx - fmt),
+ *fmt_idx);
+ return 1;
+ }
+ }
+
+ /* BSD compatibility */
+ if(*fmt_idx == 'L') fmt_idx++;
+
+ fmt_buf[fmt_bufi++] = *fmt_idx;
+ fmt_buf[fmt_bufi++] = '\0';
+
+ if(!fmt_arg) fmt_arg = (argc == 0) ? (char *)"" : fmt_argv[fmt_argn++ % argc];
+
+ switch(*fmt_idx)
+ {
+ case '%':
+ putchar(*fmt_idx);
+ break;
+ /* strings */
+ case 's':
+ printf(fmt_buf, fwidth, precision, fmt_arg);
+ break;
+ case 'b':
+ {
+ size_t arglen = strlen(fmt_arg);
+
+ int clear = escape(fmt_arg, &arglen, 0);
+
+ if(arglen > precision) arglen = precision;
+
+ /* left-justify if there's a '-' flag */
+ if(memchr(fmt_buf, '-', fmt_bufi + 1))
+ {
+ fwrite(fmt_arg, 1, arglen, stdout);
+
+ for(int pad = fwidth - arglen; pad > 0; pad--)
+ putchar(' ');
+ }
+ else
+ {
+ for(int pad = fwidth - arglen; pad > 0; pad--)
+ putchar(' ');
+
+ fwrite(fmt_arg, 1, arglen, stdout);
+ }
+
+ if(clear) return 0;
+
+ break;
+ }
+ case 'c':
+ printf("%*c", fwidth, *fmt_arg);
+ break;
+ /* integers */
+ case 'd':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'x':
+ case 'X':
+ {
+ errno = 0;
+ unsigned long int num = strtoul(fmt_arg, NULL, 0);
+ if(errno != 0)
+ {
+ fprintf(stderr,
+ "printf: error: Failed parsing argument (%s) as a number for format conversion "
+ "'%%%c': %s\n",
+ fmt_arg,
+ *fmt_idx,
+ strerror(errno));
+ return 1;
+ }
+
+ printf(fmt_buf, fwidth, precision, num);
+ break;
+ }
+ /* floats */
+ case 'a':
+ case 'A':
+ case 'e':
+ case 'E':
+ case 'f':
+ case 'F':
+ case 'g':
+ case 'G':
+ {
+ double num = strtod(fmt_arg, NULL);
+ if(errno != 0)
+ {
+ fprintf(stderr,
+ "printf: error: Failed parsing argument (%s) as a number for format conversion "
+ "'%%%c': %s\n",
+ fmt_arg,
+ *fmt_idx,
+ strerror(errno));
+ return 1;
+ }
+ printf(fmt_buf, fwidth, precision, num);
+ break;
+ }
+ default:
+ fprintf(stderr, "printf: error: Unknown conversion specifier '%c'\n", *fmt_idx);
+ return 1;
+ }
+ }
+ } while(fmt_argn < argc);
}
diff --git a/common.mk b/common.mk
@@ -7,6 +7,6 @@ lib/err.o: lib/err.c lib/err.h
lib/consent.o: lib/consent.c lib/consent.h
lib/tr_str.o: lib/tr_str.c lib/tr_str.h
cmd/cat: cmd/cat.c lib/fs.o lib/getopt_nolong.o
-cmd/printf: cmd/printf.c lib/err.o lib/getopt_nolong.o
+cmd/printf: cmd/printf.c
cmd/rm: cmd/rm.c lib/consent.o lib/getopt_nolong.o
cmd/tr: cmd/tr.c lib/tr_str.o lib/err.o lib/getopt_nolong.o
diff --git a/test-cmd/printf.sh b/test-cmd/printf.sh
@@ -2,9 +2,9 @@
# SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
# SPDX-License-Identifier: MPL-2.0
+plans=31
WD="$(dirname "$0")/../"
target="${WD}/cmd/printf"
-plans=6
. "${WD}/test-cmd/tap.sh"
t esc '\b\t\n' '
@@ -13,14 +13,48 @@ t esc '\b\t\n' '
t octal '\041' '!'
t hex '\x7B\x7d' '{}'
-t repeat_fmt '%s\n foo bar' 'foo
-bar
-'
-
-var_c_upper='\c@\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ\c[\c\\c]\c^\c_ !"#$%%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?'
+t_args repeat 'foo,bar,baz,' '%s,' foo bar baz
+var_c_upper='\c@\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ\c[\c\\c]\c^\c_ !"#$\%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?'
t_file esc_c_upper "${WD}/test-cmd/inputs/all_ascii" "${var_c_upper}"
-var_c_lower='\c@\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz\c[\c\\c]\c^\c_ !"#$%%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?'
-
+var_c_lower='\c@\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz\c[\c\\c]\c^\c_ !"#$\%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?'
t_file esc_c_lower "${WD}/test-cmd/inputs/all_ascii" "${var_c_lower}"
+
+t_args clear_vs_caret_esc 'foo :bar' 'foo \cH:%brat\n' 'bar\cHbaz'
+
+t_args fmt_b '
+!{}' '%b' '\b\t\n\041\x7B\x7d'
+
+t_args fmt_b_rightpad '!{} .' '%-6b%c' '\041\x7B\x7d' .
+t_args fmt_b_leftpad ' !{}.' '%6b%c' '\041\x7B\x7d' .
+
+t_args fmt_c 'foo' %c f oo oooo
+
+t_args fmt_d 10, %d, 10
+t_args fmt_Ld 10, %Ld, 10
+t_args fmt_i 10, %i, 10
+t_args fmt_o 12, %o, 10
+t_args fmt_u 10, %u, 10
+t_args fmt_x a, %x, 10
+t_args fmt_X A, %X, 10
+
+t_args fmt_e '1.000000e+01,' %e, 10
+t_args fmt_E '1.000000E+01,' %E, 10
+t_args fmt_f '10.000000,' %f, 10
+t_args fmt_Lf '10.000000,' %Lf, 10
+t_args fmt_F '10.000000,' %F, 10
+t_args fmt_g '10,' %g, 10
+t_args fmt_G '10,' %G, 10
+t_args fmt_a '0x1.4p+3,' %a, 10
+t_args fmt_A '0X1.4P+3,' %A, 10
+
+t_args nofmtarg '[
+]' '[%s\n]'
+
+t_args nofmtconv 'foobar
+' 'foobar\n' 1 2 3
+
+t_args nofmtconv_caret 'foo bar' 'foo \cHbar'
+
+t_args precision_s 'abcde' '%.5s' abcdefghijklmnopqrstuvwxyz