logo

utils-std

Collection of commonly available Unix tools — git clone https://anongit.hacktivis.me/git/utils-std.git/
commit: 280bfc2fc6c34d325f70eead7d0e95b25f7fdc02
parent 8afdb73377e618b0259a35cda81868d99ad92149
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Mon, 26 May 2025 08:36:32 +0200

cmd/printf: rewrite, fixing documented bugs and getting rid of VLA

Diffstat:

M cmd/printf.1       |   54 +++++++++++-------------------------------------------
M cmd/printf.c       | 1099 ++++++++++++++++++++++++++++++++-----------------------------------------------
M common.mk          |    2 +-
M test-cmd/printf.sh |   50 ++++++++++++++++++++++++++++++++++++++++++--------
4 files changed, 497 insertions(+), 708 deletions(-)

diff --git a/cmd/printf.1 b/cmd/printf.1 @@ -29,7 +29,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd March 9, 2025 +.Dd May 26, 2025 .Dt PRINTF 1 .Os .Sh NAME @@ -74,8 +74,8 @@ The format string is reused as often as necessary to satisfy the Any extra format specifications are evaluated with zero or the null string. .Pp -Character escape sequences are in backslash notation as defined in the -.St -ansiC , +Character escape sequences are in backslash notation as defined in +.St -isoC-2011 , with extensions. The characters and their meanings are as follows: @@ -101,8 +101,14 @@ Write a <vertical tab> character. Write a <single quote> character. .It Cm \e\e Write a backslash character. -.It Cm \ec Ns Ar char -Write a control character, where: +.It Cm \ec | Cm \ec Ns Ar char +In +.Cm %b +cut the string argument and make +.Nm printf +exits. +.Pp +Otherwise as format conversion escape: Write a control character, where: .Bl -bullet -compact .It .Cm @ @@ -422,41 +428,3 @@ command appeared in It is modeled after the standard library function, .Xr printf 3 . -.Sh CAVEATS -ANSI hexadecimal character constants were deliberately not provided. -.Pp -Trying to print a dash ("-") as the first character causes -.Nm -to interpret the dash as a program argument. -.Nm -- -must be used before -.Ar format . -.Pp -If the locale contains multibyte characters -(such as UTF-8), -the -.Cm c -format and -.Cm b -and -.Cm s -formats with a precision -may not operate as expected. -.Sh BUGS -Since the floating point numbers are translated from ASCII -to floating-point and then back again, floating-point precision may be lost. -(By default, the number is translated to an IEEE-754 double-precision -value before being printed. -The -.Cm L -modifier may produce additional precision, depending on the hardware platform.) -.Pp -The escape sequence \e000 is the string terminator. 
-When present in the argument for the -.Cm b -format, the argument will be truncated at the \e000 character. -.Pp -Multibyte characters are not recognized in format strings (this is only -a problem if -.Ql % -can appear inside a multibyte character). diff --git a/cmd/printf.c b/cmd/printf.c @@ -1,550 +1,79 @@ -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech> - * Copyright 2014 Garrett D'Amore <garrett@damore.org> - * Copyright 2010 Nexenta Systems, Inc. All rights reserved. - * Copyright (c) 1989, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ -/* - * Important: This file is used both as a standalone program /usr/bin/printf - * and as a builtin for /bin/sh (#define SHELL). - */ +// utils-std: Collection of commonly available Unix tools +// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> +// SPDX-License-Identifier: MPL-2.0 #define _POSIX_C_SOURCE 200809L - -#include "../lib/err.h" -#include "../lib/getopt_nolong.h" - -#include <assert.h> -#include <ctype.h> #include <errno.h> -#include <inttypes.h> -#include <limits.h> -#include <locale.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/types.h> -#include <unistd.h> -#include <wchar.h> - -#define PF(f, func) \ - do \ - { \ - if(havewidth) \ - if(haveprec) \ - (void)printf(f, fieldwidth, precision, func); \ - else \ - (void)printf(f, fieldwidth, func); \ - else if(haveprec) \ - (void)printf(f, precision, func); \ - else \ - (void)printf(f, func); \ - } while(0) - -static int asciicode(void); -static char *printf_doformat(char *, int *); -static int escape(char *, int, size_t *); -static int getchr(void); -static int getfloating(long double *, int); -static int getint(int *); -static int getnum(intmax_t *, uintmax_t *, int); -static const char *getstr(void); -static char *mknum(char *, char); -static void usage(void); - -static const char digits[] = "0123456789"; - -static char end_fmt[1]; - -static int myargc; -static char **myargv; -static char **gargv; -static 
char **maxargv; - -const char *argv0 = "printf"; +#include <stdio.h> // printf +#include <stdlib.h> // strtoul, strtod +#include <string.h> // strlen, memchr -int -main(int argc, char *argv[]) +// [1-9] +static int +isndigit(int c) { - size_t len; - int end, rval; - char *format, *fmt, *start; - - char *lc_all = setlocale(LC_ALL, ""); - if(lc_all == NULL) - { - fprintf(stderr, - "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n", - argv0, - strerror(errno)); - } - - for(int c = -1; (c = getopt_nolong(argc, argv, "")) != -1;) - { - switch(c) - { - case '?': - default: - usage(); - return (1); - } - } - - argc -= optind; - argv += optind; - - if(argc < 1) - { - usage(); - return (1); - } - - /* - * Basic algorithm is to scan the format string for conversion - * specifications -- once one is found, find out if the field - * width or precision is a '*'; if it is, gather up value. Note, - * format strings are reused as necessary to use up the provided - * arguments, arguments of zero/null string are provided to use - * up the format string. - */ - fmt = format = *argv; - escape(fmt, 1, &len); /* backslash interpretation */ - rval = end = 0; - gargv = ++argv; - - for(;;) - { - maxargv = gargv; - - myargv = gargv; - for(myargc = 0; gargv[myargc]; myargc++) - /* nop */; - start = fmt; - while(fmt < format + len) - { - if(fmt[0] == '%') - { - fwrite(start, 1, fmt - start, stdout); - if(fmt[1] == '%') - { - /* %% prints a % */ - putchar('%'); - fmt += 2; - } - else - { - fmt = printf_doformat(fmt, &rval); - if(fmt == NULL || fmt == end_fmt) - { - return (fmt == NULL ? 1 : rval); - } - end = 0; - } - start = fmt; - } - else - fmt++; - if(gargv > maxargv) maxargv = gargv; - } - gargv = maxargv; - - if(end == 1) - { - utils_warnx("missing format character"); - return (1); - } - fwrite(start, 1, fmt - start, stdout); - if(!*gargv) - { - return (rval); - } - /* Restart at the beginning of the format string. 
*/ - fmt = format; - end = 1; - } - /* NOTREACHED */ + return c >= '1' && c <= '9'; } -static char * -printf_doformat(char *fmt, int *rval) +// digits [0-9] +static int +isdigit(int c) { - static const char skip1[] = "#'-+ 0"; - int fieldwidth, haveprec, havewidth, mod_ldbl, precision; - char convch, nextch; - char start[strlen(fmt) + 1]; - char **fargv; - char *dptr; - int l; - - dptr = start; - *dptr++ = '%'; - *dptr = 0; - - fmt++; - - /* look for "n$" field index specifier */ - l = strspn(fmt, digits); - if((l > 0) && (fmt[l] == '$')) - { - int idx = atoi(fmt); - if(idx <= myargc) - { - gargv = &myargv[idx - 1]; - } - else - { - gargv = &myargv[myargc]; - } - if(gargv > maxargv) maxargv = gargv; - fmt += l + 1; - - /* save format argument */ - fargv = gargv; - } - else - { - fargv = NULL; - } - - /* skip to field width */ - while(*fmt && strchr(skip1, *fmt) != NULL) - { - *dptr++ = *fmt++; - *dptr = 0; - } - - if(*fmt == '*') - { - - fmt++; - l = strspn(fmt, digits); - if((l > 0) && (fmt[l] == '$')) - { - int idx = atoi(fmt); - if(fargv == NULL) - { - utils_warnx("incomplete use of n$"); - return (NULL); - } - if(idx <= myargc) - { - gargv = &myargv[idx - 1]; - } - else - { - gargv = &myargv[myargc]; - } - fmt += l + 1; - } - else if(fargv != NULL) - { - utils_warnx("incomplete use of n$"); - return (NULL); - } - - if(getint(&fieldwidth)) return (NULL); - if(gargv > maxargv) maxargv = gargv; - havewidth = 1; - - *dptr++ = '*'; - *dptr = 0; - } - else - { - havewidth = 0; - - /* skip to possible '.', get following precision */ - while(isdigit(*fmt)) - { - *dptr++ = *fmt++; - *dptr = 0; - } - } - - if(*fmt == '.') - { - /* precision present? 
*/ - fmt++; - *dptr++ = '.'; - - if(*fmt == '*') - { - - fmt++; - l = strspn(fmt, digits); - if((l > 0) && (fmt[l] == '$')) - { - int idx = atoi(fmt); - if(fargv == NULL) - { - utils_warnx("incomplete use of n$"); - return (NULL); - } - if(idx <= myargc) - { - gargv = &myargv[idx - 1]; - } - else - { - gargv = &myargv[myargc]; - } - fmt += l + 1; - } - else if(fargv != NULL) - { - utils_warnx("incomplete use of n$"); - return (NULL); - } - - if(getint(&precision)) return (NULL); - if(gargv > maxargv) maxargv = gargv; - haveprec = 1; - *dptr++ = '*'; - *dptr = 0; - } - else - { - haveprec = 0; - - /* skip to conversion char */ - while(isdigit(*fmt)) - { - *dptr++ = *fmt++; - *dptr = 0; - } - } - } - else - haveprec = 0; - if(!*fmt) - { - utils_warnx("missing format character"); - return (NULL); - } - *dptr++ = *fmt; - *dptr = 0; - - /* - * Look for a length modifier. POSIX doesn't have these, so - * we only support them for floating-point conversions, which - * are extensions. This is useful because the L modifier can - * be used to gain extra range and precision, while omitting - * it is more likely to produce consistent results on different - * architectures. This is not so important for integers - * because overflow is the only bad thing that can happen to - * them, but consider the command printf %a 1.1 - */ - if(*fmt == 'L') - { - mod_ldbl = 1; - fmt++; - if(!strchr("aAeEfFgG", *fmt)) - { - utils_warnx("bad modifier L for %%%c", *fmt); - return (NULL); - } - } - else - { - mod_ldbl = 0; - } - - /* save the current arg offset, and set to the format arg */ - if(fargv != NULL) - { - gargv = fargv; - } - - convch = *fmt; - nextch = *++fmt; - - *fmt = '\0'; - switch(convch) - { - case 'b': - { - size_t len; - char *p; - int getout; - - /* Convert "b" to "s" for output. 
*/ - start[strlen(start) - 1] = 's'; - if((p = strdup(getstr())) == NULL) - { - utils_warnx("%s", strerror(ENOMEM)); - return (NULL); - } - getout = escape(p, 0, &len); - PF(start, p); - /* Restore format for next loop. */ - - free(p); - if(getout) return (end_fmt); - break; - } - case 'c': - { - char p; - - p = getchr(); - if(p != '\0') PF(start, p); - break; - } - case 's': - { - const char *p; - - p = getstr(); - PF(start, p); - break; - } - case 'd': - case 'i': - case 'o': - case 'u': - case 'x': - case 'X': - { - char *f; - intmax_t val; - uintmax_t uval; - int signedconv; - - signedconv = (convch == 'd' || convch == 'i'); - if((f = mknum(start, convch)) == NULL) return (NULL); - if(getnum(&val, &uval, signedconv)) *rval = 1; - if(signedconv) - PF(f, val); - else - PF(f, uval); - break; - } - case 'e': - case 'E': - case 'f': - case 'F': - case 'g': - case 'G': - case 'a': - case 'A': - { - long double p; - - if(getfloating(&p, mod_ldbl)) *rval = 1; - if(mod_ldbl) - PF(start, p); - else - PF(start, (double)p); - break; - } - default: - utils_warnx("illegal format character '%c'", convch); - return (NULL); - } - *fmt = nextch; - /* return the gargv to the next element */ - return (fmt); + return c >= '0' && c <= '9'; } -static char * -mknum(char *str, char ch) +// hex digits [0-9A-Fa-f] +static int +isxdigit(int c) { - static char *copy; - static size_t copy_size; - char *newcopy; - size_t len, newlen; - - len = strlen(str) + 2; - if(len > copy_size) - { - newlen = len + 1023; - assert(newlen != 0); - if((newcopy = realloc(copy, newlen)) == NULL) - { - utils_warnx("%s", strerror(ENOMEM)); - return (NULL); - } - copy = newcopy; - copy_size = newlen; - } - - memmove(copy, str, len - 3); - copy[len - 3] = 'j'; - copy[len - 2] = ch; - copy[len - 1] = '\0'; - return (copy); + return isdigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); } +// len parameter needed because of NULL escapes +// returns 1 for handling '\c' early ends static int -escape(char 
*fmt, int percent, size_t *len) +escape(char *fmt, size_t *len, int percent) { - char *save, *store, c; + char *start = fmt; + char *store; + char c = '\0'; int value; /* - * Required by POSIX.1-2024 for printf: \\ \a \b \f \n \r \t \v \000 + * Required by POSIX.1-2024 for printf(1): \\ \a \b \c \f \n \r \t \v \000 * - * <https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04> * As inspiration, required by POSIX.1-2024 for dollar-single-quote($'…'): - * \" \' \\ \a \b \e \f \n\ r\ t\ \v \c0 \x00 \000 + * \" \' \\ \a \b \e \f \n\ r\ t\ \v \c0 \x00 \000 + * <https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04> */ - for(save = store = fmt; ((c = *fmt) != 0); ++fmt, ++store) + for(store = fmt; ((c = *fmt) != '\0') && fmt < (start + *len); ++fmt, ++store) { if(c != '\\') { *store = c; continue; } + switch(*++fmt) { - case '\0': /* EOS, user error */ - *store = '\\'; - *++store = '\0'; - *len = store - save; - return (0); - case '\\': /* backslash */ + case '\\': /* backslash; POSIX */ case '\'': /* single quote */ + default: *store = *fmt; break; - case 'a': /* bell/alert */ + case 'a': /* bell/alert; POSIX */ *store = '\a'; break; - case 'b': /* backspace */ + case 'b': /* backspace; POSIX */ *store = '\b'; break; case 'c': if(!percent) { + /* clear; POSIX */ *store = '\0'; - *len = store - save; - return (1); + *len = (size_t)(store - start); + return 1; } /* Assumes ASCII */ @@ -571,19 +100,19 @@ escape(char *fmt, int percent, size_t *len) case 'e': /* escape */ *store = '\033'; break; - case 'f': /* form-feed */ + case 'f': /* form-feed; POSIX */ *store = '\f'; break; - case 'n': /* newline */ + case 'n': /* newline; POSIX */ *store = '\n'; break; - case 'r': /* carriage-return */ + case 'r': /* carriage-return; POSIX */ *store = '\r'; break; - case 't': /* horizontal tab */ + case 't': /* horizontal tab; POSIX */ *store = '\t'; break; - case 'v': /* vertical tab */ + case 'v': /* vertical tab; POSIX 
*/ *store = '\v'; break; case 'x': /* hex */ @@ -602,7 +131,7 @@ escape(char *fmt, int percent, size_t *len) --fmt; *store = (char)value; break; - /* octal constant */ + /* octal; POSIX */ case '0': case '1': case '2': @@ -618,170 +147,428 @@ escape(char *fmt, int percent, size_t *len) value += *fmt - '0'; } --fmt; - if(percent && value == '%') - { - *store++ = '%'; - *store = '%'; - } - else - *store = (char)value; - break; - default: - *store = *fmt; + *store = (percent && value == '%') ? '%' : (char)value; break; } } - *store = '\0'; - *len = store - save; - return (0); -} -static int -getchr(void) -{ - if(!gargv || !*gargv) return ('\0'); - return ((int)**gargv++); + *store = '\0'; + *len = (size_t)(store - start); + return 0; } -static const char * -getstr(void) +static void +usage(void) { - if(!gargv || !*gargv) return (""); - return (*gargv++); + (void)fputs("usage: printf format [arguments...]\n", stderr); } -static int -getint(int *ip) +int +main(int argc, char *argv[]) { - intmax_t val; - uintmax_t uval; - int rval; + argc--; + argv++; - if(getnum(&val, &uval, 1)) return (1); - rval = 0; - if(val < INT_MIN || val > INT_MAX) + if(argc < 1) { - utils_warnx("%s: %s", *gargv, strerror(ERANGE)); - rval = 1; + usage(); + return 1; } - *ip = (int)val; - return (rval); -} -static int -getnum(intmax_t *ip, uintmax_t *uip, int signedconv) -{ - char *ep; - int rval; + char *fmt = argv[0]; + size_t fmtlen = strlen(fmt); - if(!gargv || !*gargv) - { - *ip = *uip = 0; - return (0); - } - if(**gargv == '"' || **gargv == '\'') - { - if(signedconv) - *ip = asciicode(); - else - *uip = asciicode(); - return (0); - } - rval = 0; - errno = 0; - if(signedconv) - *ip = strtoimax(*gargv, &ep, 0); - else - *uip = strtoumax(*gargv, &ep, 0); - if(ep == *gargv) - { - utils_warnx("%s: expected numeric value", *gargv); - rval = 1; - } - else if(*ep != '\0') - { - utils_warnx("%s: not completely converted", *gargv); - rval = 1; - } - if(errno == ERANGE) - { - utils_warnx("%s: %s", 
*gargv, strerror(ERANGE)); - rval = 1; - } - ++gargv; - return (rval); -} + if(escape(fmt, &fmtlen, 1) != 0) return 1; -static int -getfloating(long double *dp, int mod_ldbl) -{ - char *ep; - int rval; + argc--; + argv++; - if(!*gargv) - { - *dp = 0.0; - return (0); - } - if(**gargv == '"' || **gargv == '\'') - { - *dp = asciicode(); - return (0); - } - rval = 0; - errno = 0; - if(mod_ldbl) - *dp = strtold(*gargv, &ep); - else - *dp = strtod(*gargv, &ep); - if(ep == *gargv) - { - utils_warnx("%s: expected numeric value", *gargv); - rval = 1; - } - else if(*ep != '\0') - { - utils_warnx("%s: not completely converted", *gargv); - rval = 1; - } - if(errno == ERANGE) + // To keep argv intact for '%n$' format conversion specifiers + char **fmt_argv = argv; + unsigned int fmt_argn = 0; + + if(!strchr(fmt, '%')) { - utils_warnx("%s: %s", *gargv, strerror(ERANGE)); - rval = 1; + fwrite(fmt, 1, fmtlen, stdout); + return 0; } - ++gargv; - return (rval); -} -static int -asciicode(void) -{ - int ch; - wchar_t wch; - mbstate_t mbs; - - ch = (unsigned char)**gargv; - if(ch == '\'' || ch == '"') + do { - memset(&mbs, 0, sizeof(mbs)); - switch(mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs)) + for(char *fmt_idx = fmt; fmt_idx < (fmt + fmtlen); fmt_idx++) { - case(size_t)-2: - case(size_t)-1: - wch = (unsigned char)gargv[0][1]; - break; - case 0: - wch = 0; - break; - } - ch = wch; - } - ++gargv; - return (ch); -} + // Field width provided for consistency with C printf + int fwidth = 0; + /* "negative precision is taken as if the precision were omitted." 
— POSIX.1-2008 fprintf() */ + int precision = -1; + +#define FMT_FLAGS "'-+ #0" +#define FMT_BUF_SIZ sizeof("%" FMT_FLAGS "*.*d") + int fmt_bufi = 0; + static char fmt_buf[FMT_BUF_SIZ]; + fmt_buf[fmt_bufi++] = '%'; + + char *fmt_arg = NULL; + if(*fmt_idx != '%') + { + char *p = strchr(fmt_idx, '%'); + if(!p) p = fmt + fmtlen; -static void -usage(void) -{ - (void)fprintf(stderr, "usage: printf format [arguments ...]\n"); + fwrite(fmt_idx, 1, p - fmt_idx, stdout); + fmt_idx = (p - 1); + continue; + } + + fmt_idx++; + if(!(fmt_idx < (fmt + fmtlen))) return 0; + + // handle '%n$' if present + if(isndigit(*fmt_idx) && argc > 0) + { + errno = 0; + char *num_end = NULL; + unsigned int num = strtoul(fmt_idx, &num_end, 10); + if(errno == 0 && num != 0 && num_end && *num_end == '$') + { + fmt_arg = argv[(num - 1) % argc]; + fmt_idx = num_end + 1; + fmt_buf[fmt_bufi++] = '*'; + } + } + + /* flags */ + while(fmt_bufi < sizeof(FMT_FLAGS) && fmt_idx + 1 < (fmt + fmtlen) && + strchr(FMT_FLAGS, *fmt_idx) != NULL) + { + if(memchr(fmt_buf, *fmt_idx, fmt_bufi + 1)) + { + fprintf(stderr, + "fprintf: error: (format position %d) flag '%c' already set\n", + (int)(fmt_idx - fmt), + *fmt_idx); + return 1; + } + + fmt_buf[fmt_bufi++] = *fmt_idx; + fmt_idx++; + } + + // Field width from argument + fmt_buf[fmt_bufi++] = '*'; + if(fmt_idx[0] == '*') + { + if(argc <= 0) + { + fprintf(stderr, + "fprintf: error: (format position %d) field-width argument without format " + "arguments\n", + (int)(fmt_idx - fmt)); + return 1; + } + + char *fwidth_arg = NULL; + fmt_idx++; + if(isndigit(*fmt_idx)) + { + if(!fmt_arg) + { + fprintf(stderr, + "printf: error: (format position %d) field-width positional argument usage " + "('*n$') also needs format data to be positional (via '%%n$')\n", + (int)(fmt_idx - fmt)); + return 1; + } + + errno = 0; + char *num_end = NULL; + unsigned int num = strtoul(fmt_idx, &num_end, 10); + if(errno != 0) + { + fprintf( + stderr, + "printf: error: (format position %d) Failed 
parsing field-width as a number: %s\n", + (int)(fmt_idx - fmt), + strerror(errno)); + return 1; + } + if(!num_end || *num_end != '$') + { + fprintf(stderr, + "printf: error: (format position %d) Expected to find '$' after field-width " + "digits\n", + (int)(fmt_idx - fmt)); + return 1; + } + + fwidth_arg = fmt_argv[(num - 1) % argc]; + fmt_idx = num_end + 1; + } + else + { + fmt_arg = fmt_argv[fmt_argn++ % argc]; + fwidth_arg = fmt_argv[fmt_argn++ % argc]; + } + + errno = 0; + fwidth = strtoul(fwidth_arg, NULL, 0); + if(errno != 0) + { + fprintf(stderr, + "printf: error: Failed parsing argument (%s) as a number for field width: %s\n", + fwidth_arg, + strerror(errno)); + return 1; + } + } + else if(isdigit(fmt_idx[0])) + { + errno = 0; + char *num_end = NULL; + fwidth = strtoul(fmt_idx, &num_end, 10); + if(errno != 0) + { + fprintf( + stderr, + "printf: error: (format position %d) Failed parsing field-width as a number: %s\n", + (int)(fmt_idx - fmt), + strerror(errno)); + return 1; + } + if(!num_end) + { + fprintf(stderr, + "printf: error: (format position %d) No remaining characters after field-width " + "digits\n", + (int)(fmt_idx - fmt)); + return 1; + } + if(*num_end == '$') + { + fprintf(stderr, + "printf: error: (format position %d) Unexpectedly found '$' after '*'-less " + "field-width digits\n", + (int)(fmt_idx - fmt)); + return 1; + } + fmt_idx = num_end; + } + + /* precision */ + fmt_buf[fmt_bufi++] = '.'; + fmt_buf[fmt_bufi++] = '*'; + if(*fmt_idx == '.') + { + fmt_idx++; + + if(*fmt_idx == '*') + { + fmt_idx++; + + if(argc <= 0) + { + fprintf(stderr, + "fprintf: error: (format position %d) precision argument without format " + "arguments\n", + (int)(fmt_idx - fmt)); + return 1; + } + + char *prec_arg = NULL; + fmt_idx++; + if(isndigit(*fmt_idx)) + { + if(!fmt_arg) + { + fprintf(stderr, + "printf: error: (format position %d) precision positional argument usage " + "('.*n$') also needs format data to be positional (via '%%n$')\n", + (int)(fmt_idx - fmt)); 
+ return 1; + } + + errno = 0; + char *num_end = NULL; + unsigned int num = strtoul(fmt_idx, &num_end, 10); + if(errno != 0) + { + fprintf( + stderr, + "printf: error: (format position %d) Failed parsing precision as a number: %s\n", + (int)(fmt_idx - fmt), + strerror(errno)); + return 1; + } + if(!num_end || *num_end != '$') + { + fprintf(stderr, + "printf: error: (format position %d) Expected to find '$' after precision " + "digits\n", + (int)(fmt_idx - fmt)); + return 1; + } + + prec_arg = fmt_argv[(num - 1) % argc]; + + fmt_idx = num_end + 1; + } + else + { + prec_arg = fmt_argv[fmt_argn++ % argc]; + } + + errno = 0; + precision = strtoul(prec_arg, NULL, 0); + if(errno != 0) + { + fprintf(stderr, + "printf: error: Failed parsing argument (%s) as a number for precision: %s\n", + prec_arg, + strerror(errno)); + return 1; + } + } + else if(isdigit(fmt_idx[0])) + { + errno = 0; + char *num_end = NULL; + precision = strtoul(fmt_idx, &num_end, 10); + if(errno != 0) + { + fprintf( + stderr, + "printf: error: (format position %d) Failed parsing precision as a number: %s\n", + (int)(fmt_idx - fmt), + strerror(errno)); + return 1; + } + if(!num_end) + { + fprintf(stderr, + "printf: error: (format position %d) No remaining characters after precision's " + "digits\n", + (int)(fmt_idx - fmt)); + return 1; + } + if(*num_end == '$') + { + fprintf(stderr, + "printf: error: (format position %d) Unexpectedly found '$' after '*'-less " + "precision's digits\n", + (int)(fmt_idx - fmt)); + return 1; + } + fmt_idx = num_end; + } + else + { + fprintf(stderr, + "printf: error: (format position %d) Unknown precision format (char: '%c')\n", + (int)(fmt_idx - fmt), + *fmt_idx); + return 1; + } + } + + /* BSD compatibility */ + if(*fmt_idx == 'L') fmt_idx++; + + fmt_buf[fmt_bufi++] = *fmt_idx; + fmt_buf[fmt_bufi++] = '\0'; + + if(!fmt_arg) fmt_arg = (argc == 0) ? 
(char *)"" : fmt_argv[fmt_argn++ % argc]; + + switch(*fmt_idx) + { + case '%': + putchar(*fmt_idx); + break; + /* strings */ + case 's': + printf(fmt_buf, fwidth, precision, fmt_arg); + break; + case 'b': + { + size_t arglen = strlen(fmt_arg); + + int clear = escape(fmt_arg, &arglen, 0); + + if(arglen > precision) arglen = precision; + + /* left-justify if there's a '-' flag */ + if(memchr(fmt_buf, '-', fmt_bufi + 1)) + { + fwrite(fmt_arg, 1, arglen, stdout); + + for(int pad = fwidth - arglen; pad > 0; pad--) + putchar(' '); + } + else + { + for(int pad = fwidth - arglen; pad > 0; pad--) + putchar(' '); + + fwrite(fmt_arg, 1, arglen, stdout); + } + + if(clear) return 0; + + break; + } + case 'c': + printf("%*c", fwidth, *fmt_arg); + break; + /* integers */ + case 'd': + case 'i': + case 'o': + case 'u': + case 'x': + case 'X': + { + errno = 0; + unsigned long int num = strtoul(fmt_arg, NULL, 0); + if(errno != 0) + { + fprintf(stderr, + "printf: error: Failed parsing argument (%s) as a number for format conversion " + "'%%%c': %s\n", + fmt_arg, + *fmt_idx, + strerror(errno)); + return 1; + } + + printf(fmt_buf, fwidth, precision, num); + break; + } + /* floats */ + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + { + double num = strtod(fmt_arg, NULL); + if(errno != 0) + { + fprintf(stderr, + "printf: error: Failed parsing argument (%s) as a number for format conversion " + "'%%%c': %s\n", + fmt_arg, + *fmt_idx, + strerror(errno)); + return 1; + } + printf(fmt_buf, fwidth, precision, num); + break; + } + default: + fprintf(stderr, "printf: error: Unknown conversion specifier '%c'\n", *fmt_idx); + return 1; + } + } + } while(fmt_argn < argc); } diff --git a/common.mk b/common.mk @@ -7,6 +7,6 @@ lib/err.o: lib/err.c lib/err.h lib/consent.o: lib/consent.c lib/consent.h lib/tr_str.o: lib/tr_str.c lib/tr_str.h cmd/cat: cmd/cat.c lib/fs.o lib/getopt_nolong.o -cmd/printf: cmd/printf.c lib/err.o lib/getopt_nolong.o +cmd/printf: 
cmd/printf.c cmd/rm: cmd/rm.c lib/consent.o lib/getopt_nolong.o cmd/tr: cmd/tr.c lib/tr_str.o lib/err.o lib/getopt_nolong.o diff --git a/test-cmd/printf.sh b/test-cmd/printf.sh @@ -2,9 +2,9 @@ # SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me> # SPDX-License-Identifier: MPL-2.0 +plans=31 WD="$(dirname "$0")/../" target="${WD}/cmd/printf" -plans=6 . "${WD}/test-cmd/tap.sh" t esc '\b\t\n' ' @@ -13,14 +13,48 @@ t esc '\b\t\n' ' t octal '\041' '!' t hex '\x7B\x7d' '{}' -t repeat_fmt '%s\n foo bar' 'foo -bar -' - -var_c_upper='\c@\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ\c[\c\\c]\c^\c_ !"#$%%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?' +t_args repeat 'foo,bar,baz,' '%s,' foo bar baz +var_c_upper='\c@\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ\c[\c\\c]\c^\c_ !"#$\%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?' t_file esc_c_upper "${WD}/test-cmd/inputs/all_ascii" "${var_c_upper}" -var_c_lower='\c@\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz\c[\c\\c]\c^\c_ !"#$%%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?' - +var_c_lower='\c@\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz\c[\c\\c]\c^\c_ !"#$\%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?' t_file esc_c_lower "${WD}/test-cmd/inputs/all_ascii" "${var_c_lower}" + +t_args clear_vs_caret_esc 'foo :bar' 'foo \cH:%brat\n' 'bar\cHbaz' + +t_args fmt_b ' +!{}' '%b' '\b\t\n\041\x7B\x7d' + +t_args fmt_b_rightpad '!{} .' '%-6b%c' '\041\x7B\x7d' . +t_args fmt_b_leftpad ' !{}.' '%6b%c' '\041\x7B\x7d' . 
+ +t_args fmt_c 'foo' %c f oo oooo + +t_args fmt_d 10, %d, 10 +t_args fmt_Ld 10, %Ld, 10 +t_args fmt_i 10, %i, 10 +t_args fmt_o 12, %o, 10 +t_args fmt_u 10, %u, 10 +t_args fmt_x a, %x, 10 +t_args fmt_X A, %X, 10 + +t_args fmt_e '1.000000e+01,' %e, 10 +t_args fmt_E '1.000000E+01,' %E, 10 +t_args fmt_f '10.000000,' %f, 10 +t_args fmt_Lf '10.000000,' %Lf, 10 +t_args fmt_F '10.000000,' %F, 10 +t_args fmt_g '10,' %g, 10 +t_args fmt_G '10,' %G, 10 +t_args fmt_a '0x1.4p+3,' %a, 10 +t_args fmt_A '0X1.4P+3,' %A, 10 + +t_args nofmtarg '[ +]' '[%s\n]' + +t_args nofmtconv 'foobar +' 'foobar\n' 1 2 3 + +t_args nofmtconv_caret 'foo bar' 'foo \cHbar' + +t_args precision_s 'abcde' '%.5s' abcdefghijklmnopqrstuvwxyz