cmd/printf: rewrite, fixing documented bugs and getting rid of VLA - utils-std - Collection of commonly available Unix tools

commit: 280bfc2fc6c34d325f70eead7d0e95b25f7fdc02
parent 8afdb73377e618b0259a35cda81868d99ad92149
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Mon, 26 May 2025 08:36:32 +0200

cmd/printf: rewrite, fixing documented bugs and getting rid of VLA

Diffstat:
M cmd/printf.1 54 +++++++++++-------------------------------------------
M cmd/printf.c 1099 ++++++++++++++++++++++++++++++++-----------------------------------------------
M common.mk 2 +-
M test-cmd/printf.sh 50 ++++++++++++++++++++++++++++++++++++++++++--------

4 files changed, 497 insertions(+), 708 deletions(-)
diff --git a/cmd/printf.1 b/cmd/printf.1
@@ -29,7 +29,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd March 9, 2025
+.Dd May 26, 2025
 .Dt PRINTF 1
 .Os
 .Sh NAME
@@ -74,8 +74,8 @@ The format string is reused as often as necessary to satisfy the
 Any extra format specifications are evaluated with zero or the null
 string.
 .Pp
-Character escape sequences are in backslash notation as defined in the
-.St -ansiC ,
+Character escape sequences are in backslash notation as defined in
+.St -isoC-2011 ,
 with extensions.
 The characters and their meanings
 are as follows:
@@ -101,8 +101,14 @@ Write a <vertical tab> character.
 Write a <single quote> character.
 .It Cm \e\e
 Write a backslash character.
-.It Cm \ec Ns Ar char
-Write a control character, where:
+.It Cm \ec | Cm \ec Ns Ar char
+In a
+.Cm %b
+argument, truncate the string argument at that point and make
+.Nm printf
+exit.
+.Pp
+Otherwise, as an escape in the format string: write a control character, where:
 .Bl -bullet -compact
 .It
 .Cm @
@@ -422,41 +428,3 @@ command appeared in
 It is modeled
 after the standard library function,
 .Xr printf 3 .
-.Sh CAVEATS
-ANSI hexadecimal character constants were deliberately not provided.
-.Pp
-Trying to print a dash ("-") as the first character causes
-.Nm
-to interpret the dash as a program argument.
-.Nm --
-must be used before
-.Ar format .
-.Pp
-If the locale contains multibyte characters
-(such as UTF-8),
-the
-.Cm c
-format and
-.Cm b
-and
-.Cm s
-formats with a precision
-may not operate as expected.
-.Sh BUGS
-Since the floating point numbers are translated from ASCII
-to floating-point and then back again, floating-point precision may be lost.
-(By default, the number is translated to an IEEE-754 double-precision
-value before being printed.
-The
-.Cm L
-modifier may produce additional precision, depending on the hardware platform.)
-.Pp
-The escape sequence \e000 is the string terminator.
-When present in the argument for the
-.Cm b
-format, the argument will be truncated at the \e000 character.
-.Pp
-Multibyte characters are not recognized in format strings (this is only
-a problem if
-.Ql %
-can appear inside a multibyte character).
diff --git a/cmd/printf.c b/cmd/printf.c
@@ -1,550 +1,79 @@
-/*-
- * SPDX-License-Identifier: BSD-3-Clause
- *
- * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
- * Copyright 2014 Garrett D'Amore <garrett@damore.org>
- * Copyright 2010 Nexenta Systems, Inc.  All rights reserved.
- * Copyright (c) 1989, 1993
- *	The Regents of the University of California.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-/*
- * Important: This file is used both as a standalone program /usr/bin/printf
- * and as a builtin for /bin/sh (#define SHELL).
- */
+// utils-std: Collection of commonly available Unix tools
+// SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
+// SPDX-License-Identifier: MPL-2.0
 
 #define _POSIX_C_SOURCE 200809L
-
-#include "../lib/err.h"
-#include "../lib/getopt_nolong.h"
-
-#include <assert.h>
-#include <ctype.h>
 #include <errno.h>
-#include <inttypes.h>
-#include <limits.h>
-#include <locale.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <wchar.h>
-
-#define PF(f, func)                                                                                \
-	do                                                                                               \
-	{                                                                                                \
-		if(havewidth)                                                                                  \
-			if(haveprec)                                                                                 \
-				(void)printf(f, fieldwidth, precision, func);                                              \
-			else                                                                                         \
-				(void)printf(f, fieldwidth, func);                                                         \
-		else if(haveprec)                                                                              \
-			(void)printf(f, precision, func);                                                            \
-		else                                                                                           \
-			(void)printf(f, func);                                                                       \
-	} while(0)
-
-static int asciicode(void);
-static char *printf_doformat(char *, int *);
-static int escape(char *, int, size_t *);
-static int getchr(void);
-static int getfloating(long double *, int);
-static int getint(int *);
-static int getnum(intmax_t *, uintmax_t *, int);
-static const char *getstr(void);
-static char *mknum(char *, char);
-static void usage(void);
-
-static const char digits[] = "0123456789";
-
-static char end_fmt[1];
-
-static int myargc;
-static char **myargv;
-static char **gargv;
-static char **maxargv;
-
-const char *argv0 = "printf";
+#include <stdio.h>  // printf
+#include <stdlib.h> // strtoul, strtod
+#include <string.h> // strlen, memchr
 
-int
-main(int argc, char *argv[])
+// [1-9]
+static int
+isndigit(int c)
 {
-	size_t len;
-	int end, rval;
-	char *format, *fmt, *start;
-
-	char *lc_all = setlocale(LC_ALL, "");
-	if(lc_all == NULL)
-	{
-		fprintf(stderr,
-		        "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n",
-		        argv0,
-		        strerror(errno));
-	}
-
-	for(int c = -1; (c = getopt_nolong(argc, argv, "")) != -1;)
-	{
-		switch(c)
-		{
-		case '?':
-		default:
-			usage();
-			return (1);
-		}
-	}
-
-	argc -= optind;
-	argv += optind;
-
-	if(argc < 1)
-	{
-		usage();
-		return (1);
-	}
-
-	/*
-	 * Basic algorithm is to scan the format string for conversion
-	 * specifications -- once one is found, find out if the field
-	 * width or precision is a '*'; if it is, gather up value.  Note,
-	 * format strings are reused as necessary to use up the provided
-	 * arguments, arguments of zero/null string are provided to use
-	 * up the format string.
-	 */
-	fmt = format = *argv;
-	escape(fmt, 1, &len); /* backslash interpretation */
-	rval = end = 0;
-	gargv = ++argv;
-
-	for(;;)
-	{
-		maxargv = gargv;
-
-		myargv = gargv;
-		for(myargc = 0; gargv[myargc]; myargc++)
-			/* nop */;
-		start = fmt;
-		while(fmt < format + len)
-		{
-			if(fmt[0] == '%')
-			{
-				fwrite(start, 1, fmt - start, stdout);
-				if(fmt[1] == '%')
-				{
-					/* %% prints a % */
-					putchar('%');
-					fmt += 2;
-				}
-				else
-				{
-					fmt = printf_doformat(fmt, &rval);
-					if(fmt == NULL || fmt == end_fmt)
-					{
-						return (fmt == NULL ? 1 : rval);
-					}
-					end = 0;
-				}
-				start = fmt;
-			}
-			else
-				fmt++;
-			if(gargv > maxargv) maxargv = gargv;
-		}
-		gargv = maxargv;
-
-		if(end == 1)
-		{
-			utils_warnx("missing format character");
-			return (1);
-		}
-		fwrite(start, 1, fmt - start, stdout);
-		if(!*gargv)
-		{
-			return (rval);
-		}
-		/* Restart at the beginning of the format string. */
-		fmt = format;
-		end = 1;
-	}
-	/* NOTREACHED */
+	return c >= '1' && c <= '9';
 }
 
-static char *
-printf_doformat(char *fmt, int *rval)
+// digits [0-9]
+static int
+isdigit(int c)
 {
-	static const char skip1[] = "#'-+ 0";
-	int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
-	char convch, nextch;
-	char start[strlen(fmt) + 1];
-	char **fargv;
-	char *dptr;
-	int l;
-
-	dptr = start;
-	*dptr++ = '%';
-	*dptr = 0;
-
-	fmt++;
-
-	/* look for "n$" field index specifier */
-	l = strspn(fmt, digits);
-	if((l > 0) && (fmt[l] == '$'))
-	{
-		int idx = atoi(fmt);
-		if(idx <= myargc)
-		{
-			gargv = &myargv[idx - 1];
-		}
-		else
-		{
-			gargv = &myargv[myargc];
-		}
-		if(gargv > maxargv) maxargv = gargv;
-		fmt += l + 1;
-
-		/* save format argument */
-		fargv = gargv;
-	}
-	else
-	{
-		fargv = NULL;
-	}
-
-	/* skip to field width */
-	while(*fmt && strchr(skip1, *fmt) != NULL)
-	{
-		*dptr++ = *fmt++;
-		*dptr = 0;
-	}
-
-	if(*fmt == '*')
-	{
-
-		fmt++;
-		l = strspn(fmt, digits);
-		if((l > 0) && (fmt[l] == '$'))
-		{
-			int idx = atoi(fmt);
-			if(fargv == NULL)
-			{
-				utils_warnx("incomplete use of n$");
-				return (NULL);
-			}
-			if(idx <= myargc)
-			{
-				gargv = &myargv[idx - 1];
-			}
-			else
-			{
-				gargv = &myargv[myargc];
-			}
-			fmt += l + 1;
-		}
-		else if(fargv != NULL)
-		{
-			utils_warnx("incomplete use of n$");
-			return (NULL);
-		}
-
-		if(getint(&fieldwidth)) return (NULL);
-		if(gargv > maxargv) maxargv = gargv;
-		havewidth = 1;
-
-		*dptr++ = '*';
-		*dptr = 0;
-	}
-	else
-	{
-		havewidth = 0;
-
-		/* skip to possible '.', get following precision */
-		while(isdigit(*fmt))
-		{
-			*dptr++ = *fmt++;
-			*dptr = 0;
-		}
-	}
-
-	if(*fmt == '.')
-	{
-		/* precision present? */
-		fmt++;
-		*dptr++ = '.';
-
-		if(*fmt == '*')
-		{
-
-			fmt++;
-			l = strspn(fmt, digits);
-			if((l > 0) && (fmt[l] == '$'))
-			{
-				int idx = atoi(fmt);
-				if(fargv == NULL)
-				{
-					utils_warnx("incomplete use of n$");
-					return (NULL);
-				}
-				if(idx <= myargc)
-				{
-					gargv = &myargv[idx - 1];
-				}
-				else
-				{
-					gargv = &myargv[myargc];
-				}
-				fmt += l + 1;
-			}
-			else if(fargv != NULL)
-			{
-				utils_warnx("incomplete use of n$");
-				return (NULL);
-			}
-
-			if(getint(&precision)) return (NULL);
-			if(gargv > maxargv) maxargv = gargv;
-			haveprec = 1;
-			*dptr++ = '*';
-			*dptr = 0;
-		}
-		else
-		{
-			haveprec = 0;
-
-			/* skip to conversion char */
-			while(isdigit(*fmt))
-			{
-				*dptr++ = *fmt++;
-				*dptr = 0;
-			}
-		}
-	}
-	else
-		haveprec = 0;
-	if(!*fmt)
-	{
-		utils_warnx("missing format character");
-		return (NULL);
-	}
-	*dptr++ = *fmt;
-	*dptr = 0;
-
-	/*
-	 * Look for a length modifier.  POSIX doesn't have these, so
-	 * we only support them for floating-point conversions, which
-	 * are extensions.  This is useful because the L modifier can
-	 * be used to gain extra range and precision, while omitting
-	 * it is more likely to produce consistent results on different
-	 * architectures.  This is not so important for integers
-	 * because overflow is the only bad thing that can happen to
-	 * them, but consider the command  printf %a 1.1
-	 */
-	if(*fmt == 'L')
-	{
-		mod_ldbl = 1;
-		fmt++;
-		if(!strchr("aAeEfFgG", *fmt))
-		{
-			utils_warnx("bad modifier L for %%%c", *fmt);
-			return (NULL);
-		}
-	}
-	else
-	{
-		mod_ldbl = 0;
-	}
-
-	/* save the current arg offset, and set to the format arg */
-	if(fargv != NULL)
-	{
-		gargv = fargv;
-	}
-
-	convch = *fmt;
-	nextch = *++fmt;
-
-	*fmt = '\0';
-	switch(convch)
-	{
-	case 'b':
-	{
-		size_t len;
-		char *p;
-		int getout;
-
-		/* Convert "b" to "s" for output. */
-		start[strlen(start) - 1] = 's';
-		if((p = strdup(getstr())) == NULL)
-		{
-			utils_warnx("%s", strerror(ENOMEM));
-			return (NULL);
-		}
-		getout = escape(p, 0, &len);
-		PF(start, p);
-		/* Restore format for next loop. */
-
-		free(p);
-		if(getout) return (end_fmt);
-		break;
-	}
-	case 'c':
-	{
-		char p;
-
-		p = getchr();
-		if(p != '\0') PF(start, p);
-		break;
-	}
-	case 's':
-	{
-		const char *p;
-
-		p = getstr();
-		PF(start, p);
-		break;
-	}
-	case 'd':
-	case 'i':
-	case 'o':
-	case 'u':
-	case 'x':
-	case 'X':
-	{
-		char *f;
-		intmax_t val;
-		uintmax_t uval;
-		int signedconv;
-
-		signedconv = (convch == 'd' || convch == 'i');
-		if((f = mknum(start, convch)) == NULL) return (NULL);
-		if(getnum(&val, &uval, signedconv)) *rval = 1;
-		if(signedconv)
-			PF(f, val);
-		else
-			PF(f, uval);
-		break;
-	}
-	case 'e':
-	case 'E':
-	case 'f':
-	case 'F':
-	case 'g':
-	case 'G':
-	case 'a':
-	case 'A':
-	{
-		long double p;
-
-		if(getfloating(&p, mod_ldbl)) *rval = 1;
-		if(mod_ldbl)
-			PF(start, p);
-		else
-			PF(start, (double)p);
-		break;
-	}
-	default:
-		utils_warnx("illegal format character '%c'", convch);
-		return (NULL);
-	}
-	*fmt = nextch;
-	/* return the gargv to the next element */
-	return (fmt);
+	return c >= '0' && c <= '9';
 }
 
-static char *
-mknum(char *str, char ch)
+// hex digits [0-9A-Fa-f]
+static int
+isxdigit(int c)
 {
-	static char *copy;
-	static size_t copy_size;
-	char *newcopy;
-	size_t len, newlen;
-
-	len = strlen(str) + 2;
-	if(len > copy_size)
-	{
-		newlen = len + 1023;
-		assert(newlen != 0);
-		if((newcopy = realloc(copy, newlen)) == NULL)
-		{
-			utils_warnx("%s", strerror(ENOMEM));
-			return (NULL);
-		}
-		copy = newcopy;
-		copy_size = newlen;
-	}
-
-	memmove(copy, str, len - 3);
-	copy[len - 3] = 'j';
-	copy[len - 2] = ch;
-	copy[len - 1] = '\0';
-	return (copy);
+	return isdigit(c) || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
 }
 
+// The len parameter is needed because escapes can produce NUL bytes
+// Returns 1 when a '\c' escape ended processing early, 0 otherwise
 static int
-escape(char *fmt, int percent, size_t *len)
+escape(char *fmt, size_t *len, int percent)
 {
-	char *save, *store, c;
+	char *start = fmt;
+	char *store;
+	char c = '\0';
 	int value;
 
 	/*
-	 * Required by POSIX.1-2024 for printf: \\ \a \b \f \n \r \t \v \000
+	 * Required by POSIX.1-2024 for printf(1): \\ \a \b \c \f \n \r \t \v \000
 	 *
-	 * <https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04>
 	 * As inspiration, required by POSIX.1-2024 for dollar-single-quote($'…'):
-	 *     \" \' \\ \a \b \e \f \n\ r\ t\ \v \c0 \x00 \000
+	 *    \" \' \\ \a \b \e \f \n \r \t \v \c0 \x00 \000
+	 * <https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04>
 	 */
-	for(save = store = fmt; ((c = *fmt) != 0); ++fmt, ++store)
+	for(store = fmt; ((c = *fmt) != '\0') && fmt < (start + *len); ++fmt, ++store)
 	{
 		if(c != '\\')
 		{
 			*store = c;
 			continue;
 		}
+
 		switch(*++fmt)
 		{
-		case '\0': /* EOS, user error */
-			*store = '\\';
-			*++store = '\0';
-			*len = store - save;
-			return (0);
-		case '\\': /* backslash */
+		case '\\': /* backslash; POSIX */
 		case '\'': /* single quote */
+		default:
 			*store = *fmt;
 			break;
-		case 'a': /* bell/alert */
+		case 'a': /* bell/alert; POSIX */
 			*store = '\a';
 			break;
-		case 'b': /* backspace */
+		case 'b': /* backspace; POSIX */
 			*store = '\b';
 			break;
 		case 'c':
 			if(!percent)
 			{
+				/* clear; POSIX */
 				*store = '\0';
-				*len = store - save;
-				return (1);
+				*len = (size_t)(store - start);
+				return 1;
 			}
 
 			/* Assumes ASCII */
@@ -571,19 +100,19 @@ escape(char *fmt, int percent, size_t *len)
 		case 'e': /* escape */
 			*store = '\033';
 			break;
-		case 'f': /* form-feed */
+		case 'f': /* form-feed; POSIX */
 			*store = '\f';
 			break;
-		case 'n': /* newline */
+		case 'n': /* newline; POSIX */
 			*store = '\n';
 			break;
-		case 'r': /* carriage-return */
+		case 'r': /* carriage-return; POSIX */
 			*store = '\r';
 			break;
-		case 't': /* horizontal tab */
+		case 't': /* horizontal tab; POSIX */
 			*store = '\t';
 			break;
-		case 'v': /* vertical tab */
+		case 'v': /* vertical tab; POSIX */
 			*store = '\v';
 			break;
 		case 'x': /* hex */
@@ -602,7 +131,7 @@ escape(char *fmt, int percent, size_t *len)
 			--fmt;
 			*store = (char)value;
 			break;
-			/* octal constant */
+		/* octal; POSIX */
 		case '0':
 		case '1':
 		case '2':
@@ -618,170 +147,428 @@ escape(char *fmt, int percent, size_t *len)
 				value += *fmt - '0';
 			}
 			--fmt;
-			if(percent && value == '%')
-			{
-				*store++ = '%';
-				*store = '%';
-			}
-			else
-				*store = (char)value;
-			break;
-		default:
-			*store = *fmt;
+			*store = (percent && value == '%') ? '%' : (char)value;
 			break;
 		}
 	}
-	*store = '\0';
-	*len = store - save;
-	return (0);
-}
 
-static int
-getchr(void)
-{
-	if(!gargv || !*gargv) return ('\0');
-	return ((int)**gargv++);
+	*store = '\0';
+	*len = (size_t)(store - start);
+	return 0;
 }
 
-static const char *
-getstr(void)
+static void
+usage(void)
 {
-	if(!gargv || !*gargv) return ("");
-	return (*gargv++);
+	(void)fputs("usage: printf format [arguments...]\n", stderr);
 }
 
-static int
-getint(int *ip)
+int
+main(int argc, char *argv[])
 {
-	intmax_t val;
-	uintmax_t uval;
-	int rval;
+	argc--;
+	argv++;
 
-	if(getnum(&val, &uval, 1)) return (1);
-	rval = 0;
-	if(val < INT_MIN || val > INT_MAX)
+	if(argc < 1)
 	{
-		utils_warnx("%s: %s", *gargv, strerror(ERANGE));
-		rval = 1;
+		usage();
+		return 1;
 	}
-	*ip = (int)val;
-	return (rval);
-}
 
-static int
-getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
-{
-	char *ep;
-	int rval;
+	char *fmt = argv[0];
+	size_t fmtlen = strlen(fmt);
 
-	if(!gargv || !*gargv)
-	{
-		*ip = *uip = 0;
-		return (0);
-	}
-	if(**gargv == '"' || **gargv == '\'')
-	{
-		if(signedconv)
-			*ip = asciicode();
-		else
-			*uip = asciicode();
-		return (0);
-	}
-	rval = 0;
-	errno = 0;
-	if(signedconv)
-		*ip = strtoimax(*gargv, &ep, 0);
-	else
-		*uip = strtoumax(*gargv, &ep, 0);
-	if(ep == *gargv)
-	{
-		utils_warnx("%s: expected numeric value", *gargv);
-		rval = 1;
-	}
-	else if(*ep != '\0')
-	{
-		utils_warnx("%s: not completely converted", *gargv);
-		rval = 1;
-	}
-	if(errno == ERANGE)
-	{
-		utils_warnx("%s: %s", *gargv, strerror(ERANGE));
-		rval = 1;
-	}
-	++gargv;
-	return (rval);
-}
+	if(escape(fmt, &fmtlen, 1) != 0) return 1;
 
-static int
-getfloating(long double *dp, int mod_ldbl)
-{
-	char *ep;
-	int rval;
+	argc--;
+	argv++;
 
-	if(!*gargv)
-	{
-		*dp = 0.0;
-		return (0);
-	}
-	if(**gargv == '"' || **gargv == '\'')
-	{
-		*dp = asciicode();
-		return (0);
-	}
-	rval = 0;
-	errno = 0;
-	if(mod_ldbl)
-		*dp = strtold(*gargv, &ep);
-	else
-		*dp = strtod(*gargv, &ep);
-	if(ep == *gargv)
-	{
-		utils_warnx("%s: expected numeric value", *gargv);
-		rval = 1;
-	}
-	else if(*ep != '\0')
-	{
-		utils_warnx("%s: not completely converted", *gargv);
-		rval = 1;
-	}
-	if(errno == ERANGE)
+	// To keep argv intact for '%n$' format conversion specifiers
+	char **fmt_argv = argv;
+	unsigned int fmt_argn = 0;
+
+	if(!strchr(fmt, '%'))
 	{
-		utils_warnx("%s: %s", *gargv, strerror(ERANGE));
-		rval = 1;
+		fwrite(fmt, 1, fmtlen, stdout);
+		return 0;
 	}
-	++gargv;
-	return (rval);
-}
 
-static int
-asciicode(void)
-{
-	int ch;
-	wchar_t wch;
-	mbstate_t mbs;
-
-	ch = (unsigned char)**gargv;
-	if(ch == '\'' || ch == '"')
+	do
 	{
-		memset(&mbs, 0, sizeof(mbs));
-		switch(mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs))
+		for(char *fmt_idx = fmt; fmt_idx < (fmt + fmtlen); fmt_idx++)
 		{
-		case(size_t)-2:
-		case(size_t)-1:
-			wch = (unsigned char)gargv[0][1];
-			break;
-		case 0:
-			wch = 0;
-			break;
-		}
-		ch = wch;
-	}
-	++gargv;
-	return (ch);
-}
+			// Field width provided for consistency with C printf
+			int fwidth = 0;
+			/* "negative precision is taken as if the precision were omitted." — POSIX.1-2008 fprintf() */
+			int precision = -1;
+
+#define FMT_FLAGS "'-+ #0"
+#define FMT_BUF_SIZ sizeof("%" FMT_FLAGS "*.*d")
+			int fmt_bufi = 0;
+			static char fmt_buf[FMT_BUF_SIZ];
+			fmt_buf[fmt_bufi++] = '%';
+
+			char *fmt_arg = NULL;
+			if(*fmt_idx != '%')
+			{
+				char *p = strchr(fmt_idx, '%');
+				if(!p) p = fmt + fmtlen;
 
-static void
-usage(void)
-{
-	(void)fprintf(stderr, "usage: printf format [arguments ...]\n");
+				fwrite(fmt_idx, 1, p - fmt_idx, stdout);
+				fmt_idx = (p - 1);
+				continue;
+			}
+
+			fmt_idx++;
+			if(!(fmt_idx < (fmt + fmtlen))) return 0;
+
+			// handle '%n$' if present
+			if(isndigit(*fmt_idx) && argc > 0)
+			{
+				errno = 0;
+				char *num_end = NULL;
+				unsigned int num = strtoul(fmt_idx, &num_end, 10);
+				if(errno == 0 && num != 0 && num_end && *num_end == '$')
+				{
+					fmt_arg = argv[(num - 1) % argc];
+					fmt_idx = num_end + 1;
+					fmt_buf[fmt_bufi++] = '*';
+				}
+			}
+
+			/* flags */
+			while(fmt_bufi < sizeof(FMT_FLAGS) && fmt_idx + 1 < (fmt + fmtlen) &&
+			      strchr(FMT_FLAGS, *fmt_idx) != NULL)
+			{
+				if(memchr(fmt_buf, *fmt_idx, fmt_bufi + 1))
+				{
+					fprintf(stderr,
+					        "fprintf: error: (format position %d) flag '%c' already set\n",
+					        (int)(fmt_idx - fmt),
+					        *fmt_idx);
+					return 1;
+				}
+
+				fmt_buf[fmt_bufi++] = *fmt_idx;
+				fmt_idx++;
+			}
+
+			// Field width from argument
+			fmt_buf[fmt_bufi++] = '*';
+			if(fmt_idx[0] == '*')
+			{
+				if(argc <= 0)
+				{
+					fprintf(stderr,
+					        "fprintf: error: (format position %d) field-width argument without format "
+					        "arguments\n",
+					        (int)(fmt_idx - fmt));
+					return 1;
+				}
+
+				char *fwidth_arg = NULL;
+				fmt_idx++;
+				if(isndigit(*fmt_idx))
+				{
+					if(!fmt_arg)
+					{
+						fprintf(stderr,
+						        "printf: error: (format position %d) field-width positional argument usage "
+						        "('*n$') also needs format data to be positional (via '%%n$')\n",
+						        (int)(fmt_idx - fmt));
+						return 1;
+					}
+
+					errno = 0;
+					char *num_end = NULL;
+					unsigned int num = strtoul(fmt_idx, &num_end, 10);
+					if(errno != 0)
+					{
+						fprintf(
+						    stderr,
+						    "printf: error: (format position %d) Failed parsing field-width as a number: %s\n",
+						    (int)(fmt_idx - fmt),
+						    strerror(errno));
+						return 1;
+					}
+					if(!num_end || *num_end != '$')
+					{
+						fprintf(stderr,
+						        "printf: error: (format position %d) Expected to find '$' after field-width "
+						        "digits\n",
+						        (int)(fmt_idx - fmt));
+						return 1;
+					}
+
+					fwidth_arg = fmt_argv[(num - 1) % argc];
+					fmt_idx = num_end + 1;
+				}
+				else
+				{
+					fmt_arg = fmt_argv[fmt_argn++ % argc];
+					fwidth_arg = fmt_argv[fmt_argn++ % argc];
+				}
+
+				errno = 0;
+				fwidth = strtoul(fwidth_arg, NULL, 0);
+				if(errno != 0)
+				{
+					fprintf(stderr,
+					        "printf: error: Failed parsing argument (%s) as a number for field width: %s\n",
+					        fwidth_arg,
+					        strerror(errno));
+					return 1;
+				}
+			}
+			else if(isdigit(fmt_idx[0]))
+			{
+				errno = 0;
+				char *num_end = NULL;
+				fwidth = strtoul(fmt_idx, &num_end, 10);
+				if(errno != 0)
+				{
+					fprintf(
+					    stderr,
+					    "printf: error: (format position %d) Failed parsing field-width as a number: %s\n",
+					    (int)(fmt_idx - fmt),
+					    strerror(errno));
+					return 1;
+				}
+				if(!num_end)
+				{
+					fprintf(stderr,
+					        "printf: error: (format position %d) No remaining characters after field-width "
+					        "digits\n",
+					        (int)(fmt_idx - fmt));
+					return 1;
+				}
+				if(*num_end == '$')
+				{
+					fprintf(stderr,
+					        "printf: error: (format position %d) Unexpectedly found '$' after '*'-less "
+					        "field-width digits\n",
+					        (int)(fmt_idx - fmt));
+					return 1;
+				}
+				fmt_idx = num_end;
+			}
+
+			/* precision */
+			fmt_buf[fmt_bufi++] = '.';
+			fmt_buf[fmt_bufi++] = '*';
+			if(*fmt_idx == '.')
+			{
+				fmt_idx++;
+
+				if(*fmt_idx == '*')
+				{
+					fmt_idx++;
+
+					if(argc <= 0)
+					{
+						fprintf(stderr,
+						        "fprintf: error: (format position %d) precision argument without format "
+						        "arguments\n",
+						        (int)(fmt_idx - fmt));
+						return 1;
+					}
+
+					char *prec_arg = NULL;
+					fmt_idx++;
+					if(isndigit(*fmt_idx))
+					{
+						if(!fmt_arg)
+						{
+							fprintf(stderr,
+							        "printf: error: (format position %d) precision positional argument usage "
+							        "('.*n$') also needs format data to be positional (via '%%n$')\n",
+							        (int)(fmt_idx - fmt));
+							return 1;
+						}
+
+						errno = 0;
+						char *num_end = NULL;
+						unsigned int num = strtoul(fmt_idx, &num_end, 10);
+						if(errno != 0)
+						{
+							fprintf(
+							    stderr,
+							    "printf: error: (format position %d) Failed parsing precision as a number: %s\n",
+							    (int)(fmt_idx - fmt),
+							    strerror(errno));
+							return 1;
+						}
+						if(!num_end || *num_end != '$')
+						{
+							fprintf(stderr,
+							        "printf: error: (format position %d) Expected to find '$' after precision "
+							        "digits\n",
+							        (int)(fmt_idx - fmt));
+							return 1;
+						}
+
+						prec_arg = fmt_argv[(num - 1) % argc];
+
+						fmt_idx = num_end + 1;
+					}
+					else
+					{
+						prec_arg = fmt_argv[fmt_argn++ % argc];
+					}
+
+					errno = 0;
+					precision = strtoul(prec_arg, NULL, 0);
+					if(errno != 0)
+					{
+						fprintf(stderr,
+						        "printf: error: Failed parsing argument (%s) as a number for precision: %s\n",
+						        prec_arg,
+						        strerror(errno));
+						return 1;
+					}
+				}
+				else if(isdigit(fmt_idx[0]))
+				{
+					errno = 0;
+					char *num_end = NULL;
+					precision = strtoul(fmt_idx, &num_end, 10);
+					if(errno != 0)
+					{
+						fprintf(
+						    stderr,
+						    "printf: error: (format position %d) Failed parsing precision as a number: %s\n",
+						    (int)(fmt_idx - fmt),
+						    strerror(errno));
+						return 1;
+					}
+					if(!num_end)
+					{
+						fprintf(stderr,
+						        "printf: error: (format position %d) No remaining characters after precision's "
+						        "digits\n",
+						        (int)(fmt_idx - fmt));
+						return 1;
+					}
+					if(*num_end == '$')
+					{
+						fprintf(stderr,
+						        "printf: error: (format position %d) Unexpectedly found '$' after '*'-less "
+						        "precision's digits\n",
+						        (int)(fmt_idx - fmt));
+						return 1;
+					}
+					fmt_idx = num_end;
+				}
+				else
+				{
+					fprintf(stderr,
+					        "printf: error: (format position %d) Unknown precision format (char: '%c')\n",
+					        (int)(fmt_idx - fmt),
+					        *fmt_idx);
+					return 1;
+				}
+			}
+
+			/* BSD compatibility */
+			if(*fmt_idx == 'L') fmt_idx++;
+
+			fmt_buf[fmt_bufi++] = *fmt_idx;
+			fmt_buf[fmt_bufi++] = '\0';
+
+			if(!fmt_arg) fmt_arg = (argc == 0) ? (char *)"" : fmt_argv[fmt_argn++ % argc];
+
+			switch(*fmt_idx)
+			{
+			case '%':
+				putchar(*fmt_idx);
+				break;
+			/* strings */
+			case 's':
+				printf(fmt_buf, fwidth, precision, fmt_arg);
+				break;
+			case 'b':
+			{
+				size_t arglen = strlen(fmt_arg);
+
+				int clear = escape(fmt_arg, &arglen, 0);
+
+				if(arglen > precision) arglen = precision;
+
+				/* left-justify if there's a '-' flag */
+				if(memchr(fmt_buf, '-', fmt_bufi + 1))
+				{
+					fwrite(fmt_arg, 1, arglen, stdout);
+
+					for(int pad = fwidth - arglen; pad > 0; pad--)
+						putchar(' ');
+				}
+				else
+				{
+					for(int pad = fwidth - arglen; pad > 0; pad--)
+						putchar(' ');
+
+					fwrite(fmt_arg, 1, arglen, stdout);
+				}
+
+				if(clear) return 0;
+
+				break;
+			}
+			case 'c':
+				printf("%*c", fwidth, *fmt_arg);
+				break;
+			/* integers */
+			case 'd':
+			case 'i':
+			case 'o':
+			case 'u':
+			case 'x':
+			case 'X':
+			{
+				errno = 0;
+				unsigned long int num = strtoul(fmt_arg, NULL, 0);
+				if(errno != 0)
+				{
+					fprintf(stderr,
+					        "printf: error: Failed parsing argument (%s) as a number for format conversion "
+					        "'%%%c': %s\n",
+					        fmt_arg,
+					        *fmt_idx,
+					        strerror(errno));
+					return 1;
+				}
+
+				printf(fmt_buf, fwidth, precision, num);
+				break;
+			}
+			/* floats */
+			case 'a':
+			case 'A':
+			case 'e':
+			case 'E':
+			case 'f':
+			case 'F':
+			case 'g':
+			case 'G':
+			{
+				double num = strtod(fmt_arg, NULL);
+				if(errno != 0)
+				{
+					fprintf(stderr,
+					        "printf: error: Failed parsing argument (%s) as a number for format conversion "
+					        "'%%%c': %s\n",
+					        fmt_arg,
+					        *fmt_idx,
+					        strerror(errno));
+					return 1;
+				}
+				printf(fmt_buf, fwidth, precision, num);
+				break;
+			}
+			default:
+				fprintf(stderr, "printf: error: Unknown conversion specifier '%c'\n", *fmt_idx);
+				return 1;
+			}
+		}
+	} while(fmt_argn < argc);
 }
diff --git a/common.mk b/common.mk
@@ -7,6 +7,6 @@ lib/err.o: lib/err.c lib/err.h
 lib/consent.o: lib/consent.c lib/consent.h
 lib/tr_str.o: lib/tr_str.c lib/tr_str.h
 cmd/cat: cmd/cat.c lib/fs.o lib/getopt_nolong.o
-cmd/printf: cmd/printf.c lib/err.o lib/getopt_nolong.o
+cmd/printf: cmd/printf.c
 cmd/rm: cmd/rm.c lib/consent.o lib/getopt_nolong.o
 cmd/tr: cmd/tr.c lib/tr_str.o lib/err.o lib/getopt_nolong.o
diff --git a/test-cmd/printf.sh b/test-cmd/printf.sh
@@ -2,9 +2,9 @@
 # SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
 # SPDX-License-Identifier: MPL-2.0
 
+plans=31
 WD="$(dirname "$0")/../"
 target="${WD}/cmd/printf"
-plans=6
 . "${WD}/test-cmd/tap.sh"
 
 t esc '\b\t\n' '	
@@ -13,14 +13,48 @@ t esc '\b\t\n' '
 t octal '\041' '!'
 t hex '\x7B\x7d' '{}'
 
-t repeat_fmt '%s\n foo bar' 'foo
-bar
-'
-
-var_c_upper='\c@\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ\c[\c\\c]\c^\c_ !"#$%%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?'
+t_args repeat 'foo,bar,baz,' '%s,' foo bar baz
 
+var_c_upper='\c@\cA\cB\cC\cD\cE\cF\cG\cH\cI\cJ\cK\cL\cM\cN\cO\cP\cQ\cR\cS\cT\cU\cV\cW\cX\cY\cZ\c[\c\\c]\c^\c_ !"#$\%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?'
 t_file esc_c_upper "${WD}/test-cmd/inputs/all_ascii" "${var_c_upper}"
 
-var_c_lower='\c@\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz\c[\c\\c]\c^\c_ !"#$%%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?'
-
+var_c_lower='\c@\ca\cb\cc\cd\ce\cf\cg\ch\ci\cj\ck\cl\cm\cn\co\cp\cq\cr\cs\ct\cu\cv\cw\cx\cy\cz\c[\c\\c]\c^\c_ !"#$\%&'"'"'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\c?'
 t_file esc_c_lower "${WD}/test-cmd/inputs/all_ascii" "${var_c_lower}"
+
+t_args clear_vs_caret_esc 'foo :bar' 'foo \cH:%brat\n' 'bar\cHbaz'
+
+t_args fmt_b '	
+!{}' '%b' '\b\t\n\041\x7B\x7d'
+
+t_args fmt_b_rightpad '!{}   .' '%-6b%c' '\041\x7B\x7d' .
+t_args fmt_b_leftpad  '   !{}.' '%6b%c'  '\041\x7B\x7d' .
+
+t_args fmt_c 'foo' %c f oo oooo
+
+t_args fmt_d 10, %d, 10
+t_args fmt_Ld 10, %Ld, 10
+t_args fmt_i 10, %i, 10
+t_args fmt_o 12, %o, 10
+t_args fmt_u 10, %u, 10
+t_args fmt_x a, %x, 10
+t_args fmt_X A, %X, 10
+
+t_args fmt_e '1.000000e+01,' %e, 10
+t_args fmt_E '1.000000E+01,' %E, 10
+t_args fmt_f '10.000000,' %f, 10
+t_args fmt_Lf '10.000000,' %Lf, 10
+t_args fmt_F '10.000000,' %F, 10
+t_args fmt_g '10,' %g, 10
+t_args fmt_G '10,' %G, 10
+t_args fmt_a '0x1.4p+3,' %a, 10
+t_args fmt_A '0X1.4P+3,' %A, 10
+
+t_args nofmtarg '[
+]' '[%s\n]'
+
+t_args nofmtconv 'foobar
+' 'foobar\n' 1 2 3
+
+t_args nofmtconv_caret 'foo bar' 'foo \cHbar'
+
+t_args precision_s 'abcde' '%.5s' abcdefghijklmnopqrstuvwxyz

M	cmd/printf.1	54	+++++++++++-------------------------------------------
M	cmd/printf.c	1099	++++++++++++++++++++++++++++++++-----------------------------------------------
M	common.mk	2	+-
M	test-cmd/printf.sh	50	++++++++++++++++++++++++++++++++++++++++++--------