logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/

printf.c (15571B)


  1. /*-
  2. * SPDX-License-Identifier: BSD-3-Clause
  3. *
  4. * Copyright 2018 Staysail Systems, Inc. <info@staysail.tech>
  5. * Copyright 2014 Garrett D'Amore <garrett@damore.org>
  6. * Copyright 2010 Nexenta Systems, Inc. All rights reserved.
  7. * Copyright (c) 1989, 1993
  8. * The Regents of the University of California. All rights reserved.
  9. *
  10. * Redistribution and use in source and binary forms, with or without
  11. * modification, are permitted provided that the following conditions
  12. * are met:
  13. * 1. Redistributions of source code must retain the above copyright
  14. * notice, this list of conditions and the following disclaimer.
  15. * 2. Redistributions in binary form must reproduce the above copyright
  16. * notice, this list of conditions and the following disclaimer in the
  17. * documentation and/or other materials provided with the distribution.
  18. * 3. Neither the name of the University nor the names of its contributors
  19. * may be used to endorse or promote products derived from this software
  20. * without specific prior written permission.
  21. *
  22. * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25. * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32. * SUCH DAMAGE.
  33. */
  34. /*
  35. * Important: This file is used both as a standalone program /usr/bin/printf
  36. * and as a builtin for /bin/sh (#define SHELL).
  37. */
  38. #define _POSIX_C_SOURCE 200809L
  39. #include "../lib/err.h"
  40. #include "../lib/getopt_nolong.h"
  41. #include <assert.h>
  42. #include <ctype.h>
  43. #include <errno.h>
  44. #include <inttypes.h>
  45. #include <limits.h>
  46. #include <locale.h>
  47. #include <stdio.h>
  48. #include <stdlib.h>
  49. #include <string.h>
  50. #include <sys/types.h>
  51. #include <unistd.h>
  52. #include <wchar.h>
  /*
   * PF(f, func): emit one converted argument through printf(3).
   *
   * `f` is the conversion specification and `func` the value to print.
   * The expansion reads four variables that must be in scope at the call
   * site: havewidth/haveprec say whether a width and/or precision value has
   * to be passed ahead of `func`, and fieldwidth/precision hold those values.
   * Exactly one printf() call is executed, so `func` is evaluated once.
   */
  #define PF(f, func) \
      do \
      { \
          if(haveprec) \
          { \
              if(havewidth) \
                  (void)printf(f, fieldwidth, precision, func); \
              else \
                  (void)printf(f, precision, func); \
          } \
          else \
          { \
              if(havewidth) \
                  (void)printf(f, fieldwidth, func); \
              else \
                  (void)printf(f, func); \
          } \
      } while(0)
  /* Format-string processing. */
  static char *printf_doformat(char *fmt, int *rval);
  static char *mknum(char *str, char ch);
  static int escape(char *fmt, int percent, size_t *len);

  /* Argument fetchers; each one consumes the argument gargv points at. */
  static int getchr(void);
  static const char *getstr(void);
  static int getint(int *ip);
  static int getnum(intmax_t *ip, uintmax_t *uip, int signedconv);
  static int getfloating(long double *dp, int mod_ldbl);
  static int asciicode(void);

  static void usage(void);

  /* Accepted characters when scanning the decimal index of an "n$" specifier. */
  static const char digits[] = "0123456789";
  /* Sentinel returned by printf_doformat() when a %b argument contained \c. */
  static char end_fmt[1];

  /* Number of arguments available to the current pass over the format. */
  static int myargc;
  /* First argument of the current pass; "n$" indexes are relative to it. */
  static char **myargv;
  /* Next argument to be consumed. */
  static char **gargv;
  /* Furthest argument position reached during the current pass. */
  static char **maxargv;

  /* Program name used in the locale warning printed by main(). */
  const char *argv0 = "printf";
  83. int
  84. main(int argc, char *argv[])
  85. {
  86. size_t len;
  87. int end, rval;
  88. char *format, *fmt, *start;
  89. char *lc_all = setlocale(LC_ALL, "");
  90. if(lc_all == NULL)
  91. {
  92. fprintf(stderr,
  93. "%s: warning: Failed loading locales. setlocale(LC_ALL, \"\"): %s\n",
  94. argv0,
  95. strerror(errno));
  96. }
  97. for(int c = -1; (c = getopt_nolong(argc, argv, "")) != -1;)
  98. {
  99. switch(c)
  100. {
  101. case '?':
  102. default:
  103. usage();
  104. return (1);
  105. }
  106. }
  107. argc -= optind;
  108. argv += optind;
  109. if(argc < 1)
  110. {
  111. usage();
  112. return (1);
  113. }
  114. /*
  115. * Basic algorithm is to scan the format string for conversion
  116. * specifications -- once one is found, find out if the field
  117. * width or precision is a '*'; if it is, gather up value. Note,
  118. * format strings are reused as necessary to use up the provided
  119. * arguments, arguments of zero/null string are provided to use
  120. * up the format string.
  121. */
  122. fmt = format = *argv;
  123. escape(fmt, 1, &len); /* backslash interpretation */
  124. rval = end = 0;
  125. gargv = ++argv;
  126. for(;;)
  127. {
  128. maxargv = gargv;
  129. myargv = gargv;
  130. for(myargc = 0; gargv[myargc]; myargc++)
  131. /* nop */;
  132. start = fmt;
  133. while(fmt < format + len)
  134. {
  135. if(fmt[0] == '%')
  136. {
  137. fwrite(start, 1, fmt - start, stdout);
  138. if(fmt[1] == '%')
  139. {
  140. /* %% prints a % */
  141. putchar('%');
  142. fmt += 2;
  143. }
  144. else
  145. {
  146. fmt = printf_doformat(fmt, &rval);
  147. if(fmt == NULL || fmt == end_fmt)
  148. {
  149. return (fmt == NULL ? 1 : rval);
  150. }
  151. end = 0;
  152. }
  153. start = fmt;
  154. }
  155. else
  156. fmt++;
  157. if(gargv > maxargv) maxargv = gargv;
  158. }
  159. gargv = maxargv;
  160. if(end == 1)
  161. {
  162. utils_warnx("missing format character");
  163. return (1);
  164. }
  165. fwrite(start, 1, fmt - start, stdout);
  166. if(!*gargv)
  167. {
  168. return (rval);
  169. }
  170. /* Restart at the beginning of the format string. */
  171. fmt = format;
  172. end = 1;
  173. }
  174. /* NOTREACHED */
  175. }
  176. static char *
  177. printf_doformat(char *fmt, int *rval)
  178. {
  179. static const char skip1[] = "#'-+ 0";
  180. int fieldwidth, haveprec, havewidth, mod_ldbl, precision;
  181. char convch, nextch;
  182. char start[strlen(fmt) + 1];
  183. char **fargv;
  184. char *dptr;
  185. int l;
  186. dptr = start;
  187. *dptr++ = '%';
  188. *dptr = 0;
  189. fmt++;
  190. /* look for "n$" field index specifier */
  191. l = strspn(fmt, digits);
  192. if((l > 0) && (fmt[l] == '$'))
  193. {
  194. int idx = atoi(fmt);
  195. if(idx <= myargc)
  196. {
  197. gargv = &myargv[idx - 1];
  198. }
  199. else
  200. {
  201. gargv = &myargv[myargc];
  202. }
  203. if(gargv > maxargv) maxargv = gargv;
  204. fmt += l + 1;
  205. /* save format argument */
  206. fargv = gargv;
  207. }
  208. else
  209. {
  210. fargv = NULL;
  211. }
  212. /* skip to field width */
  213. while(*fmt && strchr(skip1, *fmt) != NULL)
  214. {
  215. *dptr++ = *fmt++;
  216. *dptr = 0;
  217. }
  218. if(*fmt == '*')
  219. {
  220. fmt++;
  221. l = strspn(fmt, digits);
  222. if((l > 0) && (fmt[l] == '$'))
  223. {
  224. int idx = atoi(fmt);
  225. if(fargv == NULL)
  226. {
  227. utils_warnx("incomplete use of n$");
  228. return (NULL);
  229. }
  230. if(idx <= myargc)
  231. {
  232. gargv = &myargv[idx - 1];
  233. }
  234. else
  235. {
  236. gargv = &myargv[myargc];
  237. }
  238. fmt += l + 1;
  239. }
  240. else if(fargv != NULL)
  241. {
  242. utils_warnx("incomplete use of n$");
  243. return (NULL);
  244. }
  245. if(getint(&fieldwidth)) return (NULL);
  246. if(gargv > maxargv) maxargv = gargv;
  247. havewidth = 1;
  248. *dptr++ = '*';
  249. *dptr = 0;
  250. }
  251. else
  252. {
  253. havewidth = 0;
  254. /* skip to possible '.', get following precision */
  255. while(isdigit(*fmt))
  256. {
  257. *dptr++ = *fmt++;
  258. *dptr = 0;
  259. }
  260. }
  261. if(*fmt == '.')
  262. {
  263. /* precision present? */
  264. fmt++;
  265. *dptr++ = '.';
  266. if(*fmt == '*')
  267. {
  268. fmt++;
  269. l = strspn(fmt, digits);
  270. if((l > 0) && (fmt[l] == '$'))
  271. {
  272. int idx = atoi(fmt);
  273. if(fargv == NULL)
  274. {
  275. utils_warnx("incomplete use of n$");
  276. return (NULL);
  277. }
  278. if(idx <= myargc)
  279. {
  280. gargv = &myargv[idx - 1];
  281. }
  282. else
  283. {
  284. gargv = &myargv[myargc];
  285. }
  286. fmt += l + 1;
  287. }
  288. else if(fargv != NULL)
  289. {
  290. utils_warnx("incomplete use of n$");
  291. return (NULL);
  292. }
  293. if(getint(&precision)) return (NULL);
  294. if(gargv > maxargv) maxargv = gargv;
  295. haveprec = 1;
  296. *dptr++ = '*';
  297. *dptr = 0;
  298. }
  299. else
  300. {
  301. haveprec = 0;
  302. /* skip to conversion char */
  303. while(isdigit(*fmt))
  304. {
  305. *dptr++ = *fmt++;
  306. *dptr = 0;
  307. }
  308. }
  309. }
  310. else
  311. haveprec = 0;
  312. if(!*fmt)
  313. {
  314. utils_warnx("missing format character");
  315. return (NULL);
  316. }
  317. *dptr++ = *fmt;
  318. *dptr = 0;
  319. /*
  320. * Look for a length modifier. POSIX doesn't have these, so
  321. * we only support them for floating-point conversions, which
  322. * are extensions. This is useful because the L modifier can
  323. * be used to gain extra range and precision, while omitting
  324. * it is more likely to produce consistent results on different
  325. * architectures. This is not so important for integers
  326. * because overflow is the only bad thing that can happen to
  327. * them, but consider the command printf %a 1.1
  328. */
  329. if(*fmt == 'L')
  330. {
  331. mod_ldbl = 1;
  332. fmt++;
  333. if(!strchr("aAeEfFgG", *fmt))
  334. {
  335. utils_warnx("bad modifier L for %%%c", *fmt);
  336. return (NULL);
  337. }
  338. }
  339. else
  340. {
  341. mod_ldbl = 0;
  342. }
  343. /* save the current arg offset, and set to the format arg */
  344. if(fargv != NULL)
  345. {
  346. gargv = fargv;
  347. }
  348. convch = *fmt;
  349. nextch = *++fmt;
  350. *fmt = '\0';
  351. switch(convch)
  352. {
  353. case 'b':
  354. {
  355. size_t len;
  356. char *p;
  357. int getout;
  358. /* Convert "b" to "s" for output. */
  359. start[strlen(start) - 1] = 's';
  360. if((p = strdup(getstr())) == NULL)
  361. {
  362. utils_warnx("%s", strerror(ENOMEM));
  363. return (NULL);
  364. }
  365. getout = escape(p, 0, &len);
  366. PF(start, p);
  367. /* Restore format for next loop. */
  368. free(p);
  369. if(getout) return (end_fmt);
  370. break;
  371. }
  372. case 'c':
  373. {
  374. char p;
  375. p = getchr();
  376. if(p != '\0') PF(start, p);
  377. break;
  378. }
  379. case 's':
  380. {
  381. const char *p;
  382. p = getstr();
  383. PF(start, p);
  384. break;
  385. }
  386. case 'd':
  387. case 'i':
  388. case 'o':
  389. case 'u':
  390. case 'x':
  391. case 'X':
  392. {
  393. char *f;
  394. intmax_t val;
  395. uintmax_t uval;
  396. int signedconv;
  397. signedconv = (convch == 'd' || convch == 'i');
  398. if((f = mknum(start, convch)) == NULL) return (NULL);
  399. if(getnum(&val, &uval, signedconv)) *rval = 1;
  400. if(signedconv)
  401. PF(f, val);
  402. else
  403. PF(f, uval);
  404. break;
  405. }
  406. case 'e':
  407. case 'E':
  408. case 'f':
  409. case 'F':
  410. case 'g':
  411. case 'G':
  412. case 'a':
  413. case 'A':
  414. {
  415. long double p;
  416. if(getfloating(&p, mod_ldbl)) *rval = 1;
  417. if(mod_ldbl)
  418. PF(start, p);
  419. else
  420. PF(start, (double)p);
  421. break;
  422. }
  423. default:
  424. utils_warnx("illegal format character '%c'", convch);
  425. return (NULL);
  426. }
  427. *fmt = nextch;
  428. /* return the gargv to the next element */
  429. return (fmt);
  430. }
  /*
   * Build an integer conversion specification that takes an (u)intmax_t.
   *
   * str is a specification ending in its conversion character. The result
   * is str without that last character, followed by the 'j' length modifier
   * and ch. It lives in a static buffer that is grown on demand and reused
   * by every call, so the caller must not free it and must use it before
   * calling mknum() again. Returns NULL (after a diagnostic) when the buffer
   * cannot be grown.
   */
  static char *
  mknum(char *str, char ch)
  {
      static char *copy;
      static size_t copy_size;
      size_t need = strlen(str) + 2; /* room for 'j' and the terminator */
      size_t keep = need - 3;        /* everything except the conversion char */

      if(need > copy_size)
      {
          size_t grown = need + 1023;
          char *bigger;

          assert(grown != 0);
          bigger = realloc(copy, grown);
          if(bigger == NULL)
          {
              utils_warnx("%s", strerror(ENOMEM));
              return (NULL);
          }
          copy = bigger;
          copy_size = grown;
      }

      memmove(copy, str, keep);
      copy[keep] = 'j';
      copy[keep + 1] = ch;
      copy[keep + 2] = '\0';

      return (copy);
  }
  /*
   * Interpret backslash escapes in fmt, in place.
   *
   * percent is non-zero for the format operand and zero for a %b argument:
   *  - format operand: "\c" followed by '?', a lowercase letter or a
   *    character in '@'..'_' yields the corresponding control character, and
   *    an octal escape that evaluates to '%' is stored as "%%" so that it is
   *    printed literally;
   *  - %b argument: "\c" ends the string and makes the function return 1,
   *    and octal escapes may take the form \0ddd.
   *
   * The decoded text can contain NUL bytes, so its length is stored in *len
   * (the buffer is also NUL-terminated at that position).
   *
   * Returns 1 when a %b argument contained "\c", otherwise 0.
   */
  static int
  escape(char *fmt, int percent, size_t *len)
  {
      char *save, *store, c;
      int value, ndigits;

      /*
       * Required by POSIX.1-2024 for printf: \\ \a \b \f \n \r \t \v \000
       *
       * <https://pubs.opengroup.org/onlinepubs/9799919799/utilities/V3_chap02.html#tag_19_02_04>
       * As inspiration, required by POSIX.1-2024 for dollar-single-quote($'...'):
       * \" \' \\ \a \b \e \f \n \r \t \v \c0 \x00 \000
       */
      for(save = store = fmt; ((c = *fmt) != 0); ++fmt, ++store)
      {
          if(c != '\\')
          {
              *store = c;
              continue;
          }

          switch(*++fmt)
          {
          case '\0': /* EOS, user error: keep the lone backslash */
              *store = '\\';
              *++store = '\0';
              *len = (size_t)(store - save);
              return (0);
          case '\\': /* backslash */
          case '\'': /* single quote */
              *store = *fmt;
              break;
          case 'a': /* bell/alert */
              *store = '\a';
              break;
          case 'b': /* backspace */
              *store = '\b';
              break;
          case 'c':
              if(!percent)
              {
                  /* %b argument: stop here and tell the caller to quit. */
                  *store = '\0';
                  *len = (size_t)(store - save);
                  return (1);
              }
              /* Control-character notation. Assumes ASCII */
              if(fmt[1] == '?')
              {
                  fmt++;
                  *store = '\177';
              }
              else if(fmt[1] >= 'a' && fmt[1] <= 'z')
              {
                  fmt++;
                  *store = (fmt[0] - 'a') + 1;
              }
              else if(fmt[1] >= '@' && fmt[1] <= '_')
              {
                  fmt++;
                  *store = (fmt[0] - '@');
              }
              else
              {
                  *store = 'c';
              }
              break;
          case 'e': /* escape */
              *store = '\033';
              break;
          case 'f': /* form-feed */
              *store = '\f';
              break;
          case 'n': /* newline */
              *store = '\n';
              break;
          case 'r': /* carriage-return */
              *store = '\r';
              break;
          case 't': /* horizontal tab */
              *store = '\t';
              break;
          case 'v': /* vertical tab */
              *store = '\v';
              break;
          case 'x': /* hex: at most two digits */
              fmt++;
              value = 0;
              /* <ctype.h> functions need an unsigned char value. */
              for(ndigits = 2; ndigits-- && isxdigit((unsigned char)*fmt); ++fmt)
              {
                  value <<= 4;
                  if(*fmt <= '9')
                      value += *fmt - '0';
                  else if(*fmt <= 'F')
                      value += *fmt - 'A' + 10;
                  else
                      value += *fmt - 'a' + 10;
              }
              /* Step back so the loop increment lands on the next input char. */
              --fmt;
              *store = (char)value;
              break;
          /* octal constant */
          case '0':
          case '1':
          case '2':
          case '3':
          case '4':
          case '5':
          case '6':
          case '7':
              /* %b arguments allow a leading 0 plus three digits. */
              ndigits = (!percent && *fmt == '0') ? 4 : 3;
              for(value = 0; ndigits-- && *fmt >= '0' && *fmt <= '7'; ++fmt)
              {
                  value <<= 3;
                  value += *fmt - '0';
              }
              --fmt;
              if(percent && value == '%')
              {
                  /* Double it so the format scanner prints a literal '%'. */
                  *store++ = '%';
                  *store = '%';
              }
              else
                  *store = (char)value;
              break;
          default: /* unknown escape: drop the backslash */
              *store = *fmt;
              break;
          }
      }

      *store = '\0';
      *len = (size_t)(store - save);
      return (0);
  }
  588. static int
  589. getchr(void)
  590. {
  591. if(!gargv || !*gargv) return ('\0');
  592. return ((int)**gargv++);
  593. }
  594. static const char *
  595. getstr(void)
  596. {
  597. if(!gargv || !*gargv) return ("");
  598. return (*gargv++);
  599. }
  600. static int
  601. getint(int *ip)
  602. {
  603. intmax_t val;
  604. uintmax_t uval;
  605. int rval;
  606. if(getnum(&val, &uval, 1)) return (1);
  607. rval = 0;
  608. if(val < INT_MIN || val > INT_MAX)
  609. {
  610. utils_warnx("%s: %s", *gargv, strerror(ERANGE));
  611. rval = 1;
  612. }
  613. *ip = (int)val;
  614. return (rval);
  615. }
  616. static int
  617. getnum(intmax_t *ip, uintmax_t *uip, int signedconv)
  618. {
  619. char *ep;
  620. int rval;
  621. if(!gargv || !*gargv)
  622. {
  623. *ip = *uip = 0;
  624. return (0);
  625. }
  626. if(**gargv == '"' || **gargv == '\'')
  627. {
  628. if(signedconv)
  629. *ip = asciicode();
  630. else
  631. *uip = asciicode();
  632. return (0);
  633. }
  634. rval = 0;
  635. errno = 0;
  636. if(signedconv)
  637. *ip = strtoimax(*gargv, &ep, 0);
  638. else
  639. *uip = strtoumax(*gargv, &ep, 0);
  640. if(ep == *gargv)
  641. {
  642. utils_warnx("%s: expected numeric value", *gargv);
  643. rval = 1;
  644. }
  645. else if(*ep != '\0')
  646. {
  647. utils_warnx("%s: not completely converted", *gargv);
  648. rval = 1;
  649. }
  650. if(errno == ERANGE)
  651. {
  652. utils_warnx("%s: %s", *gargv, strerror(ERANGE));
  653. rval = 1;
  654. }
  655. ++gargv;
  656. return (rval);
  657. }
  658. static int
  659. getfloating(long double *dp, int mod_ldbl)
  660. {
  661. char *ep;
  662. int rval;
  663. if(!*gargv)
  664. {
  665. *dp = 0.0;
  666. return (0);
  667. }
  668. if(**gargv == '"' || **gargv == '\'')
  669. {
  670. *dp = asciicode();
  671. return (0);
  672. }
  673. rval = 0;
  674. errno = 0;
  675. if(mod_ldbl)
  676. *dp = strtold(*gargv, &ep);
  677. else
  678. *dp = strtod(*gargv, &ep);
  679. if(ep == *gargv)
  680. {
  681. utils_warnx("%s: expected numeric value", *gargv);
  682. rval = 1;
  683. }
  684. else if(*ep != '\0')
  685. {
  686. utils_warnx("%s: not completely converted", *gargv);
  687. rval = 1;
  688. }
  689. if(errno == ERANGE)
  690. {
  691. utils_warnx("%s: %s", *gargv, strerror(ERANGE));
  692. rval = 1;
  693. }
  694. ++gargv;
  695. return (rval);
  696. }
  697. static int
  698. asciicode(void)
  699. {
  700. int ch;
  701. wchar_t wch;
  702. mbstate_t mbs;
  703. ch = (unsigned char)**gargv;
  704. if(ch == '\'' || ch == '"')
  705. {
  706. memset(&mbs, 0, sizeof(mbs));
  707. switch(mbrtowc(&wch, *gargv + 1, MB_LEN_MAX, &mbs))
  708. {
  709. case(size_t)-2:
  710. case(size_t)-1:
  711. wch = (unsigned char)gargv[0][1];
  712. break;
  713. case 0:
  714. wch = 0;
  715. break;
  716. }
  717. ch = wch;
  718. }
  719. ++gargv;
  720. return (ch);
  721. }
  /* Print the one-line synopsis on stderr. */
  static void
  usage(void)
  {
      (void)fputs("usage: printf format [arguments ...]\n", stderr);
  }