logo

utils-std

Collection of commonly available Unix tools
commit: 2f6f709d89a3e1980797458296714175ad7d8db6
parent f9196e3f0294f37c36c90faaeb9440344dbdbaf7
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Tue, 23 Apr 2024 19:26:43 +0200

cmd/tr: import from OpenBSD

Diffstat:

M Makefile | 4 ++++
A cmd/tr.1 | 365 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A cmd/tr.c | 229 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M coreutils.txt | 2 +-
A lib/tr_str.c | 348 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lib/tr_str.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M lsb_commands.txt | 2 +-
M makeless.sh | 1 +
M posix_utilities.txt | 2 +-
9 files changed, 1013 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile @@ -117,3 +117,7 @@ cmd/rm: cmd/rm.c lib/consent.c lib/consent.h Makefile cmd/truncate: cmd/truncate.c lib/truncation.c lib/truncation.h Makefile rm -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno} $(CC) -std=c99 $(CFLAGS) -o $@ cmd/truncate.c lib/truncation.c $(LDFLAGS) $(LDSTATIC) + +cmd/tr: cmd/tr.c lib/tr_str.c lib/tr_str.h Makefile + rm -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno} + $(CC) -std=c99 $(CFLAGS) -o $@ cmd/tr.c lib/tr_str.c $(LDFLAGS) $(LDSTATIC) diff --git a/cmd/tr.1 b/cmd/tr.1 @@ -0,0 +1,365 @@ +.\" SPDX-License-Identifier: BSD-3-Clause +.\" $OpenBSD: tr.1,v 1.25 2015/02/28 21:51:57 bentley Exp $ +.\" $NetBSD: tr.1,v 1.5 1994/12/07 08:35:13 jtc Exp $ +.\" +.\" Copyright (c) 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" This code is derived from software contributed to Berkeley by +.\" the Institute of Electrical and Electronics Engineers, Inc. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)tr.1 8.1 (Berkeley) 6/6/93 +.\" +.Dd $Mdocdate: February 28 2015 $ +.Dt TR 1 +.Os +.Sh NAME +.Nm tr +.Nd translate characters +.Sh SYNOPSIS +.Nm tr +.Op Fl Ccs +.Ar string1 string2 +.Nm tr +.Op Fl Cc +.Fl d +.Ar string1 +.Nm tr +.Op Fl Cc +.Fl s +.Ar string1 +.Nm tr +.Op Fl Cc +.Fl ds +.Ar string1 string2 +.Sh DESCRIPTION +The +.Nm +utility copies the standard input to the standard output with substitution +or deletion of selected characters. +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Fl C +Complements the set of characters in +.Ar string1 ; +for instance, +.Dq -C\ ab +includes every character except for +.Sq a +and +.Sq b . +.It Fl c +The same as +.Fl C . +.It Fl d +The +.Fl d +option causes characters to be deleted from the input. +.It Fl s +The +.Fl s +option squeezes multiple occurrences of the characters listed in the last +operand (either +.Ar string1 +or +.Ar string2 ) +in the input into a single instance of the character. +This occurs after all deletion and translation is completed. +.El +.Pp +In the first synopsis form, the characters in +.Ar string1 +are translated into the characters in +.Ar string2 +where the first character in +.Ar string1 +is translated into the first character in +.Ar string2 +and so on. +If +.Ar string1 +is longer than +.Ar string2 , +the last character found in +.Ar string2 +is duplicated until +.Ar string1 +is exhausted. 
+.Pp +In the second synopsis form, the characters in +.Ar string1 +are deleted from the input. +.Pp +In the third synopsis form, the characters in +.Ar string1 +are compressed as described for the +.Fl s +option. +.Pp +In the fourth synopsis form, the characters in +.Ar string1 +are deleted from the input, and the characters in +.Ar string2 +are compressed as described for the +.Fl s +option. +.Pp +The following conventions can be used in +.Ar string1 +and +.Ar string2 +to specify sets of characters: +.Bl -tag -width [:equiv:] +.It character +Any character not described by one of the following conventions +represents itself. +.It \eoctal +A backslash followed by 1, 2, or 3 octal digits represents a character +with that encoded value. +To follow an octal sequence with a digit as a character, left zero-pad +the octal sequence to the full 3 octal digits. +.It \echaracter +A backslash followed by certain special characters maps to special +values. +.Pp +.Bl -tag -width "nn" -offset indent -compact +.It \ea +<alert character> +.It \eb +<backspace> +.It \ef +<form-feed> +.It \en +<newline> +.It \er +<carriage return> +.It \et +<tab> +.It \ev +<vertical tab> +.El +.Pp +A backslash followed by any other character maps to that character. +.It c-c +Represents the range of characters between the range endpoints, inclusively. +.It [:class:] +Represents all characters belonging to the defined character class. 
+Class names are: +.Pp +.Bl -tag -width "xdigit" -offset indent -compact +.It alnum +<alphanumeric characters> +.It alpha +<alphabetic characters> +.It blank +<blank characters> +.It cntrl +<control characters> +.It digit +<numeric characters> +.It graph +<graphic characters> +.It lower +<lower-case alphabetic characters> +.It print +<printable characters> +.It punct +<punctuation characters> +.It space +<space characters> +.It upper +<upper-case characters> +.It xdigit +<hexadecimal characters> +.El +.Pp +.\" All classes may be used in +.\" .Ar string1 , +.\" and in +.\" .Ar string2 +.\" when both the +.\" .Fl d +.\" and +.\" .Fl s +.\" options are specified. +.\" Otherwise, only the classes ``upper'' and ``lower'' may be used in +.\" .Ar string2 +.\" and then only when the corresponding class (``upper'' for ``lower'' +.\" and vice-versa) is specified in the same relative position in +.\" .Ar string1 . +.\" .Pp +With the exception of the +.Dq upper +and +.Dq lower +classes, characters +in the classes are in unspecified order. +In the +.Dq upper +and +.Dq lower +classes, characters are entered in +ascending order. +.Pp +For specific information as to which ASCII characters are included +in these classes, see +.Xr isalnum 3 , +.Xr isalpha 3 , +and related manual pages. +.It [=equiv=] +Represents all characters or collating (sorting) elements belonging to +the same equivalence class as +.Ar equiv . +If +there is a secondary ordering within the equivalence class, the characters +are ordered in ascending sequence. +Otherwise, they are ordered after their encoded values. +An example of an equivalence class might be +.Dq c +and +.Dq ch +in Spanish; +English has no equivalence classes. +.It [#*n] +Represents +.Ar n +repeated occurrences of the character represented by +.Ar # . +This +expression is only valid when it occurs in +.Ar string2 . 
+If +.Ar n +is omitted or is zero, it is interpreted as large enough to extend the +.Ar string2 +sequence to the length of +.Ar string1 . +If +.Ar n +has a leading zero, it is interpreted as an octal value; otherwise, +it's interpreted as a decimal value. +.El +.Sh EXIT STATUS +.Ex -std tr +.Sh EXAMPLES +The following examples are shown as given to the shell: +.Pp +Create a list of the words in file1, one per line, where a word is taken to +be a maximal string of letters. +.Pp +.Dl $ tr -cs Qo [:alpha:] Qc Qo \en Qc < file1 +.Pp +Translate the contents of file1 to upper-case. +.Pp +.Dl $ tr Qo [:lower:] Qc Qo [:upper:] Qc < file1 +.Pp +Strip out non-printable characters from file1. +.Pp +.Dl $ tr -cd Qo [:print:] Qc < file1 +.Sh SEE ALSO +.Xr sed 1 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification, +except that the +.Fl C +option behaves the same as the +.Fl c +option since +.Nm +is not locale-aware. +.Pp +System V has historically implemented character ranges using the syntax +.Dq [c-c] +instead of the +.Dq c-c +used by historic +.Bx +implementations and +standardized by POSIX. +System V shell scripts should work under this implementation as long as +the range is intended to map in another range, i.e., the command +.Dq tr [a-z] [A-Z] +will work as it will map the +.Sq \&[ +character in +.Ar string1 +to the +.Sq \&[ +character in +.Ar string2 . +However, if the shell script is deleting or squeezing characters as in +the command +.Dq tr\ -d\ [a-z] , +the characters +.Sq \&[ +and +.Sq \&] +will be +included in the deletion or compression list, which would not have happened +under an historic System V implementation. +Additionally, any scripts that depended on the sequence +.Dq a-z +to represent the three characters +.Sq a , +.Sq - , +and +.Sq z +will have to be rewritten as +.Dq a\e-z . 
+.Pp +The +.Nm +utility has historically not permitted the manipulation of NUL bytes in +its input and, additionally, has stripped NUL's from its input stream. +This implementation has removed this behavior as a bug. +.Pp +The +.Nm +utility has historically been extremely forgiving of syntax errors: +for example, the +.Fl c +and +.Fl s +options were ignored unless two strings were specified. +This implementation will not permit illegal syntax. +.Pp +It should be noted that the feature wherein the last character of +.Ar string2 +is duplicated if +.Ar string2 +has less characters than +.Ar string1 +is permitted by POSIX but is not required. +Shell scripts attempting to be portable to other POSIX systems should use +the +.Dq [#*] +convention instead of relying on this behavior. diff --git a/cmd/tr.c b/cmd/tr.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* $OpenBSD: tr.c,v 1.22 2022/12/04 23:50:49 cheloha Exp $ */ +/* $NetBSD: tr.c,v 1.5 1995/08/31 22:13:48 jtc Exp $ */ +/* + * Copyright (c) 1988, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#define _BSD_SOURCE + +#include "../lib/tr_str.h" + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> // bzero +#include <sys/types.h> +#include <unistd.h> + +int delete[NCHARS], squeeze[NCHARS]; +int translate[NCHARS] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* ASCII */ + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 
0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, +}; + +STR s1 = {STRING1, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL}; +STR s2 = {STRING2, NORMAL, 0, OOBCH, {0, OOBCH}, NULL, NULL}; + +static void setup(int *, char *, STR *, int); +static void usage(void); + +int +main(int argc, char *argv[]) +{ + int ch, cnt, lastch, *p; + int cflag, dflag, sflag; + + cflag = dflag = sflag = 0; + while((ch = getopt(argc, argv, "Ccds")) != -1) + switch(ch) + { + case 'C': + case 'c': + cflag = 1; + break; + case 'd': + dflag = 1; + break; + case 's': + sflag = 1; + break; + default: + usage(); + } + argc -= optind; + argv += optind; + + if(argc < 1 || argc > 2) usage(); + + /* + * tr -ds [-Cc] string1 string2 + * Delete all characters (or complemented characters) in string1. + * Squeeze all characters in string2. + */ + if(dflag && sflag) + { + if(argc != 2) usage(); + + setup(delete, argv[0], &s1, cflag); + setup(squeeze, argv[1], &s2, 0); + + for(lastch = OOBCH; (ch = getchar()) != EOF;) + if(!delete[ch] && (!squeeze[ch] || lastch != ch)) + { + lastch = ch; + (void)putchar(ch); + } + exit(0); + } + + /* + * tr -d [-Cc] string1 + * Delete all characters (or complemented characters) in string1. 
+ */ + if(dflag) + { + if(argc != 1) usage(); + + setup(delete, argv[0], &s1, cflag); + + while((ch = getchar()) != EOF) + if(!delete[ch]) (void)putchar(ch); + exit(0); + } + + /* + * tr -s [-Cc] string1 + * Squeeze all characters (or complemented characters) in string1. + */ + if(sflag && argc == 1) + { + setup(squeeze, argv[0], &s1, cflag); + + for(lastch = OOBCH; (ch = getchar()) != EOF;) + if(!squeeze[ch] || lastch != ch) + { + lastch = ch; + (void)putchar(ch); + } + exit(0); + } + + /* + * tr [-Ccs] string1 string2 + * Replace all characters (or complemented characters) in string1 with + * the character in the same position in string2. If the -s option is + * specified, squeeze all the characters in string2. + */ + if(argc != 2) usage(); + + s1.str = (unsigned char *)argv[0]; + s2.str = (unsigned char *)argv[1]; + + if(cflag) + for(cnt = NCHARS, p = translate; cnt--;) + *p++ = OOBCH; + + if(!next(&s2)) errx(1, "empty string2"); + + /* If string2 runs out of characters, use the last one specified. */ + ch = s2.lastch; + if(sflag) + while(next(&s1)) + { + translate[s1.lastch] = ch = s2.lastch; + squeeze[ch] = 1; + (void)next(&s2); + } + else + while(next(&s1)) + { + translate[s1.lastch] = ch = s2.lastch; + (void)next(&s2); + } + + if(cflag) + for(cnt = 0, p = translate; cnt < NCHARS; ++p, ++cnt) + *p = *p == OOBCH ? 
ch : cnt; + + if(sflag) + for(lastch = OOBCH; (ch = getchar()) != EOF;) + { + ch = translate[ch]; + if(!squeeze[ch] || lastch != ch) + { + lastch = ch; + (void)putchar(ch); + } + } + else + while((ch = getchar()) != EOF) + (void)putchar(translate[ch]); + exit(0); +} + +static void +setup(int *table, char *arg, STR *str, int cflag) +{ + int cnt, *p; + + str->str = (unsigned char *)arg; + bzero(table, NCHARS * sizeof(int)); + while(next(str)) + table[str->lastch] = 1; + if(cflag) + for(p = table, cnt = NCHARS; cnt--; ++p) + *p = !*p; +} + +static void +usage(void) +{ + fprintf(stderr, + "usage: tr [-Ccs] string1 string2\n" + " tr [-Cc] -d string1\n" + " tr [-Cc] -s string1\n" + " tr [-Cc] -ds string1 string2\n"); + exit(1); +} diff --git a/coreutils.txt b/coreutils.txt @@ -89,7 +89,7 @@ tee: Done test: Done timeout: ? touch: Done -tr: Todo +tr: Done true: Done truncate: Done tsort: ? diff --git a/lib/tr_str.c b/lib/tr_str.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* $OpenBSD: str.c,v 1.15 2023/05/04 16:08:29 tb Exp $ */ +/* $NetBSD: str.c,v 1.7 1995/08/31 22:13:47 jtc Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "./tr_str.h" +#define _BSD_SOURCE + +#include <assert.h> +#include <ctype.h> +#include <err.h> +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +static int backslash(STR *); +static int bracket(STR *); +static int c_class(const void *, const void *); +static void genclass(STR *); +static void genequiv(STR *); +static int genrange(STR *); +static void genseq(STR *); + +int +next(STR *s) +{ + int ch; + + switch(s->state) + { + case EOS: + return (0); + case INFINITE: + return (1); + case NORMAL: + switch(ch = *s->str) + { + case '\0': + s->state = EOS; + return (0); + case '\\': + s->lastch = backslash(s); + break; + case '[': + if(bracket(s)) return (next(s)); + /* FALLTHROUGH */ + default: + ++s->str; + s->lastch = ch; + break; + } + + /* We can start a range at any time. 
*/ + if(s->str[0] == '-' && genrange(s)) return (next(s)); + return (1); + case RANGE: + if(s->cnt-- == 0) + { + s->state = NORMAL; + return (next(s)); + } + ++s->lastch; + return (1); + case SEQUENCE: + if(s->cnt-- == 0) + { + s->state = NORMAL; + return (next(s)); + } + return (1); + case SET: + if((s->lastch = s->set[s->cnt++]) == OOBCH) + { + s->state = NORMAL; + return (next(s)); + } + return (1); + default: + return 0; + } + /* NOTREACHED */ +} + +static int +bracket(STR *s) +{ + char *p; + + switch(s->str[1]) + { + case ':': /* "[:class:]" */ + if((p = strstr((char *)s->str + 2, ":]")) == NULL) return (0); + *p = '\0'; + s->str += 2; + genclass(s); + s->str = (unsigned char *)p + 2; + return (1); + case '=': /* "[=equiv=]" */ + if((p = strstr((char *)s->str + 2, "=]")) == NULL) return (0); + s->str += 2; + genequiv(s); + return (1); + default: /* "[\###*n]" or "[#*n]" */ + if((p = strpbrk((char *)s->str + 2, "*]")) == NULL) return (0); + if(p[0] != '*' || strchr(p, ']') == NULL) return (0); + s->str += 1; + genseq(s); + return (1); + } + /* NOTREACHED */ +} + +typedef struct +{ + char *name; + int (*func)(int); + int *set; +} CLASS; + +static CLASS classes[] = { + /* clang-format off */ + { "alnum", isalnum, }, + { "alpha", isalpha, }, + { "blank", isblank, }, + { "cntrl", iscntrl, }, + { "digit", isdigit, }, + { "graph", isgraph, }, + { "lower", islower, }, + { "print", isprint, }, + { "punct", ispunct, }, + { "space", isspace, }, + { "upper", isupper, }, + { "xdigit", isxdigit, }, + /* clang-format on */ +}; + +static void +genclass(STR *s) +{ + CLASS *cp, tmp; + size_t len; + int i; + + tmp.name = (char *)s->str; + if((cp = (CLASS *)bsearch( + &tmp, classes, sizeof(classes) / sizeof(CLASS), sizeof(CLASS), c_class)) == NULL) + errx(1, "unknown class %s", s->str); + + /* + * Generate the set of characters in the class if we haven't + * already done so. 
+ */ + if(cp->set == NULL) + { + cp->set = reallocarray(NULL, NCHARS + 1, sizeof(*cp->set)); + if(cp->set == NULL) err(1, NULL); + len = 0; + for(i = 0; i < NCHARS; i++) + { + if(cp->func(i)) + { + cp->set[len] = i; + len++; + } + } + cp->set[len] = OOBCH; + len++; + cp->set = reallocarray(cp->set, len, sizeof(*cp->set)); + if(cp->set == NULL) err(1, NULL); + } + + s->cnt = 0; + s->state = SET; + s->set = cp->set; +} + +static int +c_class(const void *a, const void *b) +{ + return (strcmp(((CLASS *)a)->name, ((CLASS *)b)->name)); +} + +/* + * English doesn't have any equivalence classes, so for now + * we just syntax check and grab the character. + */ +static void +genequiv(STR *s) +{ + if(*s->str == '\\') + { + s->equiv[0] = backslash(s); + if(*s->str != '=') errx(1, "misplaced equivalence equals sign"); + } + else + { + s->equiv[0] = s->str[0]; + if(s->str[1] != '=') errx(1, "misplaced equivalence equals sign"); + } + s->str += 2; + s->cnt = 0; + s->state = SET; + s->set = s->equiv; +} + +static int +genrange(STR *s) +{ + int stopval; + unsigned char *savestart; + + savestart = s->str; + stopval = *++s->str == '\\' ? backslash(s) : *s->str++; + if(stopval < (u_char)s->lastch) + { + s->str = savestart; + return (0); + } + s->cnt = stopval - s->lastch + 1; + s->state = RANGE; + --s->lastch; + return (1); +} + +static void +genseq(STR *s) +{ + char *ep; + + if(s->which == STRING1) errx(1, "sequences only valid in string2"); + + if(*s->str == '\\') + s->lastch = backslash(s); + else + s->lastch = *s->str++; + if(*s->str != '*') errx(1, "misplaced sequence asterisk"); + + switch(*++s->str) + { + case '\\': + s->cnt = backslash(s); + break; + case ']': + s->cnt = 0; + ++s->str; + break; + default: + if(isdigit(*s->str)) + { + s->cnt = strtol((char *)s->str, &ep, 0); + if(*ep == ']') + { + s->str = (unsigned char *)ep + 1; + break; + } + } + errx(1, "illegal sequence count"); + /* NOTREACHED */ + } + + s->state = s->cnt ? SEQUENCE : INFINITE; +} + +/* + * Translate \??? 
into a character. Up to 3 octal digits, if no digits either + * an escape code or a literal character. + */ +static int +backslash(STR *s) +{ + size_t i; + int ch, val; + + assert(*s->str == '\\'); + s->str++; + + /* Empty escapes become plain backslashes. */ + if(*s->str == '\0') + { + s->state = EOS; + return ('\\'); + } + + val = 0; + for(i = 0; i < 3; i++) + { + if(s->str[i] < '0' || '7' < s->str[i]) break; + val = val * 8 + s->str[i] - '0'; + } + if(i > 0) + { + if(val > UCHAR_MAX) errx(1, "octal value out of range: %d", val); + s->str += i; + return (val); + } + + ch = *s->str++; + switch(ch) + { + case 'a': /* escape characters */ + return ('\7'); + case 'b': + return ('\b'); + case 'f': + return ('\f'); + case 'n': + return ('\n'); + case 'r': + return ('\r'); + case 't': + return ('\t'); + case 'v': + return ('\13'); + default: /* \x" -> x */ + return (ch); + } +} diff --git a/lib/tr_str.h b/lib/tr_str.h @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* $OpenBSD: extern.h,v 1.6 2003/06/03 02:56:20 millert Exp $ */ +/* $NetBSD: extern.h,v 1.4 1995/11/01 00:45:22 pk Exp $ */ + +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)extern.h 8.1 (Berkeley) 6/6/93 + */ + +typedef struct +{ + enum + { + STRING1, + STRING2 + } which; + enum + { + EOS, + INFINITE, + NORMAL, + RANGE, + SEQUENCE, + SET + } state; + int cnt; /* character count */ + int lastch; /* last character */ + int equiv[2]; /* equivalence set */ + int *set; /* set of characters */ + unsigned char *str; /* user's string */ +} STR; + +#include <limits.h> +#define NCHARS (UCHAR_MAX + 1) /* Number of possible characters. */ +#define OOBCH (UCHAR_MAX + 1) /* Out of band character value. */ + +int next(STR *); diff --git a/lsb_commands.txt b/lsb_commands.txt @@ -125,7 +125,7 @@ tic: out of scope time: Done touch: Todo (futimens(3p)) tput: out of scope -tr: Todo +tr: Done true: Done tsort: ? 
tty: Done diff --git a/makeless.sh b/makeless.sh @@ -43,6 +43,7 @@ $CC -std=c99 $CFLAGS -o cmd/tee cmd/tee.c $LDFLAGS $LDSTATIC $CC -std=c99 $CFLAGS -o cmd/test cmd/test.c $LDFLAGS $LDSTATIC $CC -std=c99 $CFLAGS -o cmd/time cmd/time.c $LDFLAGS $LDSTATIC $CC -std=c99 $CFLAGS -o cmd/touch cmd/touch.c lib/iso_parse.c $LDFLAGS $LDSTATIC +$CC -std=c99 $CFLAGS -o cmd/tr cmd/tr.c lib/tr_str.c $LDFLAGS $LDSTATIC $CC -std=c99 $CFLAGS -o cmd/true cmd/true.c $LDFLAGS $LDSTATIC $CC -std=c99 $CFLAGS -o cmd/truncate cmd/truncate.c lib/truncation.c $LDFLAGS $LDSTATIC $CC -std=c99 $CFLAGS -o cmd/tty cmd/tty.c $LDFLAGS $LDSTATIC diff --git a/posix_utilities.txt b/posix_utilities.txt @@ -133,7 +133,7 @@ test: done time: done touch: done tput: no, external -tr +tr: done true: done tsort tty: done