logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git
commit: 68115096138b112147503b11559573256cae2e47
parent 2a3955731ccffee4cb95ce17eeffde861c9a70e0
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Tue,  3 Sep 2024 16:35:58 +0200

cmd/join: drop compatibility layer for historical options

Using https://codesearch.debian.net/ in regex mode to search
for historical usage:

* `\bjoin (--?[^a] \w+ )?-a\b` => no usage of operand-less -a
* `\bjoin (--?[^j] \w+ )?-j` => <5 actual hits, likely patchable
* `\bjoin (--?[^o] \w+ )?-o` => <5 hits, easily patchable

I don't think it's worth keeping non-POSIX option parsing in place.
Historical -j [num] managed by getopt() can stay for now.

Diffstat:

M cmd/join.1 | 56 ++++++++++----------------------------------------------
M cmd/join.c | 89 +------------------------------------------------------------------------------
2 files changed, 11 insertions(+), 134 deletions(-)

diff --git a/cmd/join.1 b/cmd/join.1 @@ -231,57 +231,21 @@ Also see the note about .Xr sort 1 above to understand why we need to sort the intermediate result. .Bd -literal -offset indent -$ join -t, -1 2 -o 1.2 2.2 nobel_laureates.txt nobel_nationalities.txt | \e - sort -k2 -t, | join -t, -e "<<NULL>>" -1 2 -o 1.1 2.2 - capitals.txt +$ join -t, -1 2 -o "1.2 2.2" nobel_laureates.txt nobel_nationalities.txt | \e + sort -k2 -t, | join -t, -e "<<NULL>>" -1 2 -o "1.1 2.2" - capitals.txt Elie Ducommun,<<NULL>> Jean Henri Dunant,<<NULL>> .Ed .Sh COMPATIBILITY -For compatibility with historic versions of -.Nm , -the following options are available: -.Bl -tag -width indent -.It Fl a -In addition to the default output, produce a line for each unpairable line -in both -.Ar file1 -and -.Ar file2 . -.It Fl j1 Ar field -Join on the -.Ar field Ns 'th -field of -.Ar file1 . -.It Fl j2 Ar field -Join on the -.Ar field Ns 'th -field of -.Ar file2 . -.It Fl j Ar field -Join on the -.Ar field Ns 'th -field of both -.Ar file1 -and -.Ar file2 . -.It Fl o Ar list ... -Historical implementations of -.Nm -permitted multiple arguments to the +Compatibility with +.Fl a +and multi-operand .Fl o -option. -These arguments were of the form -.Ar file_number . Ns Ar field_number -as described -for the current -.Fl o -option. -This has obvious difficulties in the presence of files named -.Pa 1.2 . -.El -.Pp -These options are available only so historic shell scripts do not require -modification and should not be used. +from historical versions of +.Nm +got dropped, compatibility with +.Fl j Ar field +is kept for now. 
.Sh SEE ALSO .Xr awk 1 , .Xr comm 1 , diff --git a/cmd/join.c b/cmd/join.c @@ -97,7 +97,6 @@ static void fieldarg(char *); static void joinlines(INPUT *, INPUT *); static int mbscoll(const char *, const char *); static char *mbssep(char **, const wchar_t *); -static void obsolete(char **); static void outfield(LINE *, unsigned long, int); static void outoneline(INPUT *, LINE *); static void outtwoline(INPUT *, LINE *, INPUT *, LINE *); @@ -118,15 +117,10 @@ main(int argc, char *argv[]) F2 = &input2; aflag = vflag = 0; - obsolete(argv); - while((ch = getopt(argc, argv, "\01a:e:j:1:2:o:t:v:")) != -1) + while((ch = getopt(argc, argv, "a:e:j:1:2:o:t:v:")) != -1) { switch(ch) { - case '\01': /* See comment in obsolete(). */ - aflag = 1; - F1->unpair = F2->unpair = 1; - break; case '1': if((F1->joinf = strtol(optarg, &end, 10)) < 1) errx(1, "-1 option field number less than 1"); if(*end) errx(1, "illegal field number -- %s", optarg); @@ -560,87 +554,6 @@ fieldarg(char *option) } static void -obsolete(char **argv) -{ - size_t len; - char **p, *ap, *t; - - while((ap = *++argv) != NULL) - { - /* Return if "--". */ - if(ap[0] == '-' && ap[1] == '-') return; - /* skip if not an option */ - if(ap[0] != '-') continue; - switch(ap[1]) - { - case 'a': - /* - * The original join allowed "-a", which meant the - * same as -a1 plus -a2. POSIX 1003.2, Draft 11.2 - * only specifies this as "-a 1" and "a -2", so we - * have to use another option flag, one that is - * unlikely to ever be used or accidentally entered - * on the command line. (Well, we could reallocate - * the argv array, but that hardly seems worthwhile.) - */ - if(ap[2] == '\0' && - (argv[1] == NULL || (strcmp(argv[1], "1") != 0 && strcmp(argv[1], "2") != 0))) - { - ap[1] = '\01'; - warnx("-a option used without an argument; " - "reverting to historical behavior"); - } - break; - case 'j': - /* - * The original join allowed "-j[12] arg" and "-j arg". - * Convert the former to "-[12] arg". 
Don't convert - * the latter since getopt(3) can handle it. - */ - switch(ap[2]) - { - case '1': - if(ap[3] != '\0') goto jbad; - ap[1] = '1'; - ap[2] = '\0'; - break; - case '2': - if(ap[3] != '\0') goto jbad; - ap[1] = '2'; - ap[2] = '\0'; - break; - case '\0': - break; - default: - jbad: - errx(1, "illegal option -- %s", ap); - usage(); - } - break; - case 'o': - /* - * The original join allowed "-o arg arg". - * Convert to "-o arg -o arg". - */ - if(ap[2] != '\0') break; - for(p = argv + 2; *p; ++p) - { - if(p[0][0] == '0' || ((p[0][0] != '1' && p[0][0] != '2') || p[0][1] != '.')) break; - len = strlen(*p); - if(len - 2 != strspn(*p + 2, "0123456789")) break; - if((t = malloc(len + 3)) == NULL) err(1, NULL); - t[0] = '-'; - t[1] = 'o'; - memmove(t + 2, *p, len + 1); - *p = t; - } - argv = p - 1; - break; - } - } -} - -static void usage(void) { (void)fprintf(stderr,