logo

utils-std

Collection of commonly available Unix tools. Clone with: git clone https://anongit.hacktivis.me/git/utils-std.git/
commit: b4a83c313b6f36d6823fcf8fa370fad5c697dc8a
parent c343f9b2e86d9ba52d9fcb33033336e8a038b435
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Mon, 24 Feb 2025 13:18:15 +0100

lib/iso_parse: Add support for email datetimes

Diffstat:

M cmd/date.1.in      |  10 ++++++----
M lib/iso_parse.c    | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------
M lib/iso_parse.mdoc |  14 +++++++++++---
M test-cmd/date.sh   |  20 +++++++++++++++++++-
4 files changed, 258 insertions(+), 62 deletions(-)

diff --git a/cmd/date.1.in b/cmd/date.1.in @@ -28,7 +28,8 @@ When .Nm is invoked without arguments it displays the current datetime -Otherwise, depending on the options specified, will print the datetime in a user-defined way. +Otherwise, depending on the options specified, +will print the datetime in a user-defined way. .Sh OPTIONS .Bl -tag -width Ds .It Fl d Ar datetime @@ -75,7 +76,7 @@ instead of current datetime. .It Fl R Set the default value of .Ar format -to match RFC5322 (Internet Message Format). +to match RFC5322 (Email / Internet Message Format). .It Ar mmddHHMM Ns Oo Oo Ar CC Oc Ns Ar yy Oc Sets custom datetime, if .Fl j @@ -97,7 +98,8 @@ centuries aka %C century-less years aka %y .El .Pp -For example 072505542024 corresponds to 2024-07-25T05:54, as you can verify with the following command: +For example 072505542024 corresponds to 2024-07-25T05:54, +as you can verify with the following command: .Dl date -j 072505542024 +%Y-%m-%dT%H:%M .It Cm + Ns Ar format Set the displayed datetime in @@ -112,7 +114,7 @@ Otherwise defaults to .Sh ENVIRONMENT Look at the manual page of .Xr strftime 3 -for the environment variables, typical ones are +for the environment variables, typical ones are .Ev TZ , .Ev LC_TIME and diff --git a/lib/iso_parse.c b/lib/iso_parse.c @@ -18,6 +18,213 @@ #include <string.h> /* memset */ #include <time.h> /* strptime, tm */ +static const char *short_weekday_name[7] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; +static const char *short_month_name[12] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"}; + +// Parses [+|-]HH:?MM timezone offsets +static char * +tzoffset_parse(char *s, struct tm *time, const char **errstr) +{ +#ifndef TZNAME_MAX +#define TZNAME_MAX _POSIX_TZNAME_MAX +#endif +#if TZNAME_MAX < 5 +#error TZNAME_MAX is too small +#endif + static char offname[TZNAME_MAX + 1] = ""; + + int neg; + if(s[0] == '+') + neg = 0; + else if(s[0] == '-') + neg = 1; + else + { + *errstr = "Invalid 
timezone offset, must start with + or -"; + return NULL; + } + + size_t offname_i = 0; + offname[offname_i++] = *s++; + + if(isdigit(s[0]) && isdigit(s[1])) + { + time->tm_gmtoff = (s[0] - '0') * 36000 + (s[1] - '0') * 3600; + offname[offname_i++] = *s++; + offname[offname_i++] = *s++; + } + else + { + *errstr = "Invalid timezone offset, no digits after <+|->"; + return NULL; + } + + if(s[0] == ':') s++; + + if(isdigit(s[0]) && isdigit(s[1])) + { + time->tm_gmtoff += (s[0] - '0') * 600 + (s[1] - '0') * 60; + offname[offname_i++] = *s++; + offname[offname_i++] = *s++; + } + else + { + *errstr = "Invalid timezone offset, no digits after <+|->HH[:]"; + return NULL; + } + + if(neg) time->tm_gmtoff = -time->tm_gmtoff; + + offname[offname_i++] = '\0'; + + time->tm_isdst = 0; + time->tm_zone = offname; + + return s; +} + +// For iso_parse function +// Sets *errstr to NULL when it isn't an email date-time +// +// Check if it could be Email / Internet Message Format datetime +// - Ignores RFC822 (ARPA era, folding space, 2-digit year) +// - Uses RFC5322 / RFC2822 with ignoring RFC822 obsolete formats (aka obs) +// +// RFC5322 and RFC2822 (no obs): "([ ]*Day,)[ ]*DD[ ]+Mon[ ]+YYYY[ ]+HH:MM(:SS)?[ ]+[+/-]hhmm" +static char * +email_date_parse(char *arg, struct tm *time, const char **errstr) +{ + // Kept free of strptime() due to update/overriding being undefined and + // requiring custom parsing, notably locale-free, which strptime() can't handle + + for(; isspace(arg[0]); arg++) + ; + + // Change `time` only right before returning in case datetime is invalid + struct tm tmp_time = *time; + tmp_time.tm_isdst = -1; + tmp_time.tm_wday = -1; + + if(arg[3] == ',') + { + // Because %a/%A is locale-dependent, Sunday is tm_wday=0 + for(size_t i = 0; i < 7; i++) + { + if(memcmp(arg, short_weekday_name[i], 3) == 0) + { + tmp_time.tm_wday = i; + break; + } + } + + if(tmp_time.tm_wday == -1) + { + *errstr = "Failed parsing short weekday name"; + errno = 0; + return NULL; + } + + arg += 
4; + + for(; isspace(arg[0]); arg++) + ; + } + + errno = 0; + int parsed = 0; + char month_name[4] = ""; + if(sscanf(arg, + "%2d %3s %d %2d:%2d%n", + &tmp_time.tm_mday, + month_name, + &tmp_time.tm_year, + &tmp_time.tm_hour, + &tmp_time.tm_min, + &parsed) < 5) + { + if(errno == 0 || errno == EINVAL) + { + if(tmp_time.tm_wday == -1) + { + *errstr = NULL; + } + else + { + *errstr = "Failed parsing Email-datetime"; + } + } + else + { + *errstr = strerror(errno); + errno = 0; + } + return NULL; + } + + if(tmp_time.tm_year < 49) + { + tmp_time.tm_year += 100; // 2000-2049 + } + else if(tmp_time.tm_year > 99) + { + tmp_time.tm_year -= 1900; + } + + arg += parsed; + + if(arg[0] == ':' && isdigit(arg[1])) + { + if(isdigit(arg[2])) + { + tmp_time.tm_sec = (arg[1] - '0') * 10 + (arg[2] - '0'); + arg += 3; + } + else + { + tmp_time.tm_sec = arg[1] - '0'; + arg += 2; + } + } + + for(; isspace(arg[0]); arg++) + ; + + // Consider that nobody is going to transmit a timezone name which isn't GMT + if(arg[0] == 'G' && arg[1] == 'M' && arg[2] == 'T' && (arg[3] == '\0' || isspace(arg[3]))) + { + tmp_time.tm_isdst = 0; + tmp_time.tm_gmtoff = 0; + tmp_time.tm_zone = "UTC"; + } + else + { + arg = tzoffset_parse(arg, &tmp_time, errstr); + if(arg == NULL) return NULL; + } + + // Done extracting directly from arg + + tmp_time.tm_mon = -1; + // Because %b/%B is locale-dependent + for(size_t i = 0; i < 12; i++) + { + if(memcmp(month_name, short_month_name[i], 3) == 0) + { + tmp_time.tm_mon = i; + break; + } + } + if(tmp_time.tm_mon < 0) + { + *errstr = "Failed parsing short month name"; + errno = 0; + return NULL; + } + + memcpy(time, &tmp_time, sizeof(tmp_time)); + return arg; +} + // Sets errstr on failure // YYYY-MM-DD[T ]hh:mm:SS([,\.]frac)?(Z|[+\-]hh:?mm)? 
char * @@ -44,7 +251,18 @@ iso_parse(char *arg, struct tm *time, long *nsec, const char **errstr) return endptr; } - // No %F in POSIX + char *ret = email_date_parse(arg, time, errstr); + if(ret != NULL || *errstr != NULL) + { + return ret; + } + + // Try parsing as RFC3339 subset of ISO 8601:1988 + + // FIXME?: Calling strptime() multiple times is explicitly unspecified in POSIX.1-2024 + // instead a single strptime() call should be done + + // No %F in POSIX prior to POSIX.1-2024 (<https://www.austingroupbugs.net/view.php?id=920>) char *s = strptime(arg, "%Y-%m-%d", time); if(s == NULL) @@ -105,64 +323,14 @@ iso_parse(char *arg, struct tm *time, long *nsec, const char **errstr) { if(s[0] == 'Z' && s[1] == '\0') { + time->tm_isdst = 0; time->tm_gmtoff = 0; time->tm_zone = "UTC"; } else { -#ifndef TZNAME_MAX -#define TZNAME_MAX _POSIX_TZNAME_MAX -#endif -#if TZNAME_MAX < 5 -#error TZNAME_MAX is too small -#endif - static char offname[TZNAME_MAX + 1] = ""; - - int neg; - if(s[0] == '+') - neg = 0; - else if(s[0] == '-') - neg = 1; - else - { - *errstr = "Invalid timezone offset, must start with + or -"; - return NULL; - } - - size_t offname_i = 0; - offname[offname_i++] = *s++; - - if(isdigit(s[0]) && isdigit(s[1])) - { - time->tm_gmtoff = (s[0] - '0') * 36000 + (s[1] - '0') * 3600; - offname[offname_i++] = *s++; - offname[offname_i++] = *s++; - } - else - { - *errstr = "Invalid timezone offset, no digits after <+|->"; - return NULL; - } - - if(s[0] == ':') s++; - - if(isdigit(s[0]) && isdigit(s[1])) - { - time->tm_gmtoff += (s[0] - '0') * 600 + (s[1] - '0') * 60; - offname[offname_i++] = *s++; - offname[offname_i++] = *s++; - } - else - { - *errstr = "Invalid timezone offset, no digits after <+|->HH[:]"; - return NULL; - } - - if(neg) time->tm_gmtoff = -time->tm_gmtoff; - - offname[offname_i++] = '\0'; - - time->tm_zone = offname; + s = tzoffset_parse(s, time, errstr); + if(s == NULL) return NULL; } } diff --git a/lib/iso_parse.mdoc b/lib/iso_parse.mdoc @@ -6,13 
+6,21 @@ .\" .Fl d Ar datetime .\" .so lib/iso_parse.mdoc .\" -Should be formatted either with a leading @ (at) symbol followed by +Should be formatted either with a leading @ (at) symbol followed by the Unix timestamp (number of seconds before and after 1970-01-01 00:00:00Z), for example .Ql @1698791420 corresponds to 2023-10-31 23:30:20 UTC .Pp -Or as +Or as Email / "Internet Message Format" (RFC5322, RFC2822, RFC822), for example: +.Bl -bullet -compact +.It +.Ql Fri, 21 Nov 1997 09:55:06 -0600 +.It +.Ql 21 Nov 97 09:55:06 GMT +.El +.Pp +Or as RFC3339 which looks like .Ql YYYY-MM-DDThh:mm:SS[frac][tz] , where: .Bl -tag -width Ds @@ -33,7 +41,7 @@ When empty it corresponds to local time. Otherwise it can be an UTC offset in the format .Ql [+-]HH:?MM or the letter -.Qq Z , +.Qq Z , signifying UTC. .El .Pp diff --git a/test-cmd/date.sh b/test-cmd/date.sh @@ -3,7 +3,7 @@ # SPDX-License-Identifier: MPL-2.0 target="$(dirname "$0")/../cmd/date" -plans=27 +plans=31 . "$(dirname "$0")/tap.sh" . "$(dirname "$0")/init_env.sh" @@ -76,6 +76,24 @@ t 'iso Nano-Seconds' '-u -d 2025-02-10T21:05:53,437742835+01:00 -Ins' '2025-02-1 t '+foo%%bar' '+foo%%bar' 'foo%bar ' +t_args 'email RFC2822-RFC5322-example1' '1997-11-21 15:55:06+0000 +' -u -d 'Fri, 21 Nov 1997 09:55:06 -0600' '+%F %T%z' + +# date -u -d '1969-02-13 23:32:00-0330' '+%F %T%z' +t_args 'email RFC2822-RFC5322-whitespace-oddities' '1969-02-14 03:02:00+0000 +' -u -d 'Thu, + 13 + Feb + 1969 + 23:32 + -0330' '+%F %T%z' + +t_args 'email RFC2822-RFC5322-obsolete-dates' '1997-11-21 09:55:06+0000 +' -u -d '21 Nov 97 09:55:06 GMT' '+%F %T%z' + +t_args 'email Y2K' '2017-11-21 09:55:06+0000 +' -u -d '21 Nov 17 09:55:06 GMT' '+%F %T%z' + #usage="\ #date [-uR] [-d datetime] [+format] #date [-uR] -f now_format now [+format]