iso_parse.c (9769B)
- // utils-std: Collection of commonly available Unix tools
 - // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
 - // SPDX-License-Identifier: MPL-2.0
 - #define _DEFAULT_SOURCE // tm_gmtoff/tm_zone
 - #define _XOPEN_SOURCE 700 // strptime (NetBSD)
 - #define _POSIX_C_SOURCE 200809L // st_atim/st_mtim
 - #include "./iso_parse.h"
 - #include <assert.h>
 - #include <ctype.h> /* isdigit */
 - #include <errno.h> /* errno */
 - #include <inttypes.h> /* PRId16 */
 - #include <limits.h> /* TZNAME_MAX */
 - #include <stdio.h> /* perror, sscanf */
 - #include <stdlib.h> /* strtol */
 - #include <string.h> /* memset */
 - #include <time.h> /* strptime, tm */
 // English abbreviated weekday names, in tm_wday order (index 0 is Sunday).
 // Used instead of strptime()'s %a, which is locale-dependent.
 static const char *const short_weekday_name[7] = {
     "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat",
 };

 // English abbreviated month names, in tm_mon order (index 0 is January).
 // Used instead of strptime()'s %b, which is locale-dependent.
 static const char *const short_month_name[12] = {
     "Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
 };
 // Parses [+|-]HH:?MM timezone offsets found at the start of `s`.
 //
 // On success:
 // - time->tm_gmtoff receives the signed offset in seconds
 // - time->tm_isdst is set to 0
 // - time->tm_zone points to a static buffer holding the sign followed by the
 //   four digits, without the optional colon (e.g. "+0200")
 // - the return value points to the first character after the offset
 // On failure: *errstr receives a message, NULL is returned, and neither `time`
 // nor the static buffer is modified.
 //
 // The zone name buffer is shared between calls: a later successful call
 // overwrites the string that an earlier tm_zone points to.
 //
 // Would need tzalloc from <https://www.austingroupbugs.net/view.php?id=1794> to parse timezone names
 static char *
 tzoffset_parse(char *s, struct tm *time, const char **errstr)
 {
 #ifndef TZNAME_MAX
 #define TZNAME_MAX _POSIX_TZNAME_MAX
 #endif
 #if TZNAME_MAX < 5
 #error TZNAME_MAX is too small
 #endif
     static char offname[TZNAME_MAX + 1] = "";

     const char sign = s[0];
     if(sign != '+' && sign != '-')
     {
         *errstr = "Invalid timezone offset, must start with + or -";
         return NULL;
     }
     s++;

     // Built locally and only copied to offname once the whole offset is valid
     char name[6];
     name[0] = sign;

     // <ctype.h> functions are only defined for unsigned char values (and EOF),
     // hence the casts. The || short-circuit keeps s[1] unread when s[0] is NUL.
     if(!isdigit((unsigned char)s[0]) || !isdigit((unsigned char)s[1]))
     {
         *errstr = "Invalid timezone offset, no digits after <+|->";
         return NULL;
     }
     long gmtoff = (s[0] - '0') * 36000L + (s[1] - '0') * 3600L;
     name[1] = s[0];
     name[2] = s[1];
     s += 2;

     // Optional separator between hours and minutes
     if(s[0] == ':') s++;

     if(!isdigit((unsigned char)s[0]) || !isdigit((unsigned char)s[1]))
     {
         *errstr = "Invalid timezone offset, no digits after <+|->HH[:]";
         return NULL;
     }
     gmtoff += (s[0] - '0') * 600L + (s[1] - '0') * 60L;
     name[3] = s[0];
     name[4] = s[1];
     name[5] = '\0';
     s += 2;

     // Everything was validated, commit the result
     memcpy(offname, name, sizeof(name));
     time->tm_gmtoff = (sign == '-') ? -gmtoff : gmtoff;
     time->tm_isdst = 0;
     time->tm_zone = offname;

     return s;
 }
 - // For iso_parse function
 - // Sets *errstr to NULL when it isn't an email date-time
 - //
 - // Check if it could be Email / Internet Message Format datetime
 - // - Ignores RFC822 (ARPA era, folding space, 2-digit year)
 - // - Uses RFC5322 / RFC2822 with ignoring RFC822 obsolete formats (aka obs)
 - //
 - // RFC5322 and RFC2822 (no obs): "([ ]*Day,)[ ]*DD[ ]+Mon[ ]+YYYY[ ]+HH:MM(:SS)?[ ]+[+/-]hhmm"
 - static char *
 - email_date_parse(char *arg, struct tm *time, const char **errstr)
 - {
 - // Kept free of strptime() due to update/overriding being undefined and
 - // requiring custom parsing, notably locale-free, which strptime() can't handle
 - for(; isspace(arg[0]); arg++)
 - ;
 - // Change `time` only right before returning in case datetime is invalid
 - struct tm tmp_time = *time;
 - tmp_time.tm_isdst = -1;
 - tmp_time.tm_wday = -1;
 - if(arg[3] == ',')
 - {
 - // Because %a/%A is locale-dependent, Sunday is tm_wday=0
 - for(size_t i = 0; i < 7; i++)
 - {
 - if(memcmp(arg, short_weekday_name[i], 3) == 0)
 - {
 - tmp_time.tm_wday = i;
 - break;
 - }
 - }
 - if(tmp_time.tm_wday == -1)
 - {
 - *errstr = "Failed parsing short weekday name";
 - errno = 0;
 - return NULL;
 - }
 - arg += 4;
 - for(; isspace(arg[0]); arg++)
 - ;
 - }
 - errno = 0;
 - int parsed = 0;
 - char month_name[4] = "";
 - if(sscanf(arg,
 - "%2d %3s %d %2d:%2d%n",
 - &tmp_time.tm_mday,
 - month_name,
 - &tmp_time.tm_year,
 - &tmp_time.tm_hour,
 - &tmp_time.tm_min,
 - &parsed) < 5)
 - {
 - if(errno == 0 || errno == EINVAL)
 - {
 - if(tmp_time.tm_wday == -1)
 - {
 - *errstr = NULL;
 - }
 - else
 - {
 - *errstr = "Failed parsing Email-datetime";
 - }
 - }
 - else
 - {
 - *errstr = strerror(errno);
 - errno = 0;
 - }
 - return NULL;
 - }
 - if(tmp_time.tm_year < 49)
 - {
 - tmp_time.tm_year += 100; // 2000-2049
 - }
 - else if(tmp_time.tm_year > 99)
 - {
 - tmp_time.tm_year -= 1900;
 - }
 - arg += parsed;
 - if(arg[0] == ':' && isdigit(arg[1]))
 - {
 - if(isdigit(arg[2]))
 - {
 - tmp_time.tm_sec = (arg[1] - '0') * 10 + (arg[2] - '0');
 - arg += 3;
 - }
 - else
 - {
 - tmp_time.tm_sec = arg[1] - '0';
 - arg += 2;
 - }
 - }
 - for(; isspace(arg[0]); arg++)
 - ;
 - // Consider that nobody is going to transmit a timezone name which isn't GMT
 - if(arg[0] == 'G' && arg[1] == 'M' && arg[2] == 'T' && (arg[3] == '\0' || isspace(arg[3])))
 - {
 - tmp_time.tm_isdst = 0;
 - tmp_time.tm_gmtoff = 0;
 - tmp_time.tm_zone = "UTC";
 - }
 - else
 - {
 - arg = tzoffset_parse(arg, &tmp_time, errstr);
 - if(arg == NULL) return NULL;
 - }
 - // Done extracting directly from arg
 - tmp_time.tm_mon = -1;
 - // Because %b/%B is locale-dependent
 - for(size_t i = 0; i < 12; i++)
 - {
 - if(memcmp(month_name, short_month_name[i], 3) == 0)
 - {
 - tmp_time.tm_mon = i;
 - break;
 - }
 - }
 - if(tmp_time.tm_mon < 0)
 - {
 - *errstr = "Failed parsing short month name";
 - errno = 0;
 - return NULL;
 - }
 - memcpy(time, &tmp_time, sizeof(tmp_time));
 - return arg;
 - }
 - // For iso_parse function
 - // Sets *errstr to NULL when it isn't an email date-time
 - //
 - // Check if it could be asctime() format: Thu Nov 24 18:22:48 1986
 - static char *
 - asctime_date_parse(char *arg, struct tm *time, const char **errstr)
 - {
 - // Kept free of strptime() due to update/overriding being undefined and
 - // requiring custom parsing, notably locale-free, which strptime() can't handle
 - // Change `time` only right before returning in case datetime is invalid
 - struct tm tmp_time = *time;
 - tmp_time.tm_isdst = -1;
 - tmp_time.tm_wday = -1;
 - // asctime() doesn't gives any timezone information, assume UTC
 - tmp_time.tm_isdst = 0;
 - tmp_time.tm_gmtoff = 0;
 - tmp_time.tm_zone = "UTC";
 - errno = 0;
 - int parsed = 0;
 - char month_name[4] = "";
 - char weekday_name[4] = "";
 - if(sscanf(arg,
 - "%3s %3s %d %2d:%2d:%2d %d%n",
 - weekday_name,
 - month_name,
 - &tmp_time.tm_mday,
 - &tmp_time.tm_hour,
 - &tmp_time.tm_min,
 - &tmp_time.tm_sec,
 - &tmp_time.tm_year,
 - &parsed) < 7)
 - {
 - if(errno == 0 || errno == EINVAL)
 - {
 - *errstr = NULL;
 - }
 - else
 - {
 - *errstr = strerror(errno);
 - errno = 0;
 - }
 - return NULL;
 - }
 - arg += parsed;
 - tmp_time.tm_year -= 1900;
 - tmp_time.tm_wday = -1;
 - // Because %a/%A is locale-dependent
 - for(size_t i = 0; i < 7; i++)
 - {
 - if(memcmp(weekday_name, short_weekday_name[i], 3) == 0)
 - {
 - tmp_time.tm_wday = i;
 - break;
 - }
 - }
 - if(tmp_time.tm_wday < 0)
 - {
 - *errstr = "Failed parsing short weekday name";
 - errno = 0;
 - return NULL;
 - }
 - tmp_time.tm_mon = -1;
 - // Because %b/%B is locale-dependent
 - for(size_t i = 0; i < 12; i++)
 - {
 - if(memcmp(month_name, short_month_name[i], 3) == 0)
 - {
 - tmp_time.tm_mon = i;
 - break;
 - }
 - }
 - if(tmp_time.tm_mon < 0)
 - {
 - *errstr = "Failed parsing short month name";
 - errno = 0;
 - return NULL;
 - }
 - for(; isspace(arg[0]); arg++)
 - ;
 - memcpy(time, &tmp_time, sizeof(tmp_time));
 - return arg;
 - }
 // Parses a date-time from the start of `arg`, trying in order:
 // - @<seconds since the Epoch>
 // - Email / Internet Message Format date-time (email_date_parse)
 // - asctime() format (asctime_date_parse)
 // - YYYY-MM-DD[T ]hh:mm:SS([,\.]frac)?(Z|[+\-]hh:?mm)?
 //
 // Fills `time` with the parsed fields and *nsec with the fractional seconds
 // expressed in nanoseconds (0 when there is no fraction).
 //
 // Returns a pointer to the first character which wasn't consumed.
 // Sets errstr on failure and returns NULL; *errstr is only meaningful when
 // NULL is returned, it isn't necessarily written on success.
 char *
 iso_parse(char *arg, struct tm *time, long *nsec, const char **errstr)
 {
     *nsec = 0;

     // For Alpine's abuild compatibility
     if(arg[0] == '@')
     {
         arg++;

         char *endptr = NULL;
         // strtol() never clears errno, so a stale value would look like a failure
         errno = 0;
         long epoch = strtol(arg, &endptr, 10);
         if(errno != 0)
         {
             *errstr = strerror(errno);
             errno = 0;
             return NULL;
         }
         if(endptr == arg)
         {
             *errstr = "No digits found after @";
             return NULL;
         }

         time_t now = epoch;
         if(gmtime_r(&now, time) == NULL)
         {
             *errstr = (errno != 0) ? strerror(errno) : "gmtime_r() returned NULL";
             errno = 0;
             return NULL;
         }

         return endptr;
     }

     // Both helpers return NULL with *errstr set to NULL when the input simply
     // isn't in their format, in which case the next format gets tried
     char *ret = email_date_parse(arg, time, errstr);
     if(ret != NULL || *errstr != NULL)
     {
         return ret;
     }

     ret = asctime_date_parse(arg, time, errstr);
     if(ret != NULL || *errstr != NULL)
     {
         return ret;
     }

     // Try parsing as RFC3339 subset of ISO 8601:1988

     // FIXME?: Calling strptime() multiple times is explicitly unspecified in POSIX.1-2024
     // instead a single strptime() call should be done

     // No %F in POSIX prior to POSIX.1-2024 (<https://www.austingroupbugs.net/view.php?id=920>)
     char *s = strptime(arg, "%Y-%m-%d", time);
     if(s == NULL)
     {
         *errstr = "strptime(…, \"%Y-%m-%d\", …) returned NULL";
         errno = 0;
         return NULL;
     }

     if(s[0] != 'T' && s[0] != ' ')
     {
         *errstr = "Couldn't find time-separator (T or space) after date (Y-m-d)";
         errno = 0;
         return NULL;
     }
     s++;

     s = strptime(s, "%H:%M:%S", time);
     if(s == NULL)
     {
         *errstr = "strptime(…, \"%H:%M:%S\", …) returned NULL";
         errno = 0;
         return NULL;
     }

     if(s[0] == ',' || s[0] == '.')
     {
         // Parsed digit by digit rather than with sscanf("%lf"): the latter
         // depends on the locale's decimal point, accepts exponents, and goes
         // through binary floating-point which can be off by one nanosecond.
         s++;

         // <ctype.h> functions are only defined for unsigned char values, hence the casts
         if(!isdigit((unsigned char)s[0]))
         {
             *errstr = "Failed to parse fractional seconds";
             return NULL;
         }

         long frac = 0;
         int digits = 0;
         for(; digits < 9 && isdigit((unsigned char)s[0]); digits++, s++)
         {
             frac = frac * 10 + (s[0] - '0');
         }

         // too many digits
         if(isdigit((unsigned char)s[0]))
         {
             *errstr = "Too many digits (> 10) for fractional seconds";
             return NULL;
         }

         // Scale up to nanoseconds, so ".5" becomes 500000000
         for(; digits < 9; digits++)
         {
             frac *= 10;
         }

         *nsec = frac;
     }

     while(isspace((unsigned char)s[0]))
         s++;

     // No timezone information: tm_gmtoff/tm_zone/tm_isdst are left as they were
     if(s[0] == '\0') return s;

     if(s[0] == 'Z' && s[1] == '\0')
     {
         time->tm_isdst = 0;
         time->tm_gmtoff = 0;
         time->tm_zone = "UTC";

         // Consume the 'Z', like tzoffset_parse() consumes a numeric offset
         return s + 1;
     }

     // Returns NULL with *errstr set when the offset is invalid
     return tzoffset_parse(s, time, errstr);
 }
 // mktime() wrapper which honours tm_gmtoff.
 // Needed because mktime() messes with tm_gmtoff yet doesn't apply it, even in POSIX.1-2024
 //
 // tm_gmtoff and tm_zone of `tm` are restored to the values they had on entry.
 // Returns (time_t)-1 on failure
 //
 // Maybe should be replaced by mktime_z once <https://www.austingroupbugs.net/view.php?id=1794> gets accepted and implemented
 time_t
 mktime_tz(struct tm *tm)
 {
     // Remember what the caller asked for before mktime() gets to touch it
     const char *const saved_zone = tm->tm_zone;
     const long saved_offset = tm->tm_gmtoff;

     time_t stamp = mktime(tm);

     tm->tm_gmtoff = saved_offset;
     tm->tm_zone = saved_zone;

     if(stamp != (time_t)-1)
     {
         // 12:00+02:00 corresponds to 10:00Z so the offset is applied reversed
         stamp += -saved_offset;
     }

     return stamp;
 }