iso_parse.c (9541B)
- // utils-std: Collection of commonly available Unix tools
- // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
- // SPDX-License-Identifier: MPL-2.0
- #define _DEFAULT_SOURCE // tm_gmtoff/tm_zone
- #define _XOPEN_SOURCE 700 // strptime (NetBSD)
- #define _POSIX_C_SOURCE 200809L // st_atim/st_mtim
- #include "./iso_parse.h"
- #include <assert.h>
- #include <ctype.h> /* isdigit */
- #include <errno.h> /* errno */
- #include <inttypes.h> /* PRId16 */
- #include <limits.h> /* TZNAME_MAX */
- #include <stdio.h> /* perror, sscanf */
- #include <stdlib.h> /* strtol */
- #include <string.h> /* memset */
- #include <time.h> /* strptime, tm */
// English three-letter weekday abbreviations, in tm_wday order (Sunday first)
static const char *short_weekday_name[7] = {
	"Sun",
	"Mon",
	"Tue",
	"Wed",
	"Thu",
	"Fri",
	"Sat",
};

// English three-letter month abbreviations, in tm_mon order (January first)
static const char *short_month_name[12] = {
	"Jan",
	"Feb",
	"Mar",
	"Apr",
	"May",
	"Jun",
	"Jul",
	"Aug",
	"Sep",
	"Oct",
	"Nov",
	"Dec",
};
// Parses [+|-]HH:?MM timezone offsets
//
// On success: stores the offset in seconds into time->tm_gmtoff, sets
// time->tm_isdst to 0, points time->tm_zone at a static buffer holding the
// offset spelled "<sign>HHMM", and returns a pointer to the first character
// after the offset. The buffer is shared by every call, so a later successful
// call rewrites the name seen through an earlier tm_zone.
//
// On failure: sets *errstr, leaves `time` and the static buffer untouched,
// and returns NULL.
static char *
tzoffset_parse(char *s, struct tm *time, const char **errstr)
{
#ifndef TZNAME_MAX
#define TZNAME_MAX _POSIX_TZNAME_MAX
#endif
#if TZNAME_MAX < 5
#error TZNAME_MAX is too small
#endif
	// Needs room for sign + 4 digits + NUL, guaranteed by the check above
	static char offname[TZNAME_MAX + 1] = "";

	if(s[0] != '+' && s[0] != '-')
	{
		*errstr = "Invalid timezone offset, must start with + or -";
		return NULL;
	}
	const char sign = s[0];
	s++;

	// <ctype.h> functions are only defined for values representable as
	// unsigned char (or EOF), hence the casts
	if(!isdigit((unsigned char)s[0]) || !isdigit((unsigned char)s[1]))
	{
		*errstr = "Invalid timezone offset, no digits after <+|->";
		return NULL;
	}
	const char *hours = s;
	s += 2;

	// The colon between hours and minutes is optional
	if(s[0] == ':') s++;

	if(!isdigit((unsigned char)s[0]) || !isdigit((unsigned char)s[1]))
	{
		*errstr = "Invalid timezone offset, no digits after <+|->HH[:]";
		return NULL;
	}
	const char *minutes = s;
	s += 2;

	long gmtoff = (hours[0] - '0') * 36000L + (hours[1] - '0') * 3600L;
	gmtoff += (minutes[0] - '0') * 600L + (minutes[1] - '0') * 60L;
	if(sign == '-') gmtoff = -gmtoff;

	// Everything validated: only now touch the outputs
	offname[0] = sign;
	offname[1] = hours[0];
	offname[2] = hours[1];
	offname[3] = minutes[0];
	offname[4] = minutes[1];
	offname[5] = '\0';

	time->tm_gmtoff = gmtoff;
	time->tm_isdst = 0;
	time->tm_zone = offname;

	return s;
}
- // For iso_parse function
- // Sets *errstr to NULL when it isn't an email date-time
- //
- // Check if it could be Email / Internet Message Format datetime
- // - Ignores RFC822 (ARPA era, folding space, 2-digit year)
- // - Uses RFC5322 / RFC2822 with ignoring RFC822 obsolete formats (aka obs)
- //
- // RFC5322 and RFC2822 (no obs): "([ ]*Day,)[ ]*DD[ ]+Mon[ ]+YYYY[ ]+HH:MM(:SS)?[ ]+[+/-]hhmm"
- static char *
- email_date_parse(char *arg, struct tm *time, const char **errstr)
- {
- // Kept free of strptime() due to update/overriding being undefined and
- // requiring custom parsing, notably locale-free, which strptime() can't handle
- for(; isspace(arg[0]); arg++)
- ;
- // Change `time` only right before returning in case datetime is invalid
- struct tm tmp_time = *time;
- tmp_time.tm_isdst = -1;
- tmp_time.tm_wday = -1;
- if(arg[3] == ',')
- {
- // Because %a/%A is locale-dependent, Sunday is tm_wday=0
- for(size_t i = 0; i < 7; i++)
- {
- if(memcmp(arg, short_weekday_name[i], 3) == 0)
- {
- tmp_time.tm_wday = i;
- break;
- }
- }
- if(tmp_time.tm_wday == -1)
- {
- *errstr = "Failed parsing short weekday name";
- errno = 0;
- return NULL;
- }
- arg += 4;
- for(; isspace(arg[0]); arg++)
- ;
- }
- errno = 0;
- int parsed = 0;
- char month_name[4] = "";
- if(sscanf(arg,
- "%2d %3s %d %2d:%2d%n",
- &tmp_time.tm_mday,
- month_name,
- &tmp_time.tm_year,
- &tmp_time.tm_hour,
- &tmp_time.tm_min,
- &parsed) < 5)
- {
- if(errno == 0 || errno == EINVAL)
- {
- if(tmp_time.tm_wday == -1)
- {
- *errstr = NULL;
- }
- else
- {
- *errstr = "Failed parsing Email-datetime";
- }
- }
- else
- {
- *errstr = strerror(errno);
- errno = 0;
- }
- return NULL;
- }
- if(tmp_time.tm_year < 49)
- {
- tmp_time.tm_year += 100; // 2000-2049
- }
- else if(tmp_time.tm_year > 99)
- {
- tmp_time.tm_year -= 1900;
- }
- arg += parsed;
- if(arg[0] == ':' && isdigit(arg[1]))
- {
- if(isdigit(arg[2]))
- {
- tmp_time.tm_sec = (arg[1] - '0') * 10 + (arg[2] - '0');
- arg += 3;
- }
- else
- {
- tmp_time.tm_sec = arg[1] - '0';
- arg += 2;
- }
- }
- for(; isspace(arg[0]); arg++)
- ;
- // Consider that nobody is going to transmit a timezone name which isn't GMT
- if(arg[0] == 'G' && arg[1] == 'M' && arg[2] == 'T' && (arg[3] == '\0' || isspace(arg[3])))
- {
- tmp_time.tm_isdst = 0;
- tmp_time.tm_gmtoff = 0;
- tmp_time.tm_zone = "UTC";
- }
- else
- {
- arg = tzoffset_parse(arg, &tmp_time, errstr);
- if(arg == NULL) return NULL;
- }
- // Done extracting directly from arg
- tmp_time.tm_mon = -1;
- // Because %b/%B is locale-dependent
- for(size_t i = 0; i < 12; i++)
- {
- if(memcmp(month_name, short_month_name[i], 3) == 0)
- {
- tmp_time.tm_mon = i;
- break;
- }
- }
- if(tmp_time.tm_mon < 0)
- {
- *errstr = "Failed parsing short month name";
- errno = 0;
- return NULL;
- }
- memcpy(time, &tmp_time, sizeof(tmp_time));
- return arg;
- }
- // For iso_parse function
- // Sets *errstr to NULL when it isn't an email date-time
- //
- // Check if it could be asctime() format: Thu Nov 24 18:22:48 1986
- static char *
- asctime_date_parse(char *arg, struct tm *time, const char **errstr)
- {
- // Kept free of strptime() due to update/overriding being undefined and
- // requiring custom parsing, notably locale-free, which strptime() can't handle
- // Change `time` only right before returning in case datetime is invalid
- struct tm tmp_time = *time;
- tmp_time.tm_isdst = -1;
- tmp_time.tm_wday = -1;
- // asctime() doesn't gives any timezone information, assume UTC
- tmp_time.tm_isdst = 0;
- tmp_time.tm_gmtoff = 0;
- tmp_time.tm_zone = "UTC";
- errno = 0;
- int parsed = 0;
- char month_name[4] = "";
- char weekday_name[4] = "";
- if(sscanf(arg,
- "%3s %3s %d %2d:%2d:%2d %d%n",
- weekday_name,
- month_name,
- &tmp_time.tm_mday,
- &tmp_time.tm_hour,
- &tmp_time.tm_min,
- &tmp_time.tm_sec,
- &tmp_time.tm_year,
- &parsed) < 7)
- {
- if(errno == 0 || errno == EINVAL)
- {
- *errstr = NULL;
- }
- else
- {
- *errstr = strerror(errno);
- errno = 0;
- }
- return NULL;
- }
- arg += parsed;
- tmp_time.tm_year -= 1900;
- tmp_time.tm_wday = -1;
- // Because %a/%A is locale-dependent
- for(size_t i = 0; i < 7; i++)
- {
- if(memcmp(weekday_name, short_weekday_name[i], 3) == 0)
- {
- tmp_time.tm_wday = i;
- break;
- }
- }
- if(tmp_time.tm_wday < 0)
- {
- *errstr = "Failed parsing short weekday name";
- errno = 0;
- return NULL;
- }
- tmp_time.tm_mon = -1;
- // Because %b/%B is locale-dependent
- for(size_t i = 0; i < 12; i++)
- {
- if(memcmp(month_name, short_month_name[i], 3) == 0)
- {
- tmp_time.tm_mon = i;
- break;
- }
- }
- if(tmp_time.tm_mon < 0)
- {
- *errstr = "Failed parsing short month name";
- errno = 0;
- return NULL;
- }
- for(; isspace(arg[0]); arg++)
- ;
- memcpy(time, &tmp_time, sizeof(tmp_time));
- return arg;
- }
// Sets errstr on failure
// YYYY-MM-DD[T ]hh:mm:SS([,\.]frac)?(Z|[+\-]hh:?mm)?
//
// Also accepts, tried in this order before the ISO form:
// - "@<seconds>": seconds since the Epoch, broken down as UTC
// - Email / Internet Message Format date-time (see email_date_parse)
// - asctime() format (see asctime_date_parse)
//
// `time` receives the broken-down time and *nsec the fractional seconds in
// nanoseconds (0 when absent). Returns a non-NULL pointer into `arg` on
// success and NULL on failure. With a trailing "Z" the returned pointer is
// left on the 'Z'.
char *
iso_parse(char *arg, struct tm *time, long *nsec, const char **errstr)
{
	*nsec = 0;

	// For Alpine's abuild compatibility
	if(arg[0] == '@')
	{
		arg++;

		char *endptr = NULL;
		// strtol() only sets errno on failure, so clear any stale value first
		errno = 0;
		time_t now = strtol(arg, &endptr, 10);
		if(errno != 0)
		{
			*errstr = strerror(errno);
			errno = 0;
			return NULL;
		}

		// Without this, "@" alone would silently mean the Epoch
		if(endptr == arg)
		{
			*errstr = "No digits found after @";
			return NULL;
		}

		if(gmtime_r(&now, time) == NULL)
		{
			*errstr = strerror(errno);
			errno = 0;
			return NULL;
		}

		return endptr;
	}

	char *ret = NULL;

	// Each helper returns NULL with *errstr == NULL to say "not my format"
	ret = email_date_parse(arg, time, errstr);
	if(ret != NULL || *errstr != NULL)
	{
		return ret;
	}

	ret = asctime_date_parse(arg, time, errstr);
	if(ret != NULL || *errstr != NULL)
	{
		return ret;
	}

	// Try parsing as RFC3339 subset of ISO 8601:1988
	// FIXME?: Calling strptime() multiple times is explicitly unspecified in POSIX.1-2024
	//         instead a single strptime() call should be done
	// No %F in POSIX prior to POSIX.1-2024 (<https://www.austingroupbugs.net/view.php?id=920>)
	char *s = strptime(arg, "%Y-%m-%d", time);
	if(s == NULL)
	{
		*errstr = "strptime(…, \"%Y-%m-%d\", …) returned NULL";
		errno = 0;
		return NULL;
	}

	if(s[0] != 'T' && s[0] != ' ')
	{
		*errstr = "Couldn't find time-separator (T or space) after date (Y-m-d)";
		errno = 0;
		return NULL;
	}
	s++;

	s = strptime(s, "%H:%M:%S", time);
	if(s == NULL)
	{
		*errstr = "strptime(…, \"%H:%M:%S\", …) returned NULL";
		errno = 0;
		return NULL;
	}

	if(s[0] == ',' || s[0] == '.')
	{
		// Parsed digit by digit instead of through scanf("%lf"): no dependency
		// on the locale's decimal point, no exponent/inf/nan syntax, no
		// floating-point truncation, and the caller's string isn't modified
		s++;

		// <ctype.h> functions are only defined for values representable as
		// unsigned char (or EOF), hence the casts
		if(!isdigit((unsigned char)s[0]))
		{
			*errstr = "Failed to parse fractional seconds";
			return NULL;
		}

		long frac = 0;
		int digits = 0;
		for(; digits < 9 && isdigit((unsigned char)s[0]); digits++, s++)
		{
			frac = frac * 10 + (s[0] - '0');
		}

		// too many digits
		if(isdigit((unsigned char)s[0]))
		{
			*errstr = "Too many digits (> 10) for fractional seconds";
			return NULL;
		}

		// Scale e.g. ".5" up to 500000000 nanoseconds
		for(; digits < 9; digits++)
		{
			frac *= 10;
		}

		*nsec = frac;
	}

	while(isspace((unsigned char)s[0]))
		s++;

	// No timezone designator
	if(s[0] == '\0') return s;

	if(s[0] == 'Z' && s[1] == '\0')
	{
		time->tm_isdst = 0;
		time->tm_gmtoff = 0;
		time->tm_zone = "UTC";
		return s;
	}

	// Returns NULL with *errstr set when the offset is malformed
	return tzoffset_parse(s, time, errstr);
}
// mktime() wrapper: mktime() messes with tm_gmtoff yet doesn't apply it, even
// in POSIX.1-2024. The caller's tm_gmtoff and tm_zone are put back after the
// call and the offset is applied to the result here instead.
// Returns (time_t)-1 on failure
time_t
mktime_tz(struct tm *tm)
{
	const long saved_gmtoff = tm->tm_gmtoff;
	const char *saved_zone = tm->tm_zone;

	const time_t stamp = mktime(tm);

	// Put back what mktime() may have overwritten
	tm->tm_zone = saved_zone;
	tm->tm_gmtoff = saved_gmtoff;

	if(stamp != (time_t)-1)
	{
		// 12:00+02:00 corresponds to 10:00Z so the offset gets subtracted
		return stamp - saved_gmtoff;
	}

	return stamp;
}