logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git/

iso_parse.c (9769B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _DEFAULT_SOURCE // tm_gmtoff/tm_zone
  5. #define _XOPEN_SOURCE 700 // strptime (NetBSD)
  6. #define _POSIX_C_SOURCE 200809L // st_atim/st_mtim
  7. #include "./iso_parse.h"
  8. #include <assert.h>
  9. #include <ctype.h> /* isdigit */
  10. #include <errno.h> /* errno */
  11. #include <inttypes.h> /* PRId16 */
  12. #include <limits.h> /* TZNAME_MAX */
  13. #include <stdio.h> /* perror, sscanf */
  14. #include <stdlib.h> /* strtol */
  15. #include <string.h> /* memset */
  16. #include <time.h> /* strptime, tm */
  17. static const char *short_weekday_name[7] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
  18. static const char *short_month_name[12] = {
  19. "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
  20. // Parses [+|-]HH:?MM timezone offsets
  21. // Would need tzalloc from <https://www.austingroupbugs.net/view.php?id=1794> to parse timezone names
  22. static char *
  23. tzoffset_parse(char *s, struct tm *time, const char **errstr)
  24. {
  25. #ifndef TZNAME_MAX
  26. #define TZNAME_MAX _POSIX_TZNAME_MAX
  27. #endif
  28. #if TZNAME_MAX < 5
  29. #error TZNAME_MAX is too small
  30. #endif
  31. static char offname[TZNAME_MAX + 1] = "";
  32. int neg;
  33. if(s[0] == '+')
  34. neg = 0;
  35. else if(s[0] == '-')
  36. neg = 1;
  37. else
  38. {
  39. *errstr = "Invalid timezone offset, must start with + or -";
  40. return NULL;
  41. }
  42. size_t offname_i = 0;
  43. offname[offname_i++] = *s++;
  44. if(isdigit(s[0]) && isdigit(s[1]))
  45. {
  46. time->tm_gmtoff = (s[0] - '0') * 36000 + (s[1] - '0') * 3600;
  47. offname[offname_i++] = *s++;
  48. offname[offname_i++] = *s++;
  49. }
  50. else
  51. {
  52. *errstr = "Invalid timezone offset, no digits after <+|->";
  53. return NULL;
  54. }
  55. if(s[0] == ':') s++;
  56. if(isdigit(s[0]) && isdigit(s[1]))
  57. {
  58. time->tm_gmtoff += (s[0] - '0') * 600 + (s[1] - '0') * 60;
  59. offname[offname_i++] = *s++;
  60. offname[offname_i++] = *s++;
  61. }
  62. else
  63. {
  64. *errstr = "Invalid timezone offset, no digits after <+|->HH[:]";
  65. return NULL;
  66. }
  67. if(neg) time->tm_gmtoff = -time->tm_gmtoff;
  68. offname[offname_i++] = '\0';
  69. time->tm_isdst = 0;
  70. time->tm_zone = offname;
  71. return s;
  72. }
  73. // For iso_parse function
  74. // Sets *errstr to NULL when it isn't an email date-time
  75. //
  76. // Check if it could be Email / Internet Message Format datetime
  77. // - Ignores RFC822 (ARPA era, folding space, 2-digit year)
  78. // - Uses RFC5322 / RFC2822 with ignoring RFC822 obsolete formats (aka obs)
  79. //
  80. // RFC5322 and RFC2822 (no obs): "([ ]*Day,)[ ]*DD[ ]+Mon[ ]+YYYY[ ]+HH:MM(:SS)?[ ]+[+/-]hhmm"
  81. static char *
  82. email_date_parse(char *arg, struct tm *time, const char **errstr)
  83. {
  84. // Kept free of strptime() due to update/overriding being undefined and
  85. // requiring custom parsing, notably locale-free, which strptime() can't handle
  86. for(; isspace(arg[0]); arg++)
  87. ;
  88. // Change `time` only right before returning in case datetime is invalid
  89. struct tm tmp_time = *time;
  90. tmp_time.tm_isdst = -1;
  91. tmp_time.tm_wday = -1;
  92. if(arg[3] == ',')
  93. {
  94. // Because %a/%A is locale-dependent, Sunday is tm_wday=0
  95. for(size_t i = 0; i < 7; i++)
  96. {
  97. if(memcmp(arg, short_weekday_name[i], 3) == 0)
  98. {
  99. tmp_time.tm_wday = i;
  100. break;
  101. }
  102. }
  103. if(tmp_time.tm_wday == -1)
  104. {
  105. *errstr = "Failed parsing short weekday name";
  106. errno = 0;
  107. return NULL;
  108. }
  109. arg += 4;
  110. for(; isspace(arg[0]); arg++)
  111. ;
  112. }
  113. errno = 0;
  114. int parsed = 0;
  115. char month_name[4] = "";
  116. if(sscanf(arg,
  117. "%2d %3s %d %2d:%2d%n",
  118. &tmp_time.tm_mday,
  119. month_name,
  120. &tmp_time.tm_year,
  121. &tmp_time.tm_hour,
  122. &tmp_time.tm_min,
  123. &parsed) < 5)
  124. {
  125. if(errno == 0 || errno == EINVAL)
  126. {
  127. if(tmp_time.tm_wday == -1)
  128. {
  129. *errstr = NULL;
  130. }
  131. else
  132. {
  133. *errstr = "Failed parsing Email-datetime";
  134. }
  135. }
  136. else
  137. {
  138. *errstr = strerror(errno);
  139. errno = 0;
  140. }
  141. return NULL;
  142. }
  143. if(tmp_time.tm_year < 49)
  144. {
  145. tmp_time.tm_year += 100; // 2000-2049
  146. }
  147. else if(tmp_time.tm_year > 99)
  148. {
  149. tmp_time.tm_year -= 1900;
  150. }
  151. arg += parsed;
  152. if(arg[0] == ':' && isdigit(arg[1]))
  153. {
  154. if(isdigit(arg[2]))
  155. {
  156. tmp_time.tm_sec = (arg[1] - '0') * 10 + (arg[2] - '0');
  157. arg += 3;
  158. }
  159. else
  160. {
  161. tmp_time.tm_sec = arg[1] - '0';
  162. arg += 2;
  163. }
  164. }
  165. for(; isspace(arg[0]); arg++)
  166. ;
  167. // Consider that nobody is going to transmit a timezone name which isn't GMT
  168. if(arg[0] == 'G' && arg[1] == 'M' && arg[2] == 'T' && (arg[3] == '\0' || isspace(arg[3])))
  169. {
  170. tmp_time.tm_isdst = 0;
  171. tmp_time.tm_gmtoff = 0;
  172. tmp_time.tm_zone = "UTC";
  173. }
  174. else
  175. {
  176. arg = tzoffset_parse(arg, &tmp_time, errstr);
  177. if(arg == NULL) return NULL;
  178. }
  179. // Done extracting directly from arg
  180. tmp_time.tm_mon = -1;
  181. // Because %b/%B is locale-dependent
  182. for(size_t i = 0; i < 12; i++)
  183. {
  184. if(memcmp(month_name, short_month_name[i], 3) == 0)
  185. {
  186. tmp_time.tm_mon = i;
  187. break;
  188. }
  189. }
  190. if(tmp_time.tm_mon < 0)
  191. {
  192. *errstr = "Failed parsing short month name";
  193. errno = 0;
  194. return NULL;
  195. }
  196. memcpy(time, &tmp_time, sizeof(tmp_time));
  197. return arg;
  198. }
  199. // For iso_parse function
  200. // Sets *errstr to NULL when it isn't an email date-time
  201. //
  202. // Check if it could be asctime() format: Thu Nov 24 18:22:48 1986
  203. static char *
  204. asctime_date_parse(char *arg, struct tm *time, const char **errstr)
  205. {
  206. // Kept free of strptime() due to update/overriding being undefined and
  207. // requiring custom parsing, notably locale-free, which strptime() can't handle
  208. // Change `time` only right before returning in case datetime is invalid
  209. struct tm tmp_time = *time;
  210. tmp_time.tm_isdst = -1;
  211. tmp_time.tm_wday = -1;
  212. // asctime() doesn't gives any timezone information, assume UTC
  213. tmp_time.tm_isdst = 0;
  214. tmp_time.tm_gmtoff = 0;
  215. tmp_time.tm_zone = "UTC";
  216. errno = 0;
  217. int parsed = 0;
  218. char month_name[4] = "";
  219. char weekday_name[4] = "";
  220. if(sscanf(arg,
  221. "%3s %3s %d %2d:%2d:%2d %d%n",
  222. weekday_name,
  223. month_name,
  224. &tmp_time.tm_mday,
  225. &tmp_time.tm_hour,
  226. &tmp_time.tm_min,
  227. &tmp_time.tm_sec,
  228. &tmp_time.tm_year,
  229. &parsed) < 7)
  230. {
  231. if(errno == 0 || errno == EINVAL)
  232. {
  233. *errstr = NULL;
  234. }
  235. else
  236. {
  237. *errstr = strerror(errno);
  238. errno = 0;
  239. }
  240. return NULL;
  241. }
  242. arg += parsed;
  243. tmp_time.tm_year -= 1900;
  244. tmp_time.tm_wday = -1;
  245. // Because %a/%A is locale-dependent
  246. for(size_t i = 0; i < 7; i++)
  247. {
  248. if(memcmp(weekday_name, short_weekday_name[i], 3) == 0)
  249. {
  250. tmp_time.tm_wday = i;
  251. break;
  252. }
  253. }
  254. if(tmp_time.tm_wday < 0)
  255. {
  256. *errstr = "Failed parsing short weekday name";
  257. errno = 0;
  258. return NULL;
  259. }
  260. tmp_time.tm_mon = -1;
  261. // Because %b/%B is locale-dependent
  262. for(size_t i = 0; i < 12; i++)
  263. {
  264. if(memcmp(month_name, short_month_name[i], 3) == 0)
  265. {
  266. tmp_time.tm_mon = i;
  267. break;
  268. }
  269. }
  270. if(tmp_time.tm_mon < 0)
  271. {
  272. *errstr = "Failed parsing short month name";
  273. errno = 0;
  274. return NULL;
  275. }
  276. for(; isspace(arg[0]); arg++)
  277. ;
  278. memcpy(time, &tmp_time, sizeof(tmp_time));
  279. return arg;
  280. }
  281. // Sets errstr on failure
  282. // YYYY-MM-DD[T ]hh:mm:SS([,\.]frac)?(Z|[+\-]hh:?mm)?
  283. char *
  284. iso_parse(char *arg, struct tm *time, long *nsec, const char **errstr)
  285. {
  286. *nsec = 0;
  287. // For Alpine's abuild compatibility
  288. if(arg[0] == '@')
  289. {
  290. arg++;
  291. char *endptr = NULL;
  292. time_t now = strtol(arg, &endptr, 10);
  293. if(errno != 0)
  294. {
  295. *errstr = strerror(errno);
  296. errno = 0;
  297. return NULL;
  298. }
  299. gmtime_r(&now, time);
  300. return endptr;
  301. }
  302. char *ret = NULL;
  303. ret = email_date_parse(arg, time, errstr);
  304. if(ret != NULL || *errstr != NULL)
  305. {
  306. return ret;
  307. }
  308. ret = asctime_date_parse(arg, time, errstr);
  309. if(ret != NULL || *errstr != NULL)
  310. {
  311. return ret;
  312. }
  313. // Try parsing as RFC3339 subset of ISO 8601:1988
  314. // FIXME?: Calling strptime() multiple times is explicitly unspecified in POSIX.1-2024
  315. // instead a single strptime() call should be done
  316. // No %F in POSIX prior to POSIX.1-2024 (<https://www.austingroupbugs.net/view.php?id=920>)
  317. char *s = strptime(arg, "%Y-%m-%d", time);
  318. if(s == NULL)
  319. {
  320. *errstr = "strptime(…, \"%Y-%m-%d\", …) returned NULL";
  321. errno = 0;
  322. return NULL;
  323. }
  324. if(s[0] != 'T' && s[0] != ' ')
  325. {
  326. *errstr = "Couldn't find time-separator (T or space) after date (Y-m-d)";
  327. errno = 0;
  328. return NULL;
  329. }
  330. s++;
  331. s = strptime(s, "%H:%M:%S", time);
  332. if(s == NULL)
  333. {
  334. *errstr = "strptime(…, \"%H:%M:%S\", …) returned NULL";
  335. errno = 0;
  336. return NULL;
  337. }
  338. if(s[0] == ',' || s[0] == '.')
  339. {
  340. double fraction = 0.0;
  341. int parsed = 0;
  342. if(s[0] == ',') s[0] = '.';
  343. if(sscanf(s, "%10lf%n", &fraction, &parsed) < 1)
  344. {
  345. if(errno == 0)
  346. {
  347. *errstr = "Failed to parse fractional seconds";
  348. }
  349. else
  350. {
  351. *errstr = strerror(errno);
  352. errno = 0;
  353. }
  354. return NULL;
  355. }
  356. *nsec = (long)(fraction * 1000000000);
  357. s += parsed;
  358. // too many digits
  359. if(isdigit(s[0]))
  360. {
  361. *errstr = "Too many digits (> 10) for fractional seconds";
  362. return NULL;
  363. }
  364. }
  365. for(; isspace(s[0]); s++)
  366. ;
  367. if(s != NULL && s[0] != '\0')
  368. {
  369. if(s[0] == 'Z' && s[1] == '\0')
  370. {
  371. time->tm_isdst = 0;
  372. time->tm_gmtoff = 0;
  373. time->tm_zone = "UTC";
  374. }
  375. else
  376. {
  377. s = tzoffset_parse(s, time, errstr);
  378. if(s == NULL) return NULL;
  379. }
  380. }
  381. return s;
  382. }
  383. // Because mktime() messes with tm_gmtoff yet doesn't applies it, even in POSIX.1-2024
  384. // Returns (time_t)-1 on failure
  385. // Maybe should be replaced by mktime_z once <https://www.austingroupbugs.net/view.php?id=1794> gets accepted and implemented
  386. time_t
  387. mktime_tz(struct tm *tm)
  388. {
  389. long gmtoff = tm->tm_gmtoff;
  390. const char *zone = tm->tm_zone;
  391. time_t res = mktime(tm);
  392. tm->tm_gmtoff = gmtoff;
  393. tm->tm_zone = zone;
  394. if(res == (time_t)-1) return res;
  395. // 12:00+02:00 corresponds to 10:00Z so needs to be reversed
  396. res += -gmtoff;
  397. return res;
  398. }