logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git/

iso_parse.c (9541B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _DEFAULT_SOURCE // tm_gmtoff/tm_zone
  5. #define _XOPEN_SOURCE 700 // strptime (NetBSD)
  6. #define _POSIX_C_SOURCE 200809L // st_atim/st_mtim
  7. #include "./iso_parse.h"
  8. #include <assert.h>
  9. #include <ctype.h> /* isdigit */
  10. #include <errno.h> /* errno */
  11. #include <inttypes.h> /* PRId16 */
  12. #include <limits.h> /* TZNAME_MAX */
  13. #include <stdio.h> /* perror, sscanf */
  14. #include <stdlib.h> /* strtol */
  15. #include <string.h> /* memset */
  16. #include <time.h> /* strptime, tm */
  17. static const char *short_weekday_name[7] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
  18. static const char *short_month_name[12] = {
  19. "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
  20. // Parses [+|-]HH:?MM timezone offsets
  21. static char *
  22. tzoffset_parse(char *s, struct tm *time, const char **errstr)
  23. {
  24. #ifndef TZNAME_MAX
  25. #define TZNAME_MAX _POSIX_TZNAME_MAX
  26. #endif
  27. #if TZNAME_MAX < 5
  28. #error TZNAME_MAX is too small
  29. #endif
  30. static char offname[TZNAME_MAX + 1] = "";
  31. int neg;
  32. if(s[0] == '+')
  33. neg = 0;
  34. else if(s[0] == '-')
  35. neg = 1;
  36. else
  37. {
  38. *errstr = "Invalid timezone offset, must start with + or -";
  39. return NULL;
  40. }
  41. size_t offname_i = 0;
  42. offname[offname_i++] = *s++;
  43. if(isdigit(s[0]) && isdigit(s[1]))
  44. {
  45. time->tm_gmtoff = (s[0] - '0') * 36000 + (s[1] - '0') * 3600;
  46. offname[offname_i++] = *s++;
  47. offname[offname_i++] = *s++;
  48. }
  49. else
  50. {
  51. *errstr = "Invalid timezone offset, no digits after <+|->";
  52. return NULL;
  53. }
  54. if(s[0] == ':') s++;
  55. if(isdigit(s[0]) && isdigit(s[1]))
  56. {
  57. time->tm_gmtoff += (s[0] - '0') * 600 + (s[1] - '0') * 60;
  58. offname[offname_i++] = *s++;
  59. offname[offname_i++] = *s++;
  60. }
  61. else
  62. {
  63. *errstr = "Invalid timezone offset, no digits after <+|->HH[:]";
  64. return NULL;
  65. }
  66. if(neg) time->tm_gmtoff = -time->tm_gmtoff;
  67. offname[offname_i++] = '\0';
  68. time->tm_isdst = 0;
  69. time->tm_zone = offname;
  70. return s;
  71. }
  72. // For iso_parse function
  73. // Sets *errstr to NULL when it isn't an email date-time
  74. //
  75. // Check if it could be Email / Internet Message Format datetime
  76. // - Ignores RFC822 (ARPA era, folding space, 2-digit year)
  77. // - Uses RFC5322 / RFC2822 with ignoring RFC822 obsolete formats (aka obs)
  78. //
  79. // RFC5322 and RFC2822 (no obs): "([ ]*Day,)[ ]*DD[ ]+Mon[ ]+YYYY[ ]+HH:MM(:SS)?[ ]+[+/-]hhmm"
  80. static char *
  81. email_date_parse(char *arg, struct tm *time, const char **errstr)
  82. {
  83. // Kept free of strptime() due to update/overriding being undefined and
  84. // requiring custom parsing, notably locale-free, which strptime() can't handle
  85. for(; isspace(arg[0]); arg++)
  86. ;
  87. // Change `time` only right before returning in case datetime is invalid
  88. struct tm tmp_time = *time;
  89. tmp_time.tm_isdst = -1;
  90. tmp_time.tm_wday = -1;
  91. if(arg[3] == ',')
  92. {
  93. // Because %a/%A is locale-dependent, Sunday is tm_wday=0
  94. for(size_t i = 0; i < 7; i++)
  95. {
  96. if(memcmp(arg, short_weekday_name[i], 3) == 0)
  97. {
  98. tmp_time.tm_wday = i;
  99. break;
  100. }
  101. }
  102. if(tmp_time.tm_wday == -1)
  103. {
  104. *errstr = "Failed parsing short weekday name";
  105. errno = 0;
  106. return NULL;
  107. }
  108. arg += 4;
  109. for(; isspace(arg[0]); arg++)
  110. ;
  111. }
  112. errno = 0;
  113. int parsed = 0;
  114. char month_name[4] = "";
  115. if(sscanf(arg,
  116. "%2d %3s %d %2d:%2d%n",
  117. &tmp_time.tm_mday,
  118. month_name,
  119. &tmp_time.tm_year,
  120. &tmp_time.tm_hour,
  121. &tmp_time.tm_min,
  122. &parsed) < 5)
  123. {
  124. if(errno == 0 || errno == EINVAL)
  125. {
  126. if(tmp_time.tm_wday == -1)
  127. {
  128. *errstr = NULL;
  129. }
  130. else
  131. {
  132. *errstr = "Failed parsing Email-datetime";
  133. }
  134. }
  135. else
  136. {
  137. *errstr = strerror(errno);
  138. errno = 0;
  139. }
  140. return NULL;
  141. }
  142. if(tmp_time.tm_year < 49)
  143. {
  144. tmp_time.tm_year += 100; // 2000-2049
  145. }
  146. else if(tmp_time.tm_year > 99)
  147. {
  148. tmp_time.tm_year -= 1900;
  149. }
  150. arg += parsed;
  151. if(arg[0] == ':' && isdigit(arg[1]))
  152. {
  153. if(isdigit(arg[2]))
  154. {
  155. tmp_time.tm_sec = (arg[1] - '0') * 10 + (arg[2] - '0');
  156. arg += 3;
  157. }
  158. else
  159. {
  160. tmp_time.tm_sec = arg[1] - '0';
  161. arg += 2;
  162. }
  163. }
  164. for(; isspace(arg[0]); arg++)
  165. ;
  166. // Consider that nobody is going to transmit a timezone name which isn't GMT
  167. if(arg[0] == 'G' && arg[1] == 'M' && arg[2] == 'T' && (arg[3] == '\0' || isspace(arg[3])))
  168. {
  169. tmp_time.tm_isdst = 0;
  170. tmp_time.tm_gmtoff = 0;
  171. tmp_time.tm_zone = "UTC";
  172. }
  173. else
  174. {
  175. arg = tzoffset_parse(arg, &tmp_time, errstr);
  176. if(arg == NULL) return NULL;
  177. }
  178. // Done extracting directly from arg
  179. tmp_time.tm_mon = -1;
  180. // Because %b/%B is locale-dependent
  181. for(size_t i = 0; i < 12; i++)
  182. {
  183. if(memcmp(month_name, short_month_name[i], 3) == 0)
  184. {
  185. tmp_time.tm_mon = i;
  186. break;
  187. }
  188. }
  189. if(tmp_time.tm_mon < 0)
  190. {
  191. *errstr = "Failed parsing short month name";
  192. errno = 0;
  193. return NULL;
  194. }
  195. memcpy(time, &tmp_time, sizeof(tmp_time));
  196. return arg;
  197. }
  198. // For iso_parse function
  199. // Sets *errstr to NULL when it isn't an email date-time
  200. //
  201. // Check if it could be asctime() format: Thu Nov 24 18:22:48 1986
  202. static char *
  203. asctime_date_parse(char *arg, struct tm *time, const char **errstr)
  204. {
  205. // Kept free of strptime() due to update/overriding being undefined and
  206. // requiring custom parsing, notably locale-free, which strptime() can't handle
  207. // Change `time` only right before returning in case datetime is invalid
  208. struct tm tmp_time = *time;
  209. tmp_time.tm_isdst = -1;
  210. tmp_time.tm_wday = -1;
  211. // asctime() doesn't gives any timezone information, assume UTC
  212. tmp_time.tm_isdst = 0;
  213. tmp_time.tm_gmtoff = 0;
  214. tmp_time.tm_zone = "UTC";
  215. errno = 0;
  216. int parsed = 0;
  217. char month_name[4] = "";
  218. char weekday_name[4] = "";
  219. if(sscanf(arg,
  220. "%3s %3s %d %2d:%2d:%2d %d%n",
  221. weekday_name,
  222. month_name,
  223. &tmp_time.tm_mday,
  224. &tmp_time.tm_hour,
  225. &tmp_time.tm_min,
  226. &tmp_time.tm_sec,
  227. &tmp_time.tm_year,
  228. &parsed) < 7)
  229. {
  230. if(errno == 0 || errno == EINVAL)
  231. {
  232. *errstr = NULL;
  233. }
  234. else
  235. {
  236. *errstr = strerror(errno);
  237. errno = 0;
  238. }
  239. return NULL;
  240. }
  241. arg += parsed;
  242. tmp_time.tm_year -= 1900;
  243. tmp_time.tm_wday = -1;
  244. // Because %a/%A is locale-dependent
  245. for(size_t i = 0; i < 7; i++)
  246. {
  247. if(memcmp(weekday_name, short_weekday_name[i], 3) == 0)
  248. {
  249. tmp_time.tm_wday = i;
  250. break;
  251. }
  252. }
  253. if(tmp_time.tm_wday < 0)
  254. {
  255. *errstr = "Failed parsing short weekday name";
  256. errno = 0;
  257. return NULL;
  258. }
  259. tmp_time.tm_mon = -1;
  260. // Because %b/%B is locale-dependent
  261. for(size_t i = 0; i < 12; i++)
  262. {
  263. if(memcmp(month_name, short_month_name[i], 3) == 0)
  264. {
  265. tmp_time.tm_mon = i;
  266. break;
  267. }
  268. }
  269. if(tmp_time.tm_mon < 0)
  270. {
  271. *errstr = "Failed parsing short month name";
  272. errno = 0;
  273. return NULL;
  274. }
  275. for(; isspace(arg[0]); arg++)
  276. ;
  277. memcpy(time, &tmp_time, sizeof(tmp_time));
  278. return arg;
  279. }
  280. // Sets errstr on failure
  281. // YYYY-MM-DD[T ]hh:mm:SS([,\.]frac)?(Z|[+\-]hh:?mm)?
  282. char *
  283. iso_parse(char *arg, struct tm *time, long *nsec, const char **errstr)
  284. {
  285. *nsec = 0;
  286. // For Alpine's abuild compatibility
  287. if(arg[0] == '@')
  288. {
  289. arg++;
  290. char *endptr = NULL;
  291. time_t now = strtol(arg, &endptr, 10);
  292. if(errno != 0)
  293. {
  294. *errstr = strerror(errno);
  295. errno = 0;
  296. return NULL;
  297. }
  298. gmtime_r(&now, time);
  299. return endptr;
  300. }
  301. char *ret = NULL;
  302. ret = email_date_parse(arg, time, errstr);
  303. if(ret != NULL || *errstr != NULL)
  304. {
  305. return ret;
  306. }
  307. ret = asctime_date_parse(arg, time, errstr);
  308. if(ret != NULL || *errstr != NULL)
  309. {
  310. return ret;
  311. }
  312. // Try parsing as RFC3339 subset of ISO 8601:1988
  313. // FIXME?: Calling strptime() multiple times is explicitly unspecified in POSIX.1-2024
  314. // instead a single strptime() call should be done
  315. // No %F in POSIX prior to POSIX.1-2024 (<https://www.austingroupbugs.net/view.php?id=920>)
  316. char *s = strptime(arg, "%Y-%m-%d", time);
  317. if(s == NULL)
  318. {
  319. *errstr = "strptime(…, \"%Y-%m-%d\", …) returned NULL";
  320. errno = 0;
  321. return NULL;
  322. }
  323. if(s[0] != 'T' && s[0] != ' ')
  324. {
  325. *errstr = "Couldn't find time-separator (T or space) after date (Y-m-d)";
  326. errno = 0;
  327. return NULL;
  328. }
  329. s++;
  330. s = strptime(s, "%H:%M:%S", time);
  331. if(s == NULL)
  332. {
  333. *errstr = "strptime(…, \"%H:%M:%S\", …) returned NULL";
  334. errno = 0;
  335. return NULL;
  336. }
  337. if(s[0] == ',' || s[0] == '.')
  338. {
  339. double fraction = 0.0;
  340. int parsed = 0;
  341. if(s[0] == ',') s[0] = '.';
  342. if(sscanf(s, "%10lf%n", &fraction, &parsed) < 1)
  343. {
  344. if(errno == 0)
  345. {
  346. *errstr = "Failed to parse fractional seconds";
  347. }
  348. else
  349. {
  350. *errstr = strerror(errno);
  351. errno = 0;
  352. }
  353. return NULL;
  354. }
  355. *nsec = (long)(fraction * 1000000000);
  356. s += parsed;
  357. // too many digits
  358. if(isdigit(s[0]))
  359. {
  360. *errstr = "Too many digits (> 10) for fractional seconds";
  361. return NULL;
  362. }
  363. }
  364. for(; isspace(s[0]); s++)
  365. ;
  366. if(s != NULL && s[0] != '\0')
  367. {
  368. if(s[0] == 'Z' && s[1] == '\0')
  369. {
  370. time->tm_isdst = 0;
  371. time->tm_gmtoff = 0;
  372. time->tm_zone = "UTC";
  373. }
  374. else
  375. {
  376. s = tzoffset_parse(s, time, errstr);
  377. if(s == NULL) return NULL;
  378. }
  379. }
  380. return s;
  381. }
  382. // Because mktime() messes with tm_gmtoff yet doesn't applies it, even in POSIX.1-2024
  383. // Returns (time_t)-1 on failure
  384. time_t
  385. mktime_tz(struct tm *tm)
  386. {
  387. long gmtoff = tm->tm_gmtoff;
  388. const char *zone = tm->tm_zone;
  389. time_t res = mktime(tm);
  390. tm->tm_gmtoff = gmtoff;
  391. tm->tm_zone = zone;
  392. if(res == (time_t)-1) return res;
  393. // 12:00+02:00 corresponds to 10:00Z so needs to be reversed
  394. res += -gmtoff;
  395. return res;
  396. }