logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git/

datetime_parse.c (9953B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _DEFAULT_SOURCE // tm_gmtoff/tm_zone
  5. #define _XOPEN_SOURCE 700 // strptime (NetBSD)
  6. #define _POSIX_C_SOURCE 200809L // st_atim/st_mtim
  7. #include "./datetime_parse.h"
  8. #include <assert.h>
  9. #include <ctype.h> /* isdigit */
  10. #include <errno.h> /* errno */
  11. #include <inttypes.h> /* PRId16 */
  12. #include <limits.h> /* TZNAME_MAX */
  13. #include <stdio.h> /* perror, sscanf */
  14. #include <stdlib.h> /* strtol */
  15. #include <string.h> /* memset */
  16. #include <time.h> /* strptime, tm */
  17. static const char *short_weekday_name[7] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
  18. static const char *short_month_name[12] = {
  19. "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
  20. // Parses [+|-]HH:?MM timezone offsets
  21. // Would need tzalloc from <https://www.austingroupbugs.net/view.php?id=1794> to parse timezone names
  22. static char *
  23. tzoffset_parse(char *s, struct tm *time, const char **errstr)
  24. {
  25. #ifndef TZNAME_MAX
  26. #define TZNAME_MAX _POSIX_TZNAME_MAX
  27. #endif
  28. #if TZNAME_MAX < 5
  29. #error TZNAME_MAX is too small
  30. #endif
  31. static char offname[TZNAME_MAX + 1] = "";
  32. int neg;
  33. if(s[0] == '+')
  34. neg = 0;
  35. else if(s[0] == '-')
  36. neg = 1;
  37. else
  38. {
  39. *errstr = "Invalid timezone offset, must start with + or -";
  40. return NULL;
  41. }
  42. size_t offname_i = 0;
  43. offname[offname_i++] = *s++;
  44. if(isdigit(s[0]) && isdigit(s[1]))
  45. {
  46. time->tm_gmtoff = (s[0] - '0') * 36000 + (s[1] - '0') * 3600;
  47. offname[offname_i++] = *s++;
  48. offname[offname_i++] = *s++;
  49. }
  50. else
  51. {
  52. *errstr = "Invalid timezone offset, no digits after <+|->";
  53. return NULL;
  54. }
  55. if(s[0] == ':') s++;
  56. if(isdigit(s[0]) && isdigit(s[1]))
  57. {
  58. time->tm_gmtoff += (s[0] - '0') * 600 + (s[1] - '0') * 60;
  59. offname[offname_i++] = *s++;
  60. offname[offname_i++] = *s++;
  61. }
  62. else
  63. {
  64. *errstr = "Invalid timezone offset, no digits after <+|->HH[:]";
  65. return NULL;
  66. }
  67. if(neg) time->tm_gmtoff = -time->tm_gmtoff;
  68. offname[offname_i++] = '\0';
  69. time->tm_isdst = 0;
  70. time->tm_zone = offname;
  71. return s;
  72. }
  73. // Sets *errstr to NULL when it isn't an email date-time
  74. //
  75. // Check if it could be Email / Internet Message Format datetime
  76. // - Ignores RFC822 (ARPA era, folding space, 2-digit year)
  77. // - Uses RFC5322 / RFC2822 with ignoring RFC822 obsolete formats (aka obs)
  78. //
  79. // RFC5322 and RFC2822 (no obs): "([ ]*Day,)[ ]*DD[ ]+Mon[ ]+YYYY[ ]+HH:MM(:SS)?[ ]+[+/-]hhmm"
  80. static char *
  81. email_datetime_parse(char *arg, struct tm *time, const char **errstr)
  82. {
  83. // Kept free of strptime() due to update/overriding being undefined and
  84. // requiring custom parsing, notably locale-free, which strptime() can't handle
  85. for(; isspace(arg[0]); arg++)
  86. ;
  87. // Change `time` only right before returning in case datetime is invalid
  88. struct tm tmp_time = *time;
  89. tmp_time.tm_isdst = -1;
  90. tmp_time.tm_wday = -1;
  91. if(arg[3] == ',')
  92. {
  93. // Because %a/%A is locale-dependent, Sunday is tm_wday=0
  94. for(size_t i = 0; i < 7; i++)
  95. {
  96. if(memcmp(arg, short_weekday_name[i], 3) == 0)
  97. {
  98. tmp_time.tm_wday = i;
  99. break;
  100. }
  101. }
  102. if(tmp_time.tm_wday == -1)
  103. {
  104. *errstr = "Failed parsing short weekday name";
  105. errno = 0;
  106. return NULL;
  107. }
  108. arg += 4;
  109. for(; isspace(arg[0]); arg++)
  110. ;
  111. }
  112. errno = 0;
  113. int parsed = 0;
  114. char month_name[4] = "";
  115. if(sscanf(arg,
  116. "%2d %3s %d %2d:%2d%n",
  117. &tmp_time.tm_mday,
  118. month_name,
  119. &tmp_time.tm_year,
  120. &tmp_time.tm_hour,
  121. &tmp_time.tm_min,
  122. &parsed) < 5)
  123. {
  124. if(errno == 0 || errno == EINVAL)
  125. {
  126. if(tmp_time.tm_wday == -1)
  127. {
  128. *errstr = NULL;
  129. }
  130. else
  131. {
  132. *errstr = "Failed parsing Email-datetime";
  133. }
  134. }
  135. else
  136. {
  137. *errstr = strerror(errno);
  138. errno = 0;
  139. }
  140. return NULL;
  141. }
  142. if(tmp_time.tm_year < 49)
  143. {
  144. tmp_time.tm_year += 100; // 2000-2049
  145. }
  146. else if(tmp_time.tm_year > 99)
  147. {
  148. tmp_time.tm_year -= 1900;
  149. }
  150. arg += parsed;
  151. if(arg[0] == ':' && isdigit(arg[1]))
  152. {
  153. if(isdigit(arg[2]))
  154. {
  155. tmp_time.tm_sec = (arg[1] - '0') * 10 + (arg[2] - '0');
  156. arg += 3;
  157. }
  158. else
  159. {
  160. tmp_time.tm_sec = arg[1] - '0';
  161. arg += 2;
  162. }
  163. }
  164. for(; isspace(arg[0]); arg++)
  165. ;
  166. // Consider that nobody is going to transmit a timezone name which isn't GMT
  167. if(arg[0] == 'G' && arg[1] == 'M' && arg[2] == 'T' && (arg[3] == '\0' || isspace(arg[3])))
  168. {
  169. tmp_time.tm_isdst = 0;
  170. tmp_time.tm_gmtoff = 0;
  171. tmp_time.tm_zone = "UTC";
  172. }
  173. else
  174. {
  175. arg = tzoffset_parse(arg, &tmp_time, errstr);
  176. if(arg == NULL) return NULL;
  177. }
  178. // Done extracting directly from arg
  179. tmp_time.tm_mon = -1;
  180. // Because %b/%B is locale-dependent
  181. for(size_t i = 0; i < 12; i++)
  182. {
  183. if(memcmp(month_name, short_month_name[i], 3) == 0)
  184. {
  185. tmp_time.tm_mon = i;
  186. break;
  187. }
  188. }
  189. if(tmp_time.tm_mon < 0)
  190. {
  191. *errstr = "Failed parsing short month name";
  192. errno = 0;
  193. return NULL;
  194. }
  195. memcpy(time, &tmp_time, sizeof(tmp_time));
  196. return arg;
  197. }
  198. // Sets *errstr to NULL when it isn't an email date-time
  199. //
  200. // Check if it could be asctime() format: Thu Nov 24 18:22:48 1986
  201. static char *
  202. asctime_datetime_parse(char *arg, struct tm *time, const char **errstr)
  203. {
  204. // Kept free of strptime() due to update/overriding being undefined and
  205. // requiring custom parsing, notably locale-free, which strptime() can't handle
  206. // Change `time` only right before returning in case datetime is invalid
  207. struct tm tmp_time = *time;
  208. tmp_time.tm_isdst = -1;
  209. tmp_time.tm_wday = -1;
  210. // asctime() doesn't gives any timezone information, assume UTC
  211. tmp_time.tm_isdst = 0;
  212. tmp_time.tm_gmtoff = 0;
  213. tmp_time.tm_zone = "UTC";
  214. errno = 0;
  215. int parsed = 0;
  216. char month_name[4] = "";
  217. char weekday_name[4] = "";
  218. if(sscanf(arg,
  219. "%3s %3s %d %2d:%2d:%2d %d%n",
  220. weekday_name,
  221. month_name,
  222. &tmp_time.tm_mday,
  223. &tmp_time.tm_hour,
  224. &tmp_time.tm_min,
  225. &tmp_time.tm_sec,
  226. &tmp_time.tm_year,
  227. &parsed) < 7)
  228. {
  229. if(errno == 0 || errno == EINVAL)
  230. {
  231. *errstr = NULL;
  232. }
  233. else
  234. {
  235. *errstr = strerror(errno);
  236. errno = 0;
  237. }
  238. return NULL;
  239. }
  240. arg += parsed;
  241. tmp_time.tm_year -= 1900;
  242. tmp_time.tm_wday = -1;
  243. // Because %a/%A is locale-dependent
  244. for(size_t i = 0; i < 7; i++)
  245. {
  246. if(memcmp(weekday_name, short_weekday_name[i], 3) == 0)
  247. {
  248. tmp_time.tm_wday = i;
  249. break;
  250. }
  251. }
  252. if(tmp_time.tm_wday < 0)
  253. {
  254. *errstr = "Failed parsing short weekday name";
  255. errno = 0;
  256. return NULL;
  257. }
  258. tmp_time.tm_mon = -1;
  259. // Because %b/%B is locale-dependent
  260. for(size_t i = 0; i < 12; i++)
  261. {
  262. if(memcmp(month_name, short_month_name[i], 3) == 0)
  263. {
  264. tmp_time.tm_mon = i;
  265. break;
  266. }
  267. }
  268. if(tmp_time.tm_mon < 0)
  269. {
  270. *errstr = "Failed parsing short month name";
  271. errno = 0;
  272. return NULL;
  273. }
  274. for(; isspace(arg[0]); arg++)
  275. ;
  276. memcpy(time, &tmp_time, sizeof(tmp_time));
  277. return arg;
  278. }
  279. // Sets errstr on failure
  280. // YYYY-MM-DD[T ]hh:mm:SS([,\.]frac)?(Z|[+\-]hh:?mm)?
  281. static char *
  282. iso_datetime_parse(char *arg, struct tm *time, long *nsec, const char **errstr)
  283. {
  284. // Try parsing as RFC3339 subset of ISO 8601:1988
  285. // FIXME?: Calling strptime() multiple times is explicitly unspecified in POSIX.1-2024
  286. // instead a single strptime() call should be done
  287. // No %F in POSIX prior to POSIX.1-2024 (<https://www.austingroupbugs.net/view.php?id=920>)
  288. char *s = strptime(arg, "%Y-%m-%d", time);
  289. if(s == NULL)
  290. {
  291. *errstr = "strptime(…, \"%Y-%m-%d\", …) returned NULL";
  292. errno = 0;
  293. return NULL;
  294. }
  295. if(s[0] != 'T' && s[0] != ' ')
  296. {
  297. *errstr = "Couldn't find time-separator (T or space) after date (Y-m-d)";
  298. errno = 0;
  299. return NULL;
  300. }
  301. s++;
  302. for(; isspace(s[0]); s++)
  303. ;
  304. s = strptime(s, "%H:%M:%S", time);
  305. if(s == NULL)
  306. {
  307. *errstr = "strptime(…, \"%H:%M:%S\", …) returned NULL";
  308. errno = 0;
  309. return NULL;
  310. }
  311. if(s[0] == ',' || s[0] == '.')
  312. {
  313. double fraction = 0.0;
  314. int parsed = 0;
  315. if(s[0] == ',') s[0] = '.';
  316. if(sscanf(s, "%10lf%n", &fraction, &parsed) < 1)
  317. {
  318. if(errno == 0)
  319. {
  320. *errstr = "Failed to parse fractional seconds";
  321. }
  322. else
  323. {
  324. *errstr = strerror(errno);
  325. errno = 0;
  326. }
  327. return NULL;
  328. }
  329. *nsec = (long)(fraction * 1000000000);
  330. s += parsed;
  331. // too many digits
  332. if(isdigit(s[0]))
  333. {
  334. *errstr = "Too many digits (> 10) for fractional seconds";
  335. return NULL;
  336. }
  337. }
  338. for(; isspace(s[0]); s++)
  339. ;
  340. if(s != NULL && s[0] != '\0')
  341. {
  342. if(s[0] == 'Z' && s[1] == '\0')
  343. {
  344. time->tm_isdst = 0;
  345. time->tm_gmtoff = 0;
  346. time->tm_zone = "UTC";
  347. }
  348. else
  349. {
  350. s = tzoffset_parse(s, time, errstr);
  351. if(s == NULL) return NULL;
  352. }
  353. }
  354. return s;
  355. }
  356. // Sets errstr on failure
  357. char *
  358. datetime_parse(char *arg, struct tm *time, long *nsec, const char **errstr)
  359. {
  360. *nsec = 0;
  361. // For Alpine's abuild compatibility
  362. if(arg[0] == '@')
  363. {
  364. arg++;
  365. char *endptr = NULL;
  366. time_t now = strtol(arg, &endptr, 10);
  367. if(errno != 0)
  368. {
  369. *errstr = strerror(errno);
  370. errno = 0;
  371. return NULL;
  372. }
  373. gmtime_r(&now, time);
  374. return endptr;
  375. }
  376. char *ret = NULL;
  377. ret = email_datetime_parse(arg, time, errstr);
  378. if(ret != NULL || *errstr != NULL)
  379. {
  380. return ret;
  381. }
  382. ret = asctime_datetime_parse(arg, time, errstr);
  383. if(ret != NULL || *errstr != NULL)
  384. {
  385. return ret;
  386. }
  387. return iso_datetime_parse(arg, time, nsec, errstr);
  388. }
  389. // Because mktime() messes with tm_gmtoff yet doesn't applies it, even in POSIX.1-2024
  390. // Returns (time_t)-1 on failure
  391. // Maybe should be replaced by mktime_z once <https://www.austingroupbugs.net/view.php?id=1794> gets accepted and implemented
  392. time_t
  393. mktime_tz(struct tm *tm)
  394. {
  395. long gmtoff = tm->tm_gmtoff;
  396. const char *zone = tm->tm_zone;
  397. time_t res = mktime(tm);
  398. tm->tm_gmtoff = gmtoff;
  399. tm->tm_zone = zone;
  400. if(res == (time_t)-1) return res;
  401. // 12:00+02:00 corresponds to 10:00Z so needs to be reversed
  402. res += -gmtoff;
  403. return res;
  404. }