logo

utils-std

Collection of commonly available Unix tools git clone https://anongit.hacktivis.me/git/utils-std.git/

datetime_parse.c (9071B)


  1. // utils-std: Collection of commonly available Unix tools
  2. // SPDX-FileCopyrightText: 2017 Haelwenn (lanodan) Monnier <contact+utils@hacktivis.me>
  3. // SPDX-License-Identifier: MPL-2.0
  4. #define _DEFAULT_SOURCE // tm_gmtoff/tm_zone, timegm (POSIX.1-2024 mktime() future directions)
  5. #define _XOPEN_SOURCE 700 // strptime (NetBSD)
  6. #define _POSIX_C_SOURCE 200809L // st_atim/st_mtim
  7. #include "./datetime_parse.h"
  8. #include <assert.h>
  9. #include <ctype.h> /* isdigit */
  10. #include <errno.h> /* errno */
  11. #include <inttypes.h> /* PRId16 */
  12. #include <limits.h> /* TZNAME_MAX */
  13. #include <stdio.h> /* perror, sscanf */
  14. #include <stdlib.h> /* strtol */
  15. #include <string.h> /* memset */
  16. #include <time.h> /* strptime, tm */
  17. static const char short_weekday_name[7][3] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
  18. static const char short_month_name[12][3] = {
  19. "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
  20. // Parses [+|-]HH:?MM timezone offsets
  21. // Would need tzalloc from <https://www.austingroupbugs.net/view.php?id=1794> to parse timezone names
  22. static char *
  23. tzoffset_parse(char *s, struct tm *time, const char **errstr)
  24. {
  25. int neg;
  26. if(s[0] == '+')
  27. neg = 0;
  28. else if(s[0] == '-')
  29. neg = 1;
  30. else
  31. {
  32. *errstr = "Invalid timezone offset, must start with + or -";
  33. return NULL;
  34. }
  35. s++;
  36. if(isdigit(s[0]) && isdigit(s[1]))
  37. {
  38. time->tm_gmtoff = (s[0] - '0') * 36000 + (s[1] - '0') * 3600;
  39. s += 2;
  40. }
  41. else
  42. {
  43. *errstr = "Invalid timezone offset, no digits after <+|->";
  44. return NULL;
  45. }
  46. if(s[0] == ':') s++;
  47. if(isdigit(s[0]) && isdigit(s[1]))
  48. {
  49. time->tm_gmtoff += (s[0] - '0') * 600 + (s[1] - '0') * 60;
  50. }
  51. else
  52. {
  53. *errstr = "Invalid timezone offset, no digits after <+|->HH[:]";
  54. return NULL;
  55. }
  56. if(neg) time->tm_gmtoff = -time->tm_gmtoff;
  57. time->tm_isdst = 0;
  58. time->tm_zone = NULL;
  59. return s;
  60. }
  61. // Sets *errstr to NULL when it isn't an email date-time
  62. //
  63. // Check if it could be Email / Internet Message Format datetime
  64. // - Ignores RFC822 (ARPA era, folding space, 2-digit year)
  65. // - Uses RFC5322 / RFC2822 with ignoring RFC822 obsolete formats (aka obs)
  66. //
  67. // RFC5322 and RFC2822 (no obs): "([ ]*Day,)[ ]*DD[ ]+Mon[ ]+YYYY[ ]+HH:MM(:SS)?[ ]+[+/-]hhmm"
  68. static char *
  69. email_datetime_parse(char *arg, time_t *epoch, const char **errstr)
  70. {
  71. // Kept free of strptime() due to update/overriding being undefined and
  72. // requiring custom parsing, notably locale-free, which strptime() can't handle
  73. for(; isspace(arg[0]); arg++)
  74. ;
  75. // Change `time` only right before returning in case datetime is invalid
  76. struct tm tmp_time = {
  77. .tm_isdst = -1,
  78. .tm_wday = -1,
  79. };
  80. if(arg[3] == ',')
  81. {
  82. // Because %a/%A is locale-dependent, Sunday is tm_wday=0
  83. for(size_t i = 0; i < 7; i++)
  84. {
  85. if(memcmp(arg, short_weekday_name[i], 3) == 0)
  86. {
  87. tmp_time.tm_wday = i;
  88. break;
  89. }
  90. }
  91. if(tmp_time.tm_wday == -1)
  92. {
  93. *errstr = "Failed parsing short weekday name";
  94. errno = 0;
  95. return NULL;
  96. }
  97. arg += 4;
  98. for(; isspace(arg[0]); arg++)
  99. ;
  100. }
  101. errno = 0;
  102. int parsed = 0;
  103. char month_name[4] = "";
  104. if(sscanf(arg,
  105. "%2d %3s %d %2d:%2d%n",
  106. &tmp_time.tm_mday,
  107. month_name,
  108. &tmp_time.tm_year,
  109. &tmp_time.tm_hour,
  110. &tmp_time.tm_min,
  111. &parsed) < 5)
  112. {
  113. if(errno == 0 || errno == EINVAL)
  114. {
  115. if(tmp_time.tm_wday == -1)
  116. {
  117. *errstr = NULL;
  118. }
  119. else
  120. {
  121. *errstr = "Failed parsing Email-datetime";
  122. }
  123. }
  124. else
  125. {
  126. *errstr = strerror(errno);
  127. errno = 0;
  128. }
  129. return NULL;
  130. }
  131. if(tmp_time.tm_year < 49)
  132. {
  133. tmp_time.tm_year += 100; // 2000-2049
  134. }
  135. else if(tmp_time.tm_year > 99)
  136. {
  137. tmp_time.tm_year -= 1900;
  138. }
  139. arg += parsed;
  140. if(arg[0] == ':' && isdigit(arg[1]))
  141. {
  142. if(isdigit(arg[2]))
  143. {
  144. tmp_time.tm_sec = (arg[1] - '0') * 10 + (arg[2] - '0');
  145. arg += 3;
  146. }
  147. else
  148. {
  149. tmp_time.tm_sec = arg[1] - '0';
  150. arg += 2;
  151. }
  152. }
  153. for(; isspace(arg[0]); arg++)
  154. ;
  155. // Consider that nobody is going to transmit a timezone name which isn't GMT
  156. if(arg[0] == 'G' && arg[1] == 'M' && arg[2] == 'T' && (arg[3] == '\0' || isspace(arg[3])))
  157. {
  158. tmp_time.tm_isdst = 0;
  159. tmp_time.tm_gmtoff = 0;
  160. tmp_time.tm_zone = "UTC";
  161. }
  162. else
  163. {
  164. arg = tzoffset_parse(arg, &tmp_time, errstr);
  165. if(arg == NULL) return NULL;
  166. }
  167. // Done extracting directly from arg
  168. tmp_time.tm_mon = -1;
  169. // Because %b/%B is locale-dependent
  170. for(size_t i = 0; i < 12; i++)
  171. {
  172. if(memcmp(month_name, short_month_name[i], 3) == 0)
  173. {
  174. tmp_time.tm_mon = i;
  175. break;
  176. }
  177. }
  178. if(tmp_time.tm_mon < 0)
  179. {
  180. *errstr = "Failed parsing short month name";
  181. errno = 0;
  182. return NULL;
  183. }
  184. *epoch = utils_timegm(&tmp_time);
  185. return arg;
  186. }
  187. // Sets *errstr to NULL when it isn't an email date-time
  188. //
  189. // Check if it could be asctime() format: Thu Nov 24 18:22:48 1986
  190. static char *
  191. asctime_datetime_parse(char *arg, time_t *epoch, const char **errstr)
  192. {
  193. // Kept free of strptime() due to update/overriding being undefined and
  194. // requiring custom parsing, notably locale-free, which strptime() can't handle
  195. struct tm tmp_time = {
  196. .tm_isdst = -1,
  197. .tm_wday = -1,
  198. };
  199. // asctime() doesn't gives any timezone information, assume UTC
  200. tmp_time.tm_isdst = 0;
  201. tmp_time.tm_gmtoff = 0;
  202. tmp_time.tm_zone = "UTC";
  203. errno = 0;
  204. int parsed = 0;
  205. char month_name[4] = "";
  206. char weekday_name[4] = "";
  207. if(sscanf(arg,
  208. "%3s %3s %d %2d:%2d:%2d %d%n",
  209. weekday_name,
  210. month_name,
  211. &tmp_time.tm_mday,
  212. &tmp_time.tm_hour,
  213. &tmp_time.tm_min,
  214. &tmp_time.tm_sec,
  215. &tmp_time.tm_year,
  216. &parsed) < 7)
  217. {
  218. if(errno == 0 || errno == EINVAL)
  219. {
  220. *errstr = NULL;
  221. }
  222. else
  223. {
  224. *errstr = strerror(errno);
  225. errno = 0;
  226. }
  227. return NULL;
  228. }
  229. arg += parsed;
  230. tmp_time.tm_year -= 1900;
  231. tmp_time.tm_wday = -1;
  232. // Because %a/%A is locale-dependent
  233. for(size_t i = 0; i < 7; i++)
  234. {
  235. if(memcmp(weekday_name, short_weekday_name[i], 3) == 0)
  236. {
  237. tmp_time.tm_wday = i;
  238. break;
  239. }
  240. }
  241. if(tmp_time.tm_wday < 0)
  242. {
  243. *errstr = "Failed parsing short weekday name";
  244. errno = 0;
  245. return NULL;
  246. }
  247. tmp_time.tm_mon = -1;
  248. // Because %b/%B is locale-dependent
  249. for(size_t i = 0; i < 12; i++)
  250. {
  251. if(memcmp(month_name, short_month_name[i], 3) == 0)
  252. {
  253. tmp_time.tm_mon = i;
  254. break;
  255. }
  256. }
  257. if(tmp_time.tm_mon < 0)
  258. {
  259. *errstr = "Failed parsing short month name";
  260. errno = 0;
  261. return NULL;
  262. }
  263. for(; isspace(arg[0]); arg++)
  264. ;
  265. *epoch = utils_timegm(&tmp_time);
  266. return arg;
  267. }
  268. // Sets errstr on failure
  269. // YYYY-MM-DD[T ]hh:mm:SS([,\.]frac)?(Z|[+\-]hh:?mm)?
  270. static char *
  271. iso_datetime_parse(char *arg, time_t *epoch, long *nsec, const char **errstr)
  272. {
  273. // Try parsing as RFC3339 subset of ISO 8601:1988
  274. struct tm tmp_time = {
  275. .tm_isdst = -1,
  276. .tm_wday = -1,
  277. };
  278. // FIXME?: Calling strptime() multiple times is explicitly unspecified in POSIX.1-2024
  279. // instead a single strptime() call should be done
  280. // No %F in POSIX prior to POSIX.1-2024 (<https://www.austingroupbugs.net/view.php?id=920>)
  281. char *s = strptime(arg, "%Y-%m-%d", &tmp_time);
  282. if(s == NULL)
  283. {
  284. *errstr = "strptime(…, \"%Y-%m-%d\", …) returned NULL";
  285. errno = 0;
  286. return NULL;
  287. }
  288. if(s[0] != 'T' && s[0] != ' ')
  289. {
  290. *errstr = "Couldn't find time-separator (T or space) after date (Y-m-d)";
  291. errno = 0;
  292. return NULL;
  293. }
  294. s++;
  295. for(; isspace(s[0]); s++)
  296. ;
  297. s = strptime(s, "%H:%M:%S", &tmp_time);
  298. if(s == NULL)
  299. {
  300. *errstr = "strptime(…, \"%H:%M:%S\", …) returned NULL";
  301. errno = 0;
  302. return NULL;
  303. }
  304. if(s[0] == ',' || s[0] == '.')
  305. {
  306. double fraction = 0.0;
  307. int parsed = 0;
  308. if(s[0] == ',') s[0] = '.';
  309. if(sscanf(s, "%10lf%n", &fraction, &parsed) < 1)
  310. {
  311. if(errno == 0)
  312. {
  313. *errstr = "Failed to parse fractional seconds";
  314. }
  315. else
  316. {
  317. *errstr = strerror(errno);
  318. errno = 0;
  319. }
  320. return NULL;
  321. }
  322. *nsec = (long)(fraction * 1000000000);
  323. s += parsed;
  324. // too many digits
  325. if(isdigit(s[0]))
  326. {
  327. *errstr = "Too many digits (> 10) for fractional seconds";
  328. return NULL;
  329. }
  330. }
  331. for(; isspace(s[0]); s++)
  332. ;
  333. if(s != NULL && s[0] != '\0')
  334. {
  335. if(s[0] == 'Z' && s[1] == '\0')
  336. {
  337. tmp_time.tm_isdst = 0;
  338. tmp_time.tm_gmtoff = 0;
  339. tmp_time.tm_zone = "UTC";
  340. }
  341. else
  342. {
  343. s = tzoffset_parse(s, &tmp_time, errstr);
  344. if(s == NULL) return NULL;
  345. }
  346. }
  347. *epoch = utils_timegm(&tmp_time);
  348. return s;
  349. }
  350. // Sets errstr on failure
  351. char *
  352. datetime_parse(char *arg, time_t *epoch, long *nsec, const char **errstr)
  353. {
  354. *nsec = 0;
  355. // For Alpine's abuild compatibility
  356. if(arg[0] == '@')
  357. {
  358. arg++;
  359. char *endptr = NULL;
  360. *epoch = strtol(arg, &endptr, 10);
  361. if(errno != 0)
  362. {
  363. *errstr = strerror(errno);
  364. errno = 0;
  365. return NULL;
  366. }
  367. return endptr;
  368. }
  369. char *ret = NULL;
  370. ret = email_datetime_parse(arg, epoch, errstr);
  371. if(ret != NULL || *errstr != NULL)
  372. {
  373. return ret;
  374. }
  375. ret = asctime_datetime_parse(arg, epoch, errstr);
  376. if(ret != NULL || *errstr != NULL)
  377. {
  378. return ret;
  379. }
  380. return iso_datetime_parse(arg, epoch, nsec, errstr);
  381. }