logo

oasis

Own branch of Oasis Linux (upstream: <https://git.sr.ht/~mcf/oasis/>) git clone https://anongit.hacktivis.me/git/oasis.git

0017-m4-Use-hand-written-lexer-to-avoid-cycle-in-bootstra.patch (7485B)


  1. From edf250c633bef40e7e37dafc9fc393dd2ad9074f Mon Sep 17 00:00:00 2001
  2. From: Michael Forney <mforney@mforney.org>
  3. Date: Tue, 10 Apr 2018 13:37:14 -0700
  4. Subject: [PATCH] m4: Use hand-written lexer to avoid cycle in bootstrap
  5. ---
  6. usr.bin/m4/tokenizer.c | 191 +++++++++++++++++++++++++++++++++++++++++
  7. usr.bin/m4/tokenizer.l | 109 -----------------------
  8. 2 files changed, 191 insertions(+), 109 deletions(-)
  9. create mode 100644 usr.bin/m4/tokenizer.c
  10. delete mode 100644 usr.bin/m4/tokenizer.l
  11. diff --git a/usr.bin/m4/tokenizer.c b/usr.bin/m4/tokenizer.c
  12. new file mode 100644
  13. index 00000000000..fa19fc65035
  14. --- /dev/null
  15. +++ b/usr.bin/m4/tokenizer.c
  16. @@ -0,0 +1,191 @@
  17. +/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */
  18. +/*
  19. + * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org>
  20. + *
  21. + * Permission to use, copy, modify, and distribute this software for any
  22. + * purpose with or without fee is hereby granted, provided that the above
  23. + * copyright notice and this permission notice appear in all copies.
  24. + *
  25. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  26. + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  27. + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  28. + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  29. + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  30. + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  31. + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  32. + */
  33. +#include "parser.tab.h"
  34. +#include <assert.h>
  35. +#include <ctype.h>
  36. +#include <errno.h>
  37. +#include <limits.h>
  38. +#include <stdbool.h>
  39. +#include <stdio.h>
  40. +#include <stdlib.h>
  41. +#include <stdint.h>
  42. +
  43. +extern void m4_warnx(const char *, ...);
  44. +extern int mimic_gnu;
  45. +extern int32_t yylval;
  46. +static const char *yypos;
  47. +
  48. +void
  49. +yy_scan_string(const char *s) /* select the string that yylex() will scan */
  50. +{
  51. + yypos = s; /* not copied: yylex() reads through this pointer */
  52. +}
  53. +
  54. +static int32_t
  55. +number(const char *yytext, size_t yylen) /* value of the numeric token starting at yytext, yylen chars long */
  56. +{
  57. + long l;
  58. +
  59. + errno = 0; /* so that ERANGE below can only come from this strtol() */
  60. + l = strtol(yytext, NULL, 0); /* base 0: a 0x or 0 prefix selects hex or octal */
  61. + if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) ||
  62. + l > INT32_MAX || l < INT32_MIN)
  63. + m4_warnx("numeric overflow in expr: %.*s", (int)yylen, yytext); /* %.*s: yytext is not terminated at the token end */
  64. + return l; /* only a warning is issued; the out-of-range value is still returned, converted to int32_t */
  65. +}
  66. +
  67. +static int32_t
  68. +parse_radix(const char *yytext, size_t yylen) /* value of a "0r<base>:<digits>" token of yylen chars */
  69. +{
  70. + long base;
  71. + char *next;
  72. + long l;
  73. + int d;
  74. +
  75. + l = 0;
  76. + base = strtol(yytext+2, &next, 10); /* skip "0r"; the base is decimal, so a leading 0 must not switch to octal */
  77. + if (base > 36 || next == NULL) {
  78. + m4_warnx("error in number %.*s", (int)yylen, yytext);
  79. + } else {
  80. + next++; /* step over the ':' that separates base and digits */
  81. + while (next < yytext + yylen) { /* bounded by yylen: yytext is not NUL-terminated at the token end */
  82. + if (*next >= '0' && *next <= '9')
  83. + d = *next - '0';
  84. + else if (*next >= 'a' && *next <= 'z')
  85. + d = *next - 'a' + 10;
  86. + else {
  87. + assert(*next >= 'A' && *next <= 'Z');
  88. + d = *next - 'A' + 10;
  89. + }
  90. + if (d >= base) { /* digit not valid in this base */
  91. + m4_warnx("error in number %.*s", (int)yylen, yytext);
  92. + return 0;
  93. + }
  94. + l = base * l + d;
  95. + next++;
  96. + }
  97. + }
  98. + return l; /* stays 0 when the base was rejected above */
  99. +}
  100. +
  101. +static int
  102. +isodigit(int c) /* returns 1 if c is an octal digit ('0'..'7'), else 0 */
  103. +{
  104. + return c >= '0' && c <= '7';
  105. +}
  106. +
  107. +int yylex(void) /* returns the next token of the string given to yy_scan_string(), or 0 at its end */
  108. +{
  109. + const char *start;
  110. +
  111. +next:
  112. + start = yypos; /* remember where the token begins, for number()/parse_radix() */
  113. + switch (*yypos) {
  114. + case ' ':
  115. + case '\t':
  116. + case '\n':
  117. + ++yypos; /* whitespace is skipped, never returned */
  118. + goto next;
  119. + case '<':
  120. + switch (yypos[1]) {
  121. + case '=':
  122. + yypos += 2;
  123. + return LE;
  124. + case '<':
  125. + yypos += 2;
  126. + return LSHIFT;
  127. + }
  128. + break; /* a lone '<' is returned as a single character at the bottom */
  129. + case '>':
  130. + switch (yypos[1]) {
  131. + case '=':
  132. + yypos += 2;
  133. + return GE;
  134. + case '>':
  135. + yypos += 2;
  136. + return RSHIFT;
  137. + }
  138. + break;
  139. + case '=':
  140. + if (yypos[1] != '=')
  141. + break;
  142. + yypos += 2;
  143. + return EQ;
  144. + case '!':
  145. + if (yypos[1] != '=')
  146. + break;
  147. + yypos += 2;
  148. + return NE;
  149. + case '&':
  150. + if (yypos[1] != '&')
  151. + break;
  152. + yypos += 2;
  153. + return LAND;
  154. + case '|':
  155. + if (yypos[1] != '|')
  156. + break;
  157. + yypos += 2;
  158. + return LOR;
  159. + case '*':
  160. + if (!mimic_gnu || yypos[1] != '*')
  161. + break;
  162. + yypos += 2;
  163. + return EXPONENT;
  164. + case '0':
  165. + switch (*++yypos) { /* look at the character after the leading '0' */
  166. + case 'x':
  167. + case 'X':
  168. + if (!isxdigit((unsigned char)*++yypos)) /* cast: <ctype.h> is undefined for negative char values */
  169. + return ERROR;
  170. + do ++yypos;
  171. + while (isxdigit((unsigned char)*yypos));
  172. + break;
  173. + case 'r':
  174. + case 'R':
  175. + if (!mimic_gnu)
  176. + break;
  177. + if (!isdigit((unsigned char)*++yypos))
  178. + return ERROR;
  179. + do ++yypos;
  180. + while (isdigit((unsigned char)*yypos));
  181. + if (*yypos != ':')
  182. + return ERROR;
  183. + if (!isalnum((unsigned char)*++yypos))
  184. + return ERROR;
  185. + do ++yypos;
  186. + while (isalnum((unsigned char)*yypos));
  187. + yylval = parse_radix(start, yypos - start);
  188. + return NUMBER;
  189. + default: /* yypos is already past the '0': consume only octal digits, never the NUL or an operator */
  190. + while (isodigit(*yypos))
  191. + ++yypos;
  192. + break;
  193. + }
  194. + yylval = number(start, yypos - start);
  195. + return NUMBER;
  196. + case '\0':
  197. + return '\0'; /* end of input; yypos is left on the terminator */
  198. + }
  199. + if (isdigit((unsigned char)*yypos)) {
  200. + do ++yypos;
  201. + while (isdigit((unsigned char)*yypos));
  202. + yylval = number(start, yypos - start);
  203. + return NUMBER;
  204. + }
  205. +
  206. + return *yypos++; /* any other character is its own token */
  207. +}
  208. diff --git a/usr.bin/m4/tokenizer.l b/usr.bin/m4/tokenizer.l
  209. deleted file mode 100644
  210. index 94f02fb6085..00000000000
  211. --- a/usr.bin/m4/tokenizer.l
  212. +++ /dev/null
  213. @@ -1,109 +0,0 @@
  214. -%{
  215. -/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */
  216. -/*
  217. - * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org>
  218. - *
  219. - * Permission to use, copy, modify, and distribute this software for any
  220. - * purpose with or without fee is hereby granted, provided that the above
  221. - * copyright notice and this permission notice appear in all copies.
  222. - *
  223. - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  224. - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  225. - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  226. - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  227. - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  228. - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  229. - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  230. - */
  231. -#include "parser.h"
  232. -#include <assert.h>
  233. -#include <stdlib.h>
  234. -#include <errno.h>
  235. -#include <stdint.h>
  236. -#include <limits.h>
  237. -
  238. -extern void m4_warnx(const char *, ...);
  239. -extern int mimic_gnu;
  240. -extern int32_t yylval;
  241. -
  242. -int32_t number(void);
  243. -int32_t parse_radix(void);
  244. -%}
  245. -
  246. -delim [ \t\n]
  247. -ws {delim}+
  248. -hex 0[xX][0-9a-fA-F]+
  249. -oct 0[0-7]*
  250. -dec [1-9][0-9]*
  251. -radix 0[rR][0-9]+:[0-9a-zA-Z]+
  252. -
  253. -%option noyywrap
  254. -
  255. -%%
  256. -{ws} {/* just skip it */}
  257. -{hex}|{oct}|{dec} { yylval = number(); return(NUMBER); }
  258. -{radix} { if (mimic_gnu) {
  259. - yylval = parse_radix(); return(NUMBER);
  260. - } else {
  261. - return(ERROR);
  262. - }
  263. - }
  264. -"<=" { return(LE); }
  265. -">=" { return(GE); }
  266. -"<<" { return(LSHIFT); }
  267. -">>" { return(RSHIFT); }
  268. -"==" { return(EQ); }
  269. -"!=" { return(NE); }
  270. -"&&" { return(LAND); }
  271. -"||" { return(LOR); }
  272. -"**" { if (mimic_gnu) { return (EXPONENT); } }
  273. -. { return yytext[0]; }
  274. -%%
  275. -
  276. -int32_t
  277. -number()
  278. -{
  279. - long l;
  280. -
  281. - errno = 0;
  282. - l = strtol(yytext, NULL, 0);
  283. - if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) ||
  284. - l > INT32_MAX || l < INT32_MIN)
  285. - m4_warnx("numeric overflow in expr: %s", yytext);
  286. - return l;
  287. -}
  288. -
  289. -int32_t
  290. -parse_radix()
  291. -{
  292. - long base;
  293. - char *next;
  294. - long l;
  295. - int d;
  296. -
  297. - l = 0;
  298. - base = strtol(yytext+2, &next, 0);
  299. - if (base > 36 || next == NULL) {
  300. - m4_warnx("error in number %s", yytext);
  301. - } else {
  302. - next++;
  303. - while (*next != 0) {
  304. - if (*next >= '0' && *next <= '9')
  305. - d = *next - '0';
  306. - else if (*next >= 'a' && *next <= 'z')
  307. - d = *next - 'a' + 10;
  308. - else {
  309. - assert(*next >= 'A' && *next <= 'Z');
  310. - d = *next - 'A' + 10;
  311. - }
  312. - if (d >= base) {
  313. - m4_warnx("error in number %s", yytext);
  314. - return 0;
  315. - }
  316. - l = base * l + d;
  317. - next++;
  318. - }
  319. - }
  320. - return l;
  321. -}
  322. -
  323. --
  324. 2.17.0