0017-m4-Use-hand-written-lexer-to-avoid-cycle-in-bootstra.patch (7485B)
- From edf250c633bef40e7e37dafc9fc393dd2ad9074f Mon Sep 17 00:00:00 2001
- From: Michael Forney <mforney@mforney.org>
- Date: Tue, 10 Apr 2018 13:37:14 -0700
- Subject: [PATCH] m4: Use hand-written lexer to avoid cycle in bootstrap
- ---
- usr.bin/m4/tokenizer.c | 191 +++++++++++++++++++++++++++++++++++++++++
- usr.bin/m4/tokenizer.l | 109 -----------------------
- 2 files changed, 191 insertions(+), 109 deletions(-)
- create mode 100644 usr.bin/m4/tokenizer.c
- delete mode 100644 usr.bin/m4/tokenizer.l
- diff --git a/usr.bin/m4/tokenizer.c b/usr.bin/m4/tokenizer.c
- new file mode 100644
- index 00000000000..fa19fc65035
- --- /dev/null
- +++ b/usr.bin/m4/tokenizer.c
- @@ -0,0 +1,191 @@
- +/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */
- +/*
- + * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org>
- + *
- + * Permission to use, copy, modify, and distribute this software for any
- + * purpose with or without fee is hereby granted, provided that the above
- + * copyright notice and this permission notice appear in all copies.
- + *
- + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- + */
- +#include "parser.tab.h"
- +#include <assert.h>
- +#include <ctype.h>
- +#include <errno.h>
- +#include <limits.h>
- +#include <stdbool.h>
- +#include <stdio.h>
- +#include <stdlib.h>
- +#include <stdint.h>
- +
- +extern void m4_warnx(const char *, ...);
- +extern int mimic_gnu;
- +extern int32_t yylval;
- +static const char *yypos;
- +
- +/* Aim the lexer at s; yylex() scans from there and stops at a NUL byte. */
- +void yy_scan_string(const char *s)
- +{
- +	yypos = s;
- +}
- +
- +/* Convert the token at yytext with strtol (base 0); warn if outside int32_t. */
- +static int32_t
- +number(const char *yytext, size_t yylen)
- +{
- +	long value;	/* yylen only bounds the text echoed in the warning */
- +	errno = 0;	/* so ERANGE below can only come from this strtol call */
- +	value = strtol(yytext, NULL, 0);
- +	if (value < INT32_MIN || value > INT32_MAX ||
- +	    (errno == ERANGE && (value == LONG_MIN || value == LONG_MAX)))
- +		m4_warnx("numeric overflow in expr: %.*s", (int)yylen, yytext);
- +	return value;
- +}
- +
- +/* Evaluate the GNU radix literal "0r<base>:<digits>" that spans yylen bytes
- + * at yytext; warns and returns 0 on a bad base or an out-of-range digit. */
- +static int32_t
- +parse_radix(const char *yytext, size_t yylen)
- +{
- +	char *next;
- +	long base, l = 0;
- +	int d;
- +
- +	base = strtol(yytext+2, &next, 0);
- +	/* strtol (base 0) may stop before ':' (e.g. "09"); reject that. */
- +	if (base > 36 || *next != ':') {
- +		m4_warnx("error in number %.*s", (int)yylen, yytext);
- +		return 0;
- +	}
- +	/* yytext is not NUL-terminated at the token end, so bound by yylen. */
- +	for (next++; next < yytext + yylen; next++) {
- +		if (*next >= '0' && *next <= '9')
- +			d = *next - '0';
- +		else if (*next >= 'a' && *next <= 'z')
- +			d = *next - 'a' + 10;
- +		else {
- +			assert(*next >= 'A' && *next <= 'Z');
- +			d = *next - 'A' + 10;
- +		}
- +		if (d >= base) {
- +			m4_warnx("error in number %.*s", (int)yylen, yytext);
- +			return 0;
- +		}
- +		l = base * l + d;
- +	}
- +	return l;
- +}
- +
- +/* Yields 1 when c is one of the octal digits '0'..'7', otherwise 0. */
- +static int isodigit(int c)
- +{
- +	return !(c < '0' || c > '7');
- +}
- +
- +/*
- + * Return the next token of the string given to yy_scan_string(): a parser token,
- + * a lone character as itself, or 0 at the NUL.  NUMBER values go in yylval.
- + */
- +int yylex(void)
- +{
- +	const char *start;
- +
- +next:
- +	start = yypos;
- +	switch (*yypos) {
- +	case ' ': case '\t': case '\n':
- +		++yypos;
- +		goto next;
- +	case '<':
- +		switch (yypos[1]) {
- +		case '=':
- +			yypos += 2;
- +			return LE;
- +		case '<':
- +			yypos += 2;
- +			return LSHIFT;
- +		}
- +		break;
- +	case '>':
- +		switch (yypos[1]) {
- +		case '=':
- +			yypos += 2;
- +			return GE;
- +		case '>':
- +			yypos += 2;
- +			return RSHIFT;
- +		}
- +		break;
- +	case '=':
- +		if (yypos[1] != '=')
- +			break;
- +		yypos += 2;
- +		return EQ;
- +	case '!':
- +		if (yypos[1] != '=')
- +			break;
- +		yypos += 2;
- +		return NE;
- +	case '&':
- +		if (yypos[1] != '&')
- +			break;
- +		yypos += 2;
- +		return LAND;
- +	case '|':
- +		if (yypos[1] != '|')
- +			break;
- +		yypos += 2;
- +		return LOR;
- +	case '*':
- +		if (!mimic_gnu || yypos[1] != '*')
- +			break;
- +		yypos += 2;
- +		return EXPONENT;
- +	case '0':
- +		/* Casts: <ctype.h> calls are undefined for negative char values. */
- +		switch (*++yypos) {
- +		case 'x': case 'X':
- +			if (!isxdigit((unsigned char)*++yypos))
- +				return ERROR;
- +			do ++yypos;
- +			while (isxdigit((unsigned char)*yypos));
- +			break;
- +		case 'r': case 'R':
- +			if (!mimic_gnu)
- +				break;
- +			if (!isdigit((unsigned char)*++yypos))
- +				return ERROR;
- +			do ++yypos;
- +			while (isdigit((unsigned char)*yypos));
- +			if (*yypos != ':' || !isalnum((unsigned char)*++yypos))
- +				return ERROR;
- +			do ++yypos;
- +			while (isalnum((unsigned char)*yypos));
- +			yylval = parse_radix(start, yypos - start);
- +			return NUMBER;
- +		default:
- +			/* yypos is already past the '0'; take octal digits only. */
- +			while (isodigit(*yypos))
- +				++yypos;
- +			break;
- +		}
- +		yylval = number(start, yypos - start);
- +		return NUMBER;
- +	case '\0':
- +		return '\0';
- +	}
- +	if (isdigit((unsigned char)*yypos)) {
- +		do ++yypos;
- +		while (isdigit((unsigned char)*yypos));
- +		yylval = number(start, yypos - start);
- +		return NUMBER;
- +	}
- +
- +	return *yypos++;
- +}
- diff --git a/usr.bin/m4/tokenizer.l b/usr.bin/m4/tokenizer.l
- deleted file mode 100644
- index 94f02fb6085..00000000000
- --- a/usr.bin/m4/tokenizer.l
- +++ /dev/null
- @@ -1,109 +0,0 @@
- -%{
- -/* $OpenBSD: tokenizer.l,v 1.10 2017/06/17 01:55:16 bcallah Exp $ */
- -/*
- - * Copyright (c) 2004 Marc Espie <espie@cvs.openbsd.org>
- - *
- - * Permission to use, copy, modify, and distribute this software for any
- - * purpose with or without fee is hereby granted, provided that the above
- - * copyright notice and this permission notice appear in all copies.
- - *
- - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- - */
- -#include "parser.h"
- -#include <assert.h>
- -#include <stdlib.h>
- -#include <errno.h>
- -#include <stdint.h>
- -#include <limits.h>
- -
- -extern void m4_warnx(const char *, ...);
- -extern int mimic_gnu;
- -extern int32_t yylval;
- -
- -int32_t number(void);
- -int32_t parse_radix(void);
- -%}
- -
- -delim [ \t\n]
- -ws {delim}+
- -hex 0[xX][0-9a-fA-F]+
- -oct 0[0-7]*
- -dec [1-9][0-9]*
- -radix 0[rR][0-9]+:[0-9a-zA-Z]+
- -
- -%option noyywrap
- -
- -%%
- -{ws} {/* just skip it */}
- -{hex}|{oct}|{dec} { yylval = number(); return(NUMBER); }
- -{radix} { if (mimic_gnu) {
- - yylval = parse_radix(); return(NUMBER);
- - } else {
- - return(ERROR);
- - }
- - }
- -"<=" { return(LE); }
- -">=" { return(GE); }
- -"<<" { return(LSHIFT); }
- -">>" { return(RSHIFT); }
- -"==" { return(EQ); }
- -"!=" { return(NE); }
- -"&&" { return(LAND); }
- -"||" { return(LOR); }
- -"**" { if (mimic_gnu) { return (EXPONENT); } }
- -. { return yytext[0]; }
- -%%
- -
- -int32_t
- -number()
- -{
- - long l;
- -
- - errno = 0;
- - l = strtol(yytext, NULL, 0);
- - if (((l == LONG_MAX || l == LONG_MIN) && errno == ERANGE) ||
- - l > INT32_MAX || l < INT32_MIN)
- - m4_warnx("numeric overflow in expr: %s", yytext);
- - return l;
- -}
- -
- -int32_t
- -parse_radix()
- -{
- - long base;
- - char *next;
- - long l;
- - int d;
- -
- - l = 0;
- - base = strtol(yytext+2, &next, 0);
- - if (base > 36 || next == NULL) {
- - m4_warnx("error in number %s", yytext);
- - } else {
- - next++;
- - while (*next != 0) {
- - if (*next >= '0' && *next <= '9')
- - d = *next - '0';
- - else if (*next >= 'a' && *next <= 'z')
- - d = *next - 'a' + 10;
- - else {
- - assert(*next >= 'A' && *next <= 'Z');
- - d = *next - 'A' + 10;
- - }
- - if (d >= base) {
- - m4_warnx("error in number %s", yytext);
- - return 0;
- - }
- - l = base * l + d;
- - next++;
- - }
- - }
- - return l;
- -}
- -
- --
- 2.17.0