cmd/expr: Import from FreeBSD - utils-std - Collection of commonly available Unix tools

commit: 1dbe86a54b51968a0ea3fae9ede282b10b4cb44a
parent fc900cfa689f8ea4095aa542ba321352fba53171
Author: Haelwenn (lanodan) Monnier <contact@hacktivis.me>
Date:   Wed, 24 Apr 2024 21:47:03 +0200

cmd/expr: Import from FreeBSD

Diffstat:
M .gitignore 2 ++
M Makefile 8 ++++++++
M README.md 1 +
A cmd/expr.1 326 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A cmd/expr.y 566 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M configure 6 +++++-
M makeless-regen.sh 3 ++-
M makeless.sh 3 +++

8 files changed, 913 insertions(+), 2 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -6,9 +6,11 @@
 /cmd/*
 !/cmd/yes
 !/cmd/*.c
+!/cmd/*.y
 !/cmd/*.ha
 !/cmd/*.1
 !/cmd/*.1.in
+/cmd/expr.tab.c
 *.t.err
 *.o
 
diff --git a/Makefile b/Makefile
@@ -41,6 +41,7 @@ lint: $(MAN1SO)
 clean:
 	rm -fr $(EXE) $(MAN1SO) $(TEST_LIBS)
 	rm -fr ${EXE:=.c.gcov} ${EXE:=.gcda} ${EXE:=.gcno}
+	rm -f cmd/expr.tab.c
 
 install: all
 	mkdir -p ${DESTDIR}${BINDIR}/
@@ -121,3 +122,10 @@ cmd/truncate: cmd/truncate.c lib/truncation.c lib/truncation.h Makefile
 cmd/tr: cmd/tr.c lib/tr_str.c lib/tr_str.h Makefile
 	rm -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno}
 	$(CC) -std=c99 $(CFLAGS) -o $@ cmd/tr.c lib/tr_str.c $(LDFLAGS) $(LDSTATIC)
+
+cmd/expr.tab.c: cmd/expr.y Makefile
+	$(YACC) -b cmd/expr cmd/expr.y
+
+cmd/expr: cmd/expr.tab.c Makefile
+	rm -f ${<:=.gcov} ${@:=.gcda} ${@:=.gcno}
+	$(CC) -std=c99 $(CFLAGS) -o $@ cmd/expr.tab.c $(LDFLAGS) $(LDSTATIC)
diff --git a/README.md b/README.md
@@ -9,6 +9,7 @@ Developed on Linux+musl, automatically tested on FreeBSD and NetBSD thanks to [S
 
 ## Dependencies
 - C99 Compiler + POSIX C Library
+- POSIX yacc(1) implementation
 - POSIX Shell
 - POSIX Make (optional, see `makeless.sh`)
 - (optional, test) ATF: <https://github.com/jmmv/atf>
diff --git a/cmd/expr.1 b/cmd/expr.1
@@ -0,0 +1,326 @@
+.\" SPDX-License-Identifier: 0BSD
+.\" -*- nroff -*-
+.\"-
+.\" Copyright (c) 1993 Winning Strategies, Inc.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"      This product includes software developed by Winning Strategies, Inc.
+.\" 4. The name of the author may not be used to endorse or promote products
+.\"    derived from this software without specific prior written permission
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+.\" OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+.\" IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+.\" INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+.\" NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+.\" DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+.\" THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+.\" (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+.\" THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+.\"
+.Dd October 5, 2016
+.Dt EXPR 1
+.Os
+.Sh NAME
+.Nm expr
+.Nd evaluate expression
+.Sh SYNOPSIS
+.Nm
+.Op Fl e
+.Ar expression
+.Sh DESCRIPTION
+The
+.Nm
+utility evaluates
+.Ar expression
+and writes the result on standard output.
+.Pp
+All operators and operands must be passed as separate arguments.
+Several of the operators have special meaning to command interpreters
+and must therefore be quoted appropriately.
+All integer operands are interpreted in base 10 and must consist of only
+an optional leading minus sign followed by one or more digits (unless
+less strict parsing has been enabled for backwards compatibility with
+prior versions of
+.Nm
+in
+.Fx ) .
+.Pp
+Arithmetic operations are performed using signed integer math with a
+range according to the C
+.Vt intmax_t
+data type (the largest signed integral type available).
+All conversions and operations are checked for overflow.
+Overflow results in program termination with an error message on standard error
+and with an error status.
+.Pp
+The
+.Fl e
+option enables backwards compatible behaviour as detailed below.
+.Pp
+Operators are listed below in order of increasing precedence; all
+are left-associative.
+Operators with equal precedence are grouped within symbols
+.Ql {
+and
+.Ql } .
+.Bl -tag -width indent
+.It Ar expr1 Li \&| Ar expr2
+Return the evaluation of
+.Ar expr1
+if it is neither an empty string nor zero;
+otherwise, returns the evaluation of
+.Ar expr2
+if it is not an empty string;
+otherwise, returns zero.
+.It Ar expr1 Li & Ar expr2
+Return the evaluation of
+.Ar expr1
+if neither expression evaluates to an empty string or zero;
+otherwise, returns zero.
+.It Ar expr1 Bro =, >, >=, <, <=, != Brc Ar expr2
+Return the results of integer comparison if both arguments are integers;
+otherwise, returns the results of string comparison using the locale-specific
+collation sequence.
+The result of each comparison is 1 if the specified relation is true,
+or 0 if the relation is false.
+.It Ar expr1 Bro +, - Brc Ar expr2
+Return the results of addition or subtraction of integer-valued arguments.
+.It Ar expr1 Bro *, /, % Brc Ar expr2
+Return the results of multiplication, integer division, or remainder of integer-valued arguments.
+.It Ar expr1 Li \&: Ar expr2
+The
+.Dq Li \&:
+operator matches
+.Ar expr1
+against
+.Ar expr2 ,
+which must be a basic regular expression.
+The regular expression is anchored
+to the beginning of the string with an implicit
+.Dq Li ^ .
+.Pp
+If the match succeeds and the pattern contains at least one regular
+expression subexpression
+.Dq Li "\e(...\e)" ,
+the string corresponding to
+.Dq Li \e1
+is returned;
+otherwise the matching operator returns the number of characters matched.
+If the match fails and the pattern contains a regular expression subexpression
+the null string is returned;
+otherwise 0.
+.El
+.Pp
+Parentheses are used for grouping in the usual manner.
+.Pp
+The
+.Nm
+utility makes no lexical distinction between arguments which may be
+operators and arguments which may be operands.
+An operand which is lexically identical to an operator will be considered a
+syntax error.
+See the examples below for a work-around.
+.Pp
+The syntax of the
+.Nm
+command in general is historic and inconvenient.
+New applications are advised to use shell arithmetic rather than
+.Nm .
+.Ss Compatibility with previous implementations
+Unless
+.Fx
+4.x
+compatibility is enabled, this version of
+.Nm
+adheres to the
+.Tn POSIX
+Utility Syntax Guidelines, which require that a leading argument beginning
+with a minus sign be considered an option to the program.
+The standard
+.Fl Fl
+syntax may be used to prevent this interpretation.
+However, many historic implementations of
+.Nm ,
+including the one in previous versions of
+.Fx ,
+will not permit this syntax.
+See the examples below for portable ways to guarantee the correct
+interpretation.
+The
+.Xr check_utility_compat 3
+function (with a
+.Fa utility
+argument of
+.Dq Li expr )
+is used to determine whether backwards compatibility mode should be enabled.
+This feature is intended for use as a transition and debugging aid, when
+.Nm
+is used in complex scripts which cannot easily be recast to avoid the
+non-portable usage.
+Enabling backwards compatibility mode also implicitly enables the
+.Fl e
+option, since this matches the historic behavior of
+.Nm
+in
+.Fx .
+This option makes number parsing less strict and permits leading white space and an optional leading plus sign.
+In addition, empty operands
+have an implied value of zero in numeric context.
+For historical reasons, defining the environment variable
+.Ev EXPR_COMPAT
+also enables backwards compatibility mode.
+.Sh ENVIRONMENT
+.Bl -tag -width ".Ev EXPR_COMPAT"
+.It Ev EXPR_COMPAT
+If set, enables backwards compatibility mode.
+.El
+.Sh EXIT STATUS
+The
+.Nm
+utility exits with one of the following values:
+.Bl -tag -width indent -compact
+.It 0
+the expression is neither an empty string nor 0.
+.It 1
+the expression is an empty string or 0.
+.It 2
+the expression is invalid.
+.El
+.Sh EXAMPLES
+.Bl -bullet
+.It
+The following example (in
+.Xr sh 1
+syntax) adds one to the variable
+.Va a :
+.Dl "a=$(expr $a + 1)"
+.It
+This will fail if the value of
+.Va a
+is a negative number.
+To protect negative values of
+.Va a
+from being interpreted as options to the
+.Nm
+command, one might rearrange the expression:
+.Dl "a=$(expr 1 + $a)"
+.It
+More generally, parenthesize possibly-negative values:
+.Dl "a=$(expr \e( $a \e) + 1)"
+.It
+With shell arithmetic, no escaping is required:
+.Dl "a=$((a + 1))"
+.It
+This example prints the filename portion of a pathname stored
+in variable
+.Va a .
+Since
+.Va a
+might represent the path
+.Pa / ,
+it is necessary to prevent it from being interpreted as the division operator.
+The
+.Li //
+characters resolve this ambiguity.
+.Dl "expr \*q//$a\*q \&: '.*/\e(.*\e)'"
+.It
+With modern
+.Xr sh 1
+syntax,
+.Dl "\*q${a##*/}\*q"
+expands to the same value.
+.El
+.Pp
+The following examples output the number of characters in variable
+.Va a .
+Again, if
+.Va a
+might begin with a hyphen, it is necessary to prevent it from being
+interpreted as an option to
+.Nm ,
+and
+.Va a
+might be interpreted as an operator.
+.Bl -bullet
+.It
+To deal with all of this, a complicated command
+is required:
+.Dl "expr \e( \*qX$a\*q \&: \*q.*\*q \e) - 1"
+.It
+With modern
+.Xr sh 1
+syntax, this can be done much more easily:
+.Dl "${#a}"
+expands to the required number.
+.El
+.Sh SEE ALSO
+.Xr sh 1 ,
+.Xr test 1 ,
+.Xr check_utility_compat 3
+.Sh STANDARDS
+The
+.Nm
+utility conforms to
+.St -p1003.1-2008 ,
+provided that backwards compatibility mode is not enabled.
+.Pp
+Backwards compatibility mode performs less strict checks of numeric arguments:
+.Bl -bullet
+.It
+An empty operand string is interpreted as 0.
+.El
+.Bl -bullet
+.It
+Leading white space and/or a plus sign before an otherwise valid positive
+numeric operand are allowed and will be ignored.
+.El
+.Pp
+The extended arithmetic range and overflow checks do not conflict with
+POSIX's requirement that arithmetic be done using signed longs, since
+they only make a difference to the result in cases where using signed
+longs would give undefined behavior.
+.Pp
+According to the
+.Tn POSIX
+standard, the use of string arguments
+.Va length ,
+.Va substr ,
+.Va index ,
+or
+.Va match
+produces undefined results.
+In this version of
+.Nm ,
+these arguments are treated just as their respective string values.
+.Pp
+The
+.Fl e
+flag is an extension.
+.Sh HISTORY
+An
+.Nm
+utility first appeared in the Programmer's Workbench (PWB/UNIX).
+A public domain version of
+.Nm
+written by
+.An Pace Willisson Aq Mt pace@blitz.com
+appeared in
+.Bx 386 0.1 .
+.Sh AUTHORS
+Initial implementation by
+.An Pace Willisson Aq Mt pace@blitz.com
+was largely rewritten by
+.An -nosplit
+.An J.T. Conklin Aq Mt jtc@FreeBSD.org .
diff --git a/cmd/expr.y b/cmd/expr.y
@@ -0,0 +1,566 @@
+%{
+// SPDX-License-Identifier: 0BSD
+/*-
+ * Written by Pace Willisson (pace@blitz.com)
+ * and placed in the public domain.
+ *
+ * Largely rewritten by J.T. Conklin (jtc@wimsey.com)
+ */
+
+#define _POSIX_C_SOURCE 200809L
+
+#include <sys/types.h>
+#include <ctype.h>
+#include <err.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <regex.h>
+#include <unistd.h>
+
+/*
+ * POSIX specifies a specific error code for syntax errors.  We exit
+ * with this code for all errors.
+ */
+#define	ERR_EXIT	2
+
+/*
+ * Kind of value held by a struct val: a native integer, a string that
+ * passed is_integer() when it was created, or any other string.
+ */
+enum valtype {
+	integer,
+	numeric_string,
+	string
+};
+
+/* Tagged value: 'type' says which member of 'u' is currently valid. */
+struct val {
+	enum valtype type;
+	union {
+		char *s;	/* used by string and numeric_string */
+		intmax_t i;	/* used by integer */
+	} u;
+};
+
+char		**av;
+int		nonposix;
+struct val	*result;
+
+void		assert_to_integer(struct val *);
+void		assert_div(intmax_t, intmax_t);
+void		assert_minus(intmax_t, intmax_t, intmax_t);
+void		assert_plus(intmax_t, intmax_t, intmax_t);
+void		assert_times(intmax_t, intmax_t, intmax_t);
+int		compare_vals(struct val *, struct val *);
+void		free_value(struct val *);
+int		is_integer(const char *);
+int		is_string(struct val *);
+int		is_zero_or_null(struct val *);
+struct val	*make_integer(intmax_t);
+struct val	*make_str(const char *);
+struct val	*op_and(struct val *, struct val *);
+struct val	*op_colon(struct val *, struct val *);
+struct val	*op_div(struct val *, struct val *);
+struct val	*op_eq(struct val *, struct val *);
+struct val	*op_ge(struct val *, struct val *);
+struct val	*op_gt(struct val *, struct val *);
+struct val	*op_le(struct val *, struct val *);
+struct val	*op_lt(struct val *, struct val *);
+struct val	*op_minus(struct val *, struct val *);
+struct val	*op_ne(struct val *, struct val *);
+struct val	*op_or(struct val *, struct val *);
+struct val	*op_plus(struct val *, struct val *);
+struct val	*op_rem(struct val *, struct val *);
+struct val	*op_times(struct val *, struct val *);
+int		to_integer(struct val *);
+void		to_string(struct val *);
+#define YYERROR_IS_DECLARED
+int		yyerror(const char *);
+int		yylex(void);
+
+%}
+
+%union
+{
+	struct val *val;
+}
+
+%left <val> '|'
+%left <val> '&'
+%left <val> '=' '>' '<' GE LE NE
+%left <val> '+' '-'
+%left <val> '*' '/' '%'
+%left <val> ':'
+
+%token <val> TOKEN
+%type <val> start expr
+
+%%
+
+/*
+ * Top-level rule: hand the value of the whole expression to main().
+ * The value is taken from $1 explicitly rather than from a $$ that this
+ * action never assigned.
+ */
+start: expr { result = $1; }
+
+/*
+ * An expression is a single operand, a parenthesised expression, or two
+ * expressions joined by a binary operator.  Each operator action passes
+ * both operand values to the matching op_*() helper.
+ */
+expr
+	: TOKEN			{ $$ = $1; }
+	| '(' expr ')'		{ $$ = $2; }
+	| expr '|' expr		{ $$ = op_or($1, $3); }
+	| expr '&' expr		{ $$ = op_and($1, $3); }
+	| expr '=' expr		{ $$ = op_eq($1, $3); }
+	| expr '>' expr		{ $$ = op_gt($1, $3); }
+	| expr '<' expr		{ $$ = op_lt($1, $3); }
+	| expr GE expr		{ $$ = op_ge($1, $3); }
+	| expr LE expr		{ $$ = op_le($1, $3); }
+	| expr NE expr		{ $$ = op_ne($1, $3); }
+	| expr '+' expr		{ $$ = op_plus($1, $3); }
+	| expr '-' expr		{ $$ = op_minus($1, $3); }
+	| expr '*' expr		{ $$ = op_times($1, $3); }
+	| expr '/' expr		{ $$ = op_div($1, $3); }
+	| expr '%' expr		{ $$ = op_rem($1, $3); }
+	| expr ':' expr		{ $$ = op_colon($1, $3); }
+	;
+
+%%
+
+/*
+ * Allocate a new value of type integer holding i.
+ * Terminates the program with ERR_EXIT when the allocation fails.
+ */
+struct val *
+make_integer(intmax_t i)
+{
+	struct val *v = malloc(sizeof(*v));
+
+	if (v == NULL)
+		errx(ERR_EXIT, "malloc() failed");
+
+	v->u.i = i;
+	v->type = integer;
+	return (v);
+}
+
+/*
+ * Allocate a new value holding a private copy of s.  The value is tagged
+ * numeric_string when is_integer(s) accepts the text, string otherwise.
+ * Terminates the program with ERR_EXIT when either allocation fails.
+ */
+struct val *
+make_str(const char *s)
+{
+	struct val *v = malloc(sizeof(*v));
+
+	if (v == NULL)
+		errx(ERR_EXIT, "malloc() failed");
+	v->u.s = strdup(s);
+	if (v->u.s == NULL)
+		errx(ERR_EXIT, "malloc() failed");
+
+	v->type = is_integer(s) ? numeric_string : string;
+	return (v);
+}
+
+/*
+ * Release a value created by make_integer() or make_str(): first the
+ * string buffer, when the value currently holds one, then the struct val
+ * itself.  The pointer must not be used after this call.
+ */
+void
+free_value(struct val *vp)
+{
+	if (vp->type == string || vp->type == numeric_string)
+		free(vp->u.s);
+	/* values only come from make_integer()/make_str(), i.e. from malloc() */
+	free(vp);
+}
+
+/*
+ * Try to turn vp into an integer value in place.
+ *
+ * Only a numeric_string can be converted; its text buffer is freed once
+ * the conversion succeeds.  A numeric_string whose value is out of range
+ * is left unchanged.  Returns non-zero when vp is an integer on return.
+ */
+int
+to_integer(struct val *vp)
+{
+	intmax_t parsed;
+
+	if (vp->type == integer)
+		return (1);
+	if (vp->type != numeric_string)
+		return (0);
+
+	errno = 0;
+	parsed = strtoimax(vp->u.s, NULL, 10);
+	if (errno == ERANGE)
+		return (0);	/* stays a numeric_string */
+
+	free(vp->u.s);
+	vp->u.i = parsed;
+	vp->type = integer;
+	return (1);
+}
+
+/*
+ * Convert vp to an integer or terminate the program: a plain string is
+ * reported as not being a decimal number, a numeric string that
+ * to_integer() cannot convert as being too large.
+ */
+void
+assert_to_integer(struct val *vp)
+{
+	if (vp->type == string)
+		errx(ERR_EXIT, "not a decimal number: '%s'", vp->u.s);
+
+	if (to_integer(vp))
+		return;
+
+	errx(ERR_EXIT, "operand too large: '%s'", vp->u.s);
+}
+
+/*
+ * Convert an integer value to its decimal string form in place.
+ * Values that already hold a string are left untouched.
+ */
+void
+to_string(struct val *vp)
+{
+	char *text;
+
+	if (vp->type != integer)
+		return;
+
+	/*
+	 * Buffer size: about 0.3 decimal digits per bit, rounded up, plus
+	 * one byte for a sign and one for the terminating null character.
+	 */
+#define	NDIGITS(x) (3 * (sizeof(x) * CHAR_BIT) / 10 + 1 + 1 + 1)
+	text = malloc(NDIGITS(vp->u.i));
+	if (text == NULL)
+		errx(ERR_EXIT, "malloc() failed");
+
+	sprintf(text, "%jd", vp->u.i);
+	vp->u.s  = text;
+	vp->type = string;
+}
+
+/*
+ * Return non-zero when s is an acceptable decimal integer: an optional
+ * '-' followed by at least one character, all of them digits.
+ *
+ * When nonposix is set, an empty string is accepted outright, leading
+ * white space is skipped and a leading '+' is allowed in place of '-'.
+ */
+int
+is_integer(const char *s)
+{
+	const char *p = s;
+
+	if (nonposix) {
+		if (p[0] == '\0')
+			return (1);
+		for (; isspace((unsigned char)*p); p++)
+			continue;
+	}
+
+	if (*p == '-')
+		p++;
+	else if (nonposix && *p == '+')
+		p++;
+
+	/* a sign alone (or nothing at all) is not a number */
+	if (*p == '\0')
+		return (0);
+
+	for (; isdigit((unsigned char)*p); p++)
+		continue;
+
+	return (*p == '\0');
+}
+
+/*
+ * Return non-zero only for values tagged string, i.e. neither an integer
+ * nor a numeric_string.
+ */
+int
+is_string(struct val *vp)
+{
+	return (vp->type == string);
+}
+
+/*
+ * Lexer: each command-line argument is exactly one token.
+ *
+ * Returns 0 once the argument list is exhausted, the character itself for
+ * a one-character operator or parenthesis, GE/LE/NE for ">=", "<=" and
+ * "!=", and TOKEN (with yylval.val set) for every other argument.
+ */
+int
+yylex(void)
+{
+	const char *arg = *av;
+
+	if (arg == NULL)
+		return (0);
+	av++;
+
+	if (arg[0] != '\0' && arg[1] == '\0') {
+		/* exactly one character */
+		if (strchr("|&=<>+-*/%:()", arg[0]) != NULL)
+			return (arg[0]);
+	} else if (arg[0] != '\0' && arg[1] == '=' && arg[2] == '\0') {
+		/* exactly two characters, the second being '=' */
+		if (arg[0] == '>')
+			return (GE);
+		if (arg[0] == '<')
+			return (LE);
+		if (arg[0] == '!')
+			return (NE);
+	}
+
+	yylval.val = make_str(arg);
+	return (TOKEN);
+}
+
+/*
+ * Return non-zero when vp is the integer 0, an empty string, or a string
+ * that to_integer() converts to 0.  A convertible numeric string is
+ * turned into an integer value as a side effect.
+ */
+int
+is_zero_or_null(struct val *vp)
+{
+	if (vp->type == integer)
+		return (vp->u.i == 0);
+
+	if (vp->u.s[0] == '\0')
+		return (1);
+
+	if (!to_integer(vp))
+		return (0);
+
+	return (vp->u.i == 0);
+}
+
+/*
+ * Entry point: select the parsing mode, evaluate the expression given in
+ * the arguments and print its value followed by a newline.
+ *
+ * When EXPR_COMPAT or POSIXLY_CORRECT is present in the environment, no
+ * options are parsed and relaxed number parsing (nonposix) is enabled;
+ * otherwise only the -e option is recognised, which enables nonposix.
+ *
+ * Returns 1 when the result is zero or an empty string, 0 otherwise.
+ * All errors terminate the program with ERR_EXIT.
+ */
+int
+main(int argc, char *argv[])
+{
+	int c;
+
+	setlocale(LC_ALL, "");
+	if (getenv("EXPR_COMPAT") != NULL
+	    || getenv("POSIXLY_CORRECT") != NULL) {
+		av = argv + 1;
+		nonposix = 1;
+	} else {
+		while ((c = getopt(argc, argv, "e")) != -1) {
+			switch (c) {
+			case 'e':
+				nonposix = 1;
+				break;
+			default:
+				/* errx() appends the newline itself */
+				errx(ERR_EXIT,
+				    "usage: expr [-e] expression");
+			}
+		}
+		av = argv + optind;
+	}
+
+	/* syntax errors do not return: yyerror() exits the program */
+	yyparse();
+
+	if (result->type == integer)
+		printf("%jd\n", result->u.i);
+	else
+		printf("%s\n", result->u.s);
+
+	/* a result that could not be written must not look like success */
+	if (ferror(stdout) || fflush(stdout) != 0)
+		errx(ERR_EXIT, "write error on stdout");
+
+	return (is_zero_or_null(result));
+}
+
+/*
+ * Parser error callback: report a syntax error and terminate with
+ * ERR_EXIT.  The message text supplied by the parser is not printed.
+ */
+int
+yyerror(const char *s)
+{
+	(void)s;
+	errx(ERR_EXIT, "syntax error");
+}
+
+/*
+ * '|' operator: yield a unless it is zero or empty, otherwise b unless it
+ * is zero or empty, otherwise a fresh integer 0.  Operands that are not
+ * returned are released.
+ */
+struct val *
+op_or(struct val *a, struct val *b)
+{
+	struct val *keep, *drop;
+
+	if (!is_zero_or_null(a)) {
+		keep = a;
+		drop = b;
+	} else if (!is_zero_or_null(b)) {
+		keep = b;
+		drop = a;
+	} else {
+		free_value(a);
+		free_value(b);
+		return (make_integer((intmax_t)0));
+	}
+
+	free_value(drop);
+	return (keep);
+}
+
+/*
+ * '&' operator: yield a when neither operand is zero or empty, otherwise
+ * a fresh integer 0.  Operands that are not returned are released.
+ */
+struct val *
+op_and(struct val *a, struct val *b)
+{
+	if (!is_zero_or_null(a) && !is_zero_or_null(b)) {
+		free_value(b);
+		return (a);
+	}
+
+	free_value(a);
+	free_value(b);
+	return (make_integer((intmax_t)0));
+}
+
+/*
+ * Three-way comparison shared by the relational operators.
+ *
+ * When neither operand is a plain string both are converted to integers
+ * and compared numerically, giving -1, 0 or 1; otherwise both are
+ * converted to strings and the strcoll() result is returned.
+ * Both operands are released before returning.
+ */
+int
+compare_vals(struct val *a, struct val *b)
+{
+	int order;
+
+	if (!is_string(a) && !is_string(b)) {
+		assert_to_integer(a);
+		assert_to_integer(b);
+		order = (a->u.i > b->u.i) - (a->u.i < b->u.i);
+	} else {
+		to_string(a);
+		to_string(b);
+		order = strcoll(a->u.s, b->u.s);
+	}
+
+	free_value(a);
+	free_value(b);
+	return (order);
+}
+
+/* '=' operator: integer 1 when a compares equal to b, else integer 0. */
+struct val *
+op_eq(struct val *a, struct val *b)
+{
+	int order = compare_vals(a, b);
+
+	return (make_integer((intmax_t)(order == 0)));
+}
+
+/* '>' operator: integer 1 when a compares greater than b, else integer 0. */
+struct val *
+op_gt(struct val *a, struct val *b)
+{
+	int order = compare_vals(a, b);
+
+	return (make_integer((intmax_t)(order > 0)));
+}
+
+/* '<' operator: integer 1 when a compares less than b, else integer 0. */
+struct val *
+op_lt(struct val *a, struct val *b)
+{
+	int order = compare_vals(a, b);
+
+	return (make_integer((intmax_t)(order < 0)));
+}
+
+/* '>=' operator: integer 1 when a compares not less than b, else integer 0. */
+struct val *
+op_ge(struct val *a, struct val *b)
+{
+	int order = compare_vals(a, b);
+
+	return (make_integer((intmax_t)(order >= 0)));
+}
+
+/* '<=' operator: integer 1 when a compares not greater than b, else integer 0. */
+struct val *
+op_le(struct val *a, struct val *b)
+{
+	int order = compare_vals(a, b);
+
+	return (make_integer((intmax_t)(order <= 0)));
+}
+
+/* '!=' operator: integer 1 when a compares unequal to b, else integer 0. */
+struct val *
+op_ne(struct val *a, struct val *b)
+{
+	int order = compare_vals(a, b);
+
+	return (make_integer((intmax_t)(order != 0)));
+}
+
+/*
+ * Terminate with "overflow" when r cannot be the true sum of a and b:
+ * two positive operands must give a positive result and two negative
+ * operands a negative one.
+ */
+void
+assert_plus(intmax_t a, intmax_t b, intmax_t r)
+{
+	int both_positive = (a > 0 && b > 0);
+	int both_negative = (a < 0 && b < 0);
+
+	if (both_positive && r <= 0)
+		errx(ERR_EXIT, "overflow");
+	if (both_negative && r >= 0)
+		errx(ERR_EXIT, "overflow");
+}
+
+/*
+ * '+' operator: convert both operands to integers and return their sum
+ * as a new integer value.  Terminates with "overflow" when the sum does
+ * not fit in intmax_t.  Both operands are released.
+ */
+struct val *
+op_plus(struct val *a, struct val *b)
+{
+	struct val *r;
+
+	assert_to_integer(a);
+	assert_to_integer(b);
+
+	/*
+	 * Reject overflow before adding: signed overflow is undefined
+	 * behaviour in C, so inspecting the sum afterwards is unreliable.
+	 */
+	if ((b->u.i > 0 && a->u.i > INTMAX_MAX - b->u.i) ||
+	    (b->u.i < 0 && a->u.i < INTMAX_MIN - b->u.i))
+		errx(ERR_EXIT, "overflow");
+	r = make_integer(a->u.i + b->u.i);
+
+	free_value(a);
+	free_value(b);
+	return (r);
+}
+
+/*
+ * Terminate with "overflow" when r cannot be the true value of a - b:
+ * a non-negative minus a negative must be positive, and a negative minus
+ * a positive must be negative.
+ */
+void
+assert_minus(intmax_t a, intmax_t b, intmax_t r)
+{
+	if (a >= 0 && b < 0 && r <= 0)
+		errx(ERR_EXIT, "overflow");
+	if (a < 0 && b > 0 && r >= 0)
+		errx(ERR_EXIT, "overflow");
+}
+
+/*
+ * '-' operator: convert both operands to integers and return a - b as a
+ * new integer value.  Terminates with "overflow" when the difference
+ * does not fit in intmax_t.  Both operands are released.
+ */
+struct val *
+op_minus(struct val *a, struct val *b)
+{
+	struct val *r;
+
+	assert_to_integer(a);
+	assert_to_integer(b);
+
+	/*
+	 * Reject overflow before subtracting: signed overflow is undefined
+	 * behaviour in C, so inspecting the result afterwards is unreliable.
+	 */
+	if ((b->u.i < 0 && a->u.i > INTMAX_MAX + b->u.i) ||
+	    (b->u.i > 0 && a->u.i < INTMAX_MIN + b->u.i))
+		errx(ERR_EXIT, "overflow");
+	r = make_integer(a->u.i - b->u.i);
+
+	free_value(a);
+	free_value(b);
+	return (r);
+}
+
+/*
+ * Terminate with "overflow" when r cannot be the true product of a and b.
+ *
+ * r is taken as volatile so that the test below is performed on the
+ * value actually passed in rather than being optimised away.
+ */
+void
+assert_times(intmax_t a, intmax_t b, volatile intmax_t r)
+{
+	/*
+	 * -1 * INTMAX_MIN is handled first so that the division test
+	 * below can never itself overflow.
+	 */
+	if (a == -1 && b == INTMAX_MIN)
+		errx(ERR_EXIT, "overflow");
+
+	/* with a zero first operand no overflow is possible */
+	if (a == 0)
+		return;
+
+	/* otherwise dividing the product by a must give back b */
+	if (r / a != b)
+		errx(ERR_EXIT, "overflow");
+}
+
+/*
+ * Return non-zero when a * b would not fit in intmax_t.  Only divisions
+ * that cannot themselves overflow are used, one per sign combination.
+ */
+static int
+times_overflows(intmax_t a, intmax_t b)
+{
+	if (a > 0) {
+		if (b > 0)
+			return (a > INTMAX_MAX / b);
+		return (b < INTMAX_MIN / a);
+	}
+	if (b > 0)
+		return (a < INTMAX_MIN / b);
+	return (a != 0 && b < INTMAX_MAX / a);
+}
+
+/*
+ * '*' operator: convert both operands to integers and return their
+ * product as a new integer value.  Terminates with "overflow" when the
+ * product does not fit in intmax_t.  Both operands are released.
+ */
+struct val *
+op_times(struct val *a, struct val *b)
+{
+	struct val *r;
+
+	assert_to_integer(a);
+	assert_to_integer(b);
+
+	/*
+	 * Reject overflow before multiplying: signed overflow is undefined
+	 * behaviour in C, so inspecting the product afterwards is unreliable.
+	 */
+	if (times_overflows(a->u.i, b->u.i))
+		errx(ERR_EXIT, "overflow");
+	r = make_integer(a->u.i * b->u.i);
+
+	free_value(a);
+	free_value(b);
+	return (r);
+}
+
+/*
+ * Terminate the program when a / b cannot be computed: b is zero, or the
+ * operands are INTMAX_MIN and -1, the only pair whose quotient overflows.
+ */
+void
+assert_div(intmax_t a, intmax_t b)
+{
+	if (b == 0)
+		errx(ERR_EXIT, "division by zero");
+
+	if (b == -1 && a == INTMAX_MIN)
+		errx(ERR_EXIT, "overflow");
+}
+
+/*
+ * '/' operator: convert both operands to integers and return the integer
+ * quotient as a new integer value.  Both operands are released.
+ */
+struct val *
+op_div(struct val *a, struct val *b)
+{
+	struct val *quotient;
+
+	assert_to_integer(a);
+	assert_to_integer(b);
+
+	/* the operands are validated first, so the division is safe */
+	assert_div(a->u.i, b->u.i);
+	quotient = make_integer(a->u.i / b->u.i);
+
+	free_value(a);
+	free_value(b);
+	return (quotient);
+}
+
+/*
+ * '%' operator: convert both operands to integers and return the
+ * remainder of a / b as a new integer value.  Terminates with
+ * "division by zero" when b is 0.  Both operands are released.
+ */
+struct val *
+op_rem(struct val *a, struct val *b)
+{
+	struct val *r;
+
+	assert_to_integer(a);
+	assert_to_integer(b);
+	/* pass a=1 to only check for div by zero */
+	assert_div(1, b->u.i);
+	/*
+	 * Any value modulo -1 is 0.  Computing it with '%' is avoided
+	 * because INTMAX_MIN % -1 is undefined behaviour and may trap.
+	 */
+	if (b->u.i == -1)
+		r = make_integer((intmax_t)0);
+	else
+		r = make_integer(a->u.i % b->u.i);
+
+	free_value(a);
+	free_value(b);
+	return (r);
+}
+
+/*
+ * ':' operator: match string a against basic regular expression b,
+ * anchored at the start of a.
+ *
+ * If the pattern contains a \(...\) subexpression the result is the text
+ * matched by the first one, or an empty string when the match fails or
+ * that subexpression took no part in it.  Without a subexpression the
+ * result is the number of characters matched, or 0 on failure.
+ * An invalid pattern terminates the program with the regerror() message.
+ * Both operands are released.
+ */
+struct val *
+op_colon(struct val *a, struct val *b)
+{
+	regex_t rp;
+	regmatch_t rm[2];
+	char errbuf[256];
+	int eval;
+	struct val *v;
+
+	/* coerce both arguments to strings */
+	to_string(a);
+	to_string(b);
+
+	/* compile regular expression */
+	if ((eval = regcomp(&rp, b->u.s, 0)) != 0) {
+		regerror(eval, &rp, errbuf, sizeof(errbuf));
+		errx(ERR_EXIT, "%s", errbuf);
+	}
+
+	/* a match only counts when it starts at the first character */
+	if (regexec(&rp, a->u.s, (size_t)2, rm, 0) == 0 && rm[0].rm_so == 0) {
+		if (rp.re_nsub == 0) {
+			v = make_integer((intmax_t)(rm[0].rm_eo));
+		} else if (rm[1].rm_so >= 0) {
+			/* cut a after the group so only \1 is copied */
+			a->u.s[rm[1].rm_eo] = '\0';
+			v = make_str(a->u.s + rm[1].rm_so);
+		} else {
+			/* the group exists but matched nothing: \1 is empty */
+			v = make_str("");
+		}
+	} else if (rp.re_nsub == 0) {
+		v = make_integer((intmax_t)0);
+	} else {
+		v = make_str("");
+	}
+
+	/* free arguments and pattern buffer */
+	free_value(a);
+	free_value(b);
+	regfree(&rp);
+
+	return (v);
+}
diff --git a/configure b/configure
@@ -18,6 +18,7 @@ Variables:
   PKGCONFIG=BIN
   MSGFMT=BIN
   CC=BIN
+  YACC=BIN
   MAKE=BIN
   M4=BIN
   MANDOC=BIN
@@ -62,7 +63,7 @@ pkg_config_check() {
 
 gen_targets() {
 	printf 'EXE = '
-	printf '%s\n ' cmd/*.c | grep -v -F -f target_filter | sed 's;.c$;;' | tr -d '\n'
+	printf '%s\n ' cmd/*.c cmd/*.y | grep -v -F -f target_filter | sed -e 's;\.c$;;' -e 's;\.y$;;' | tr -d '\n'
 	echo
 
 	printf 'MAN1SO = '
@@ -121,6 +122,7 @@ PREFIX="${PREFIX:-/usr/local}"
 PKGCONFIG="${PKGCONFIG:-pkg-config}"
 MSGFMT="${MSGFMT:-msgfmt}"
 CC="${CC:-cc}"
+YACC="${YACC:-yacc}"
 MAKE="${MAKE:-make}"
 GCOV="${GCOV:-gcov}"
 # -DDEBUG: Otherwise assert() does nothing, fine to be removed in production
@@ -173,6 +175,7 @@ rm -f config.mk && echo '#' > target_filter ; or_die
 # commands
 check_cmd PKGCONFIG "$PKGCONFIG" || exit 1
 check_cmd CC "$CC" || exit 1
+check_cmd YACC "$YACC" || exit 1
 check_cmd MAKE "$MAKE" || exit 1
 check_cmd M4 "$M4" || exit 1
 
@@ -273,6 +276,7 @@ MANDIR   = ${MANDIR}
 
 PKGCONFIG = ${PKGCONFIG}
 CC = ${CC}
+YACC = ${YACC}
 MAKE = ${MAKE}
 M4 = ${M4}
 MANDOC = ${MANDOC}
diff --git a/makeless-regen.sh b/makeless-regen.sh
@@ -10,11 +10,12 @@ make clean
 # Generated using ./makeless-regen.sh
 # Intended to make it easier to bootstrap a working system, not for regular usage
 : ${CC:=cc}
+: ${YACC:=yacc}
 : ${CFLAGS:=-Os -Wall}
 : ${M4:=m4}
 
 set -ex
 
 '
-	make -n CC='$$CC' CFLAGS='$$CFLAGS' LDFLAGS='$$LDFLAGS' LDSTATIC='$$LDSTATIC' M4='$$M4' | grep -vF .c.gcov
+	make -n CC='$$CC' CFLAGS='$$CFLAGS' LDFLAGS='$$LDFLAGS' LDSTATIC='$$LDSTATIC' YACC='$$YACC' M4='$$M4' | grep -vF .c.gcov
 )> makeless.sh
diff --git a/makeless.sh b/makeless.sh
@@ -4,6 +4,7 @@
 # Generated using ./makeless-regen.sh
 # Intended to make it easier to bootstrap a working system, not for regular usage
 : ${CC:=cc}
+: ${YACC:=yacc}
 : ${CFLAGS:=-Os -Wall}
 : ${M4:=m4}
 
@@ -51,5 +52,7 @@ $CC -std=c99 $CFLAGS -o cmd/tty cmd/tty.c $LDFLAGS $LDSTATIC
 $CC -std=c99 $CFLAGS -o cmd/uname cmd/uname.c $LDFLAGS $LDSTATIC
 $CC -std=c99 $CFLAGS -o cmd/unlink cmd/unlink.c $LDFLAGS $LDSTATIC
 $CC -std=c99 $CFLAGS -o cmd/wc cmd/wc.c $LDFLAGS $LDSTATIC
+$YACC -b cmd/expr cmd/expr.y
+$CC -std=c99 $CFLAGS -o cmd/expr cmd/expr.tab.c $LDFLAGS $LDSTATIC
 $M4 cmd/date.1.in > build/cmd/date.1
 $M4 cmd/touch.1.in > build/cmd/touch.1

M	.gitignore	2	++
M	Makefile	8	++++++++
M	README.md	1	+
A	cmd/expr.1	326	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	cmd/expr.y	566	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	configure	6	+++++-
M	makeless-regen.sh	3	++-
M	makeless.sh	3	+++