blob: 0bd8a5ea7728aaff9e1bf7893c9c947143b5b37f [file] [log] [blame]
/* GNU m4 -- A simple macro processor
Copyright (C) 1989-1994, 2006-2014, 2016-2017, 2020-2025 Free
Software Foundation, Inc.
This file is part of GNU M4.
GNU M4 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GNU M4 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* This file contains the functions to evaluate integer expressions for
the "eval" macro. It is a little, fairly self-contained module, with
its own scanner, and a recursive descent parser. The only entry point
is evaluate (). */
#include "m4.h"
/* Evaluates token types. */
typedef enum eval_token
{
/* Value / 10 is precedence order. */
ERROR = 0,
BADOP,
EMPTY,
EOTEXT,
LEFTP,
RIGHTP,
LNOT,
NOT,
NUMBER,
COLON,
QUESTION = 10,
LOR = 20,
LAND = 30,
OR = 40,
XOR = 50,
AND = 60,
EQ = 70,
NOTEQ,
GT = 80,
GTEQ,
LS,
LSEQ,
LSHIFT = 90,
RSHIFT,
URSHIFT,
/* precedence given for binary op; PLUS and MINUS also serve as a unary op */
PLUS = 100,
MINUS,
TIMES = 110,
DIVIDE,
MODULO,
EXPONENT = 120
}
eval_token;
/* Error types. */
typedef enum eval_error
{
NO_ERROR,
DIVIDE_ZERO,
MODULO_ZERO,
NEGATIVE_EXPONENT,
/* All errors prior to SYNTAX_ERROR can be ignored in a dead
branch of && and ||. All errors after are just more details
about a syntax error. */
SYNTAX_ERROR,
MISSING_RIGHT,
MISSING_COLON,
UNKNOWN_INPUT,
EXCESS_INPUT,
INVALID_OPERATOR,
MISSING_VALUE,
EMPTY_ARGUMENT
}
eval_error;
static eval_error primary (int32_t *);
static eval_error parse_expr (int32_t *, eval_error, unsigned);
/* Lexical functions. */
/* Pointer to next character of input text. */
static const char *eval_text;
/* Value of eval_text, from before last call of eval_lex (). This is so we
can back up, if we have read too much, good for one token lookahead. */
static const char *last_text;
/* Detect when to end parsing. */
static const char *end_text;
/* Prime the lexer at the start of TEXT, with length LEN. */
static void
eval_init_lex (const char *text, size_t len)
{
eval_text = text;
end_text = text + len;
last_text = NULL;
}
static void
eval_undo (void)
{
eval_text = last_text;
}
/* VAL is numerical value, if any. */
static eval_token
eval_lex (int32_t *val)
{
while (eval_text != end_text && c_isspace (*eval_text))
eval_text++;
if (!last_text && eval_text == end_text)
return EMPTY;
last_text = eval_text;
if (eval_text == end_text)
return EOTEXT;
if (c_isdigit (*eval_text))
{
unsigned int base, digit;
/* The documentation says that "overflow silently results in wraparound".
Therefore use an unsigned integer type to avoid undefined behaviour
when parsing '-2147483648'. */
uint32_t value;
if (*eval_text == '0')
{
eval_text++;
switch (*eval_text)
{
case 'x':
case 'X':
base = 16;
eval_text++;
break;
case 'b':
case 'B':
base = 2;
eval_text++;
break;
case 'r':
case 'R':
base = 0;
eval_text++;
while (c_isdigit (*eval_text) && base <= 36)
base = 10 * base + *eval_text++ - '0';
if (base == 0 || base > 36 || *eval_text != ':')
return ERROR;
eval_text++;
break;
default:
base = 8;
}
}
else
base = 10;
value = 0;
for (; *eval_text; eval_text++)
{
if (c_isdigit (*eval_text))
digit = *eval_text - '0';
else if (c_islower (*eval_text))
digit = *eval_text - 'a' + 10;
else if (c_isupper (*eval_text))
digit = *eval_text - 'A' + 10;
else
break;
if (base == 1)
{
if (digit == 1)
value++;
else if (digit == 0 && value == 0)
continue;
else
break;
}
else if (digit >= base)
break;
else
value = value * base + digit;
}
*val = value;
return NUMBER;
}
switch (*eval_text++)
{
case '+':
if (*eval_text == '+' || *eval_text == '=')
return BADOP;
return PLUS;
case '-':
if (*eval_text == '-' || *eval_text == '=')
return BADOP;
return MINUS;
case '*':
if (*eval_text == '*')
{
eval_text++;
return EXPONENT;
}
else if (*eval_text == '=')
return BADOP;
return TIMES;
case '/':
if (*eval_text == '=')
return BADOP;
return DIVIDE;
case '%':
if (*eval_text == '=')
return BADOP;
return MODULO;
case '=':
if (*eval_text == '=')
{
eval_text++;
return EQ;
}
return BADOP;
case '!':
if (*eval_text == '=')
{
eval_text++;
return NOTEQ;
}
return LNOT;
case '>':
if (*eval_text == '=')
{
eval_text++;
return GTEQ;
}
else if (*eval_text == '>')
{
eval_text++;
if (*eval_text == '>')
{
if (*++eval_text == '=')
return BADOP;
return URSHIFT;
}
else if (*eval_text == '=')
return BADOP;
return RSHIFT;
}
return GT;
case '<':
if (*eval_text == '=')
{
eval_text++;
return LSEQ;
}
else if (*eval_text == '<')
{
if (*++eval_text == '=')
return BADOP;
return LSHIFT;
}
return LS;
case '^':
if (*eval_text == '=')
return BADOP;
return XOR;
case '~':
return NOT;
case '&':
if (*eval_text == '&')
{
eval_text++;
return LAND;
}
else if (*eval_text == '=')
return BADOP;
return AND;
case '|':
if (*eval_text == '|')
{
eval_text++;
return LOR;
}
else if (*eval_text == '=')
return BADOP;
return OR;
case '(':
return LEFTP;
case ')':
return RIGHTP;
case '?':
return QUESTION;
case ':':
return COLON;
default:
return ERROR;
}
}
/* Operator precedence parser (based on Pratt parser). */
/* Parse `(expr)', unary operators, and numbers. */
static eval_error
primary (int32_t *v1)
{
eval_error er;
int32_t v2;
switch (eval_lex (v1))
{
/* Number */
case NUMBER:
return NO_ERROR;
/* Parenthesis */
case LEFTP:
er = primary (v1);
er = parse_expr (v1, er, 1);
if (er >= SYNTAX_ERROR)
return er;
switch (eval_lex (&v2))
{
case ERROR:
return UNKNOWN_INPUT;
case BADOP:
return INVALID_OPERATOR;
case RIGHTP:
return er;
default:
return MISSING_RIGHT;
}
/* Unary operators */
/* Minimize undefined C behavior on overflow. This code assumes
that the implementation-defined overflow when casting
unsigned to signed is a silent twos-complement
wrap-around. */
case PLUS:
return primary (v1);
case MINUS:
er = primary (v1);
*v1 = (int32_t) -(uint32_t) *v1;
return er;
case NOT:
er = primary (v1);
*v1 = ~*v1;
return er;
case LNOT:
er = primary (v1);
*v1 = *v1 == 0 ? 1 : 0;
return er;
/* Anything else */
case ERROR:
return UNKNOWN_INPUT;
case BADOP:
return INVALID_OPERATOR;
case EMPTY:
return EMPTY_ARGUMENT;
case EOTEXT:
return MISSING_VALUE;
default:
return SYNTAX_ERROR;
}
}
/* Parse binary operators with at least MIN_PREC precedence. */
static eval_error
parse_expr (int32_t *v1, eval_error er, unsigned min_prec)
{
eval_token et;
eval_token et2;
eval_error er2;
eval_error er3;
int32_t v2;
int32_t v3;
uint32_t u1;
uint32_t u2;
uint32_t u3;
if (er >= SYNTAX_ERROR)
return er;
et = eval_lex (&v2);
while (et / 10 >= min_prec)
{
if (et == QUESTION)
{
et2 = eval_lex (&v2);
eval_undo ();
if (et2 == COLON)
{
v2 = *v1;
er2 = er;
}
else
er2 = primary (&v2);
}
else
er2 = primary (&v2);
if (er2 >= SYNTAX_ERROR)
return er2;
et2 = eval_lex (&v3);
/* Handle binary operators of higher precedence or right-associativity */
while (et2 / 10 > et / 10 || et2 == EXPONENT ||
(et == QUESTION && et2 == QUESTION))
{
eval_undo ();
if ((er2 = parse_expr (&v2, er2, et2 / 10)) >= SYNTAX_ERROR)
return er2;
et2 = eval_lex (&v3);
}
/* Reduce the two values by the given binary operator */
switch (et)
{
case EXPONENT:
/* Minimize undefined C behavior on overflow. This code assumes
that the implementation-defined overflow when casting
unsigned to signed is a silent twos-complement
wrap-around. */
u3 = 1;
if (v2 < 0)
er = NEGATIVE_EXPONENT;
else if (*v1 == 0 && v2 == 0)
er = DIVIDE_ZERO;
else
{
u1 = *v1;
u2 = v2;
u3 = 1;
while (u2)
{
if (u2 & 1)
u3 *= u1;
u1 *= u1;
u2 >>= 1;
}
*v1 = u3;
}
break;
case TIMES:
*v1 = (int32_t) ((uint32_t) *v1 * (uint32_t) v2);
break;
case DIVIDE:
if (v2 == 0)
er = DIVIDE_ZERO;
else if (v2 == -1)
/* Avoid overflow, and the x86 SIGFPE on INT_MIN / -1. */
*v1 = (int32_t) -(uint32_t) *v1;
else
*v1 /= v2;
break;
case MODULO:
if (v2 == 0)
er = MODULO_ZERO;
else if (v2 == -1)
/* Avoid the x86 SIGFPE on INT_MIN % -1. */
*v1 = 0;
else
*v1 %= v2;
break;
case PLUS:
*v1 = (int32_t) ((uint32_t) *v1 + (uint32_t) v2);
break;
case MINUS:
*v1 = (int32_t) ((uint32_t) *v1 - (uint32_t) v2);
break;
case LSHIFT:
u1 = *v1;
u1 <<= (uint32_t) (v2 & 0x1f);
*v1 = u1;
break;
case RSHIFT:
u1 = *v1 < 0 ? ~*v1 : *v1;
u1 >>= (uint32_t) (v2 & 0x1f);
*v1 = *v1 < 0 ? ~u1 : u1;
break;
case URSHIFT:
u1 = *v1;
u1 >>= (uint32_t) (v2 & 0x1f);
*v1 = u1;
break;
case GT:
*v1 = *v1 > v2;
break;
case GTEQ:
*v1 = *v1 >= v2;
break;
case LS:
*v1 = *v1 < v2;
break;
case LSEQ:
*v1 = *v1 <= v2;
break;
case EQ:
*v1 = *v1 == v2;
break;
case NOTEQ:
*v1 = *v1 != v2;
break;
case AND:
*v1 &= v2;
break;
case XOR:
*v1 ^= v2;
break;
case OR:
*v1 |= v2;
break;
/* Implement short-circuiting of valid syntax. */
case LAND:
if (!*v1)
er2 = NO_ERROR;
*v1 = *v1 && v2;
break;
case LOR:
if (*v1)
er2 = NO_ERROR;
*v1 = *v1 || v2;
break;
case QUESTION:
if (et2 == BADOP)
er = INVALID_OPERATOR;
else if (et2 != COLON)
er = MISSING_COLON;
else
{
er3 = primary (&v3);
er3 = parse_expr (&v3, er3, 1);
if (er3 >= SYNTAX_ERROR)
return er3;
if (*v1)
*v1 = v2;
else
{
*v1 = v3;
er2 = er3;
}
}
break;
default:
assert (!"parse_expr");
abort ();
}
if (er == NO_ERROR)
er = er2;
et = et2;
}
eval_undo ();
return er;
}
/* Main entry point, called from "eval". */
bool
evaluate (const call_info *me, const char *expr, size_t len, int32_t *val)
{
eval_error err;
eval_init_lex (expr, len);
err = primary (val);
err = parse_expr (val, err, 1);
if (err == NO_ERROR && eval_text != end_text)
{
if (eval_lex (val) == BADOP)
err = INVALID_OPERATOR;
else
err = EXCESS_INPUT;
}
if (err != NO_ERROR)
expr = quotearg_style_mem (locale_quoting_style, expr, len);
switch (err)
{
/* Cases where result is printed. */
case NO_ERROR:
return false;
case EMPTY_ARGUMENT:
m4_warn (0, me, _("empty string treated as 0"));
return false;
/* Cases where error makes result meaningless. */
case MISSING_RIGHT:
m4_warn (0, me, _("missing right parenthesis: %s"), expr);
break;
case MISSING_COLON:
m4_warn (0, me, _("missing colon: %s"), expr);
break;
case SYNTAX_ERROR:
m4_warn (0, me, _("bad expression: %s"), expr);
break;
case UNKNOWN_INPUT:
m4_warn (0, me, _("bad input: %s"), expr);
break;
case EXCESS_INPUT:
m4_warn (0, me, _("excess input: %s"), expr);
break;
case INVALID_OPERATOR:
m4_warn (0, me, _("invalid operator: %s"), expr);
break;
case MISSING_VALUE:
m4_warn (0, me, _("missing operand: %s"), expr);
break;
case DIVIDE_ZERO:
m4_warn (0, me, _("divide by zero: %s"), expr);
break;
case MODULO_ZERO:
m4_warn (0, me, _("modulo by zero: %s"), expr);
break;
case NEGATIVE_EXPONENT:
m4_warn (0, me, _("negative exponent: %s"), expr);
break;
default:
assert (!"evaluate");
abort ();
}
return true;
}