blob: b72e5cd1e561ba3b3d226171d378c2f810cc45de [file] [log] [blame]
/* GNU m4 -- A simple macro processor
Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 2006, 2007, 2008
Free Software Foundation, Inc.
This file is part of GNU M4.
GNU M4 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GNU M4 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/* This file contains the functions to evaluate integer expressions for
the "eval" macro. It is a little, fairly self-contained module, with
its own scanner, and a recursive descent parser. The only entry point
is evaluate (). */
#include "m4.h"
/* Evaluates token types. */
typedef enum eval_token
{
ERROR, BADOP,
PLUS, MINUS,
EXPONENT,
TIMES, DIVIDE, MODULO,
ASSIGN, EQ, NOTEQ, GT, GTEQ, LS, LSEQ,
LSHIFT, RSHIFT,
LNOT, LAND, LOR,
NOT, AND, OR, XOR,
LEFTP, RIGHTP,
NUMBER, EOTEXT
}
eval_token;
/* Error types. */
typedef enum eval_error
{
NO_ERROR,
DIVIDE_ZERO,
MODULO_ZERO,
NEGATIVE_EXPONENT,
/* All errors prior to SYNTAX_ERROR can be ignored in a dead
branch of && and ||. All errors after are just more details
about a syntax error. */
SYNTAX_ERROR,
MISSING_RIGHT,
UNKNOWN_INPUT,
EXCESS_INPUT,
INVALID_OPERATOR,
EMPTY_ARGUMENT
}
eval_error;
static eval_error logical_or_term (const call_info *, eval_token, int32_t *);
static eval_error logical_and_term (const call_info *, eval_token, int32_t *);
static eval_error or_term (const call_info *, eval_token, int32_t *);
static eval_error xor_term (const call_info *, eval_token, int32_t *);
static eval_error and_term (const call_info *, eval_token, int32_t *);
static eval_error equality_term (const call_info *, eval_token, int32_t *);
static eval_error cmp_term (const call_info *, eval_token, int32_t *);
static eval_error shift_term (const call_info *, eval_token, int32_t *);
static eval_error add_term (const call_info *, eval_token, int32_t *);
static eval_error mult_term (const call_info *, eval_token, int32_t *);
static eval_error exp_term (const call_info *, eval_token, int32_t *);
static eval_error unary_term (const call_info *, eval_token, int32_t *);
static eval_error simple_term (const call_info *, eval_token, int32_t *);
/*--------------------.
| Lexical functions. |
`--------------------*/
/* Pointer to next character of input text. */
static const char *eval_text;
/* Value of eval_text, from before last call of eval_lex (). This is so we
can back up, if we have read too much. */
static const char *last_text;
/* Detect when to end parsing. */
static const char *end_text;
/* Prime the lexer at the start of TEXT, with length LEN. */
static void
eval_init_lex (const char *text, size_t len)
{
eval_text = text;
end_text = text + len;
last_text = NULL;
}
static void
eval_undo (void)
{
eval_text = last_text;
}
/* VAL is numerical value, if any. */
static eval_token
eval_lex (int32_t *val)
{
while (eval_text != end_text && isspace (to_uchar (*eval_text)))
eval_text++;
last_text = eval_text;
if (eval_text == end_text)
return EOTEXT;
if (isdigit (to_uchar (*eval_text)))
{
int base, digit;
if (*eval_text == '0')
{
eval_text++;
switch (*eval_text)
{
case 'x':
case 'X':
base = 16;
eval_text++;
break;
case 'b':
case 'B':
base = 2;
eval_text++;
break;
case 'r':
case 'R':
base = 0;
eval_text++;
while (isdigit (to_uchar (*eval_text)) && base <= 36)
base = 10 * base + *eval_text++ - '0';
if (base == 0 || base > 36 || *eval_text != ':')
return ERROR;
eval_text++;
break;
default:
base = 8;
}
}
else
base = 10;
/* FIXME - this calculation can overflow. Consider xstrtol. */
*val = 0;
for (; *eval_text; eval_text++)
{
if (isdigit (to_uchar (*eval_text)))
digit = *eval_text - '0';
else if (islower (to_uchar (*eval_text)))
digit = *eval_text - 'a' + 10;
else if (isupper (to_uchar (*eval_text)))
digit = *eval_text - 'A' + 10;
else
break;
if (base == 1)
{
if (digit == 1)
(*val)++;
else if (digit == 0 && !*val)
continue;
else
break;
}
else if (digit >= base)
break;
else
*val = *val * base + digit;
}
return NUMBER;
}
switch (*eval_text++)
{
case '+':
if (*eval_text == '+' || *eval_text == '=')
return BADOP;
return PLUS;
case '-':
if (*eval_text == '-' || *eval_text == '=')
return BADOP;
return MINUS;
case '*':
if (*eval_text == '*')
{
eval_text++;
return EXPONENT;
}
else if (*eval_text == '=')
return BADOP;
return TIMES;
case '/':
if (*eval_text == '=')
return BADOP;
return DIVIDE;
case '%':
if (*eval_text == '=')
return BADOP;
return MODULO;
case '=':
if (*eval_text == '=')
{
eval_text++;
return EQ;
}
return ASSIGN;
case '!':
if (*eval_text == '=')
{
eval_text++;
return NOTEQ;
}
return LNOT;
case '>':
if (*eval_text == '=')
{
eval_text++;
return GTEQ;
}
else if (*eval_text == '>')
{
if (*++eval_text == '=')
return BADOP;
return RSHIFT;
}
return GT;
case '<':
if (*eval_text == '=')
{
eval_text++;
return LSEQ;
}
else if (*eval_text == '<')
{
if (*++eval_text == '=')
return BADOP;
return LSHIFT;
}
return LS;
case '^':
if (*eval_text == '=')
return BADOP;
return XOR;
case '~':
return NOT;
case '&':
if (*eval_text == '&')
{
eval_text++;
return LAND;
}
else if (*eval_text == '=')
return BADOP;
return AND;
case '|':
if (*eval_text == '|')
{
eval_text++;
return LOR;
}
else if (*eval_text == '=')
return BADOP;
return OR;
case '(':
return LEFTP;
case ')':
return RIGHTP;
default:
return ERROR;
}
}
/*---------------------------------------.
| Main entry point, called from "eval". |
`---------------------------------------*/
bool
evaluate (const call_info *me, const char *expr, size_t len, int32_t *val)
{
eval_token et;
eval_error err;
eval_init_lex (expr, len);
et = eval_lex (val);
if (et == EOTEXT)
err = EMPTY_ARGUMENT;
else
err = logical_or_term (me, et, val);
if (err == NO_ERROR && *eval_text != '\0')
{
if (eval_lex (val) == BADOP)
err = INVALID_OPERATOR;
else
err = EXCESS_INPUT;
}
if (err != NO_ERROR)
expr = quotearg_style_mem (locale_quoting_style, expr, len);
switch (err)
{
/* Cases where result is printed. */
case NO_ERROR:
return false;
case EMPTY_ARGUMENT:
m4_warn (0, me, _("empty string treated as 0"));
return false;
/* Cases where error makes result meaningless. */
case MISSING_RIGHT:
m4_warn (0, me, _("missing right parenthesis: %s"), expr);
break;
case SYNTAX_ERROR:
m4_warn (0, me, _("bad expression: %s"), expr);
break;
case UNKNOWN_INPUT:
m4_warn (0, me, _("bad input in expression: %s"), expr);
break;
case EXCESS_INPUT:
m4_warn (0, me, _("excess input in expression: %s"), expr);
break;
case INVALID_OPERATOR:
m4_error (0, 0, me, _("invalid operator: %s"), expr);
break;
case DIVIDE_ZERO:
m4_warn (0, me, _("divide by zero: %s"), expr);
break;
case MODULO_ZERO:
m4_warn (0, me, _("modulo by zero: %s"), expr);
break;
case NEGATIVE_EXPONENT:
m4_warn (0, me, _("negative exponent: %s"), expr);
break;
default:
assert (!"evaluate");
abort ();
}
return true;
}
/*---------------------------.
| Recursive descent parser. |
`---------------------------*/
static eval_error
logical_or_term (const call_info *me, eval_token et, int32_t *v1)
{
int32_t v2;
eval_error er;
if ((er = logical_and_term (me, et, v1)) != NO_ERROR)
return er;
while ((et = eval_lex (&v2)) == LOR)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
/* Implement short-circuiting of valid syntax. */
er = logical_and_term (me, et, &v2);
if (er == NO_ERROR)
*v1 = *v1 || v2;
else if (*v1 != 0 && er < SYNTAX_ERROR)
*v1 = 1;
else
return er;
}
if (et == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
logical_and_term (const call_info *me, eval_token et, int32_t *v1)
{
int32_t v2;
eval_error er;
if ((er = or_term (me, et, v1)) != NO_ERROR)
return er;
while ((et = eval_lex (&v2)) == LAND)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
/* Implement short-circuiting of valid syntax. */
er = or_term (me, et, &v2);
if (er == NO_ERROR)
*v1 = *v1 && v2;
else if (*v1 == 0 && er < SYNTAX_ERROR)
; /* v1 is already 0 */
else
return er;
}
if (et == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
or_term (const call_info *me, eval_token et, int32_t *v1)
{
int32_t v2;
eval_error er;
if ((er = xor_term (me, et, v1)) != NO_ERROR)
return er;
while ((et = eval_lex (&v2)) == OR)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = xor_term (me, et, &v2)) != NO_ERROR)
return er;
*v1 |= v2;
}
if (et == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
xor_term (const call_info *me, eval_token et, int32_t *v1)
{
int32_t v2;
eval_error er;
if ((er = and_term (me, et, v1)) != NO_ERROR)
return er;
while ((et = eval_lex (&v2)) == XOR)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = and_term (me, et, &v2)) != NO_ERROR)
return er;
*v1 ^= v2;
}
if (et == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
and_term (const call_info *me, eval_token et, int32_t *v1)
{
int32_t v2;
eval_error er;
if ((er = equality_term (me, et, v1)) != NO_ERROR)
return er;
while ((et = eval_lex (&v2)) == AND)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = equality_term (me, et, &v2)) != NO_ERROR)
return er;
*v1 &= v2;
}
if (et == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
equality_term (const call_info *me, eval_token et, int32_t *v1)
{
eval_token op;
int32_t v2;
eval_error er;
if ((er = cmp_term (me, et, v1)) != NO_ERROR)
return er;
/* In the 1.4.x series, we maintain the traditional behavior that
'=' is a synonym for '=='; however, this is contrary to POSIX and
we hope to convert '=' to mean assignment in 2.0. */
while ((op = eval_lex (&v2)) == EQ || op == NOTEQ || op == ASSIGN)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = cmp_term (me, et, &v2)) != NO_ERROR)
return er;
if (op == ASSIGN)
{
m4_warn (0, me, _("recommend ==, not =, for equality"));
op = EQ;
}
*v1 = (op == EQ) == (*v1 == v2);
}
if (op == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
cmp_term (const call_info *me, eval_token et, int32_t *v1)
{
eval_token op;
int32_t v2;
eval_error er;
if ((er = shift_term (me, et, v1)) != NO_ERROR)
return er;
while ((op = eval_lex (&v2)) == GT || op == GTEQ
|| op == LS || op == LSEQ)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = shift_term (me, et, &v2)) != NO_ERROR)
return er;
switch (op)
{
case GT:
*v1 = *v1 > v2;
break;
case GTEQ:
*v1 = *v1 >= v2;
break;
case LS:
*v1 = *v1 < v2;
break;
case LSEQ:
*v1 = *v1 <= v2;
break;
default:
assert (!"cmp_term");
abort ();
}
}
if (op == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
shift_term (const call_info *me, eval_token et, int32_t *v1)
{
eval_token op;
int32_t v2;
uint32_t u1;
eval_error er;
if ((er = add_term (me, et, v1)) != NO_ERROR)
return er;
while ((op = eval_lex (&v2)) == LSHIFT || op == RSHIFT)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = add_term (me, et, &v2)) != NO_ERROR)
return er;
/* Minimize undefined C behavior (shifting by a negative number,
shifting by the width or greater, left shift overflow, or
right shift of a negative number). Implement Java 32-bit
wrap-around semantics. This code assumes that the
implementation-defined overflow when casting unsigned to
signed is a silent twos-complement wrap-around. */
switch (op)
{
case LSHIFT:
u1 = *v1;
u1 <<= (uint32_t) (v2 & 0x1f);
*v1 = u1;
break;
case RSHIFT:
u1 = *v1 < 0 ? ~*v1 : *v1;
u1 >>= (uint32_t) (v2 & 0x1f);
*v1 = *v1 < 0 ? ~u1 : u1;
break;
default:
assert (!"shift_term");
abort ();
}
}
if (op == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
add_term (const call_info *me, eval_token et, int32_t *v1)
{
eval_token op;
int32_t v2;
eval_error er;
if ((er = mult_term (me, et, v1)) != NO_ERROR)
return er;
while ((op = eval_lex (&v2)) == PLUS || op == MINUS)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = mult_term (me, et, &v2)) != NO_ERROR)
return er;
/* Minimize undefined C behavior on overflow. This code assumes
that the implementation-defined overflow when casting
unsigned to signed is a silent twos-complement
wrap-around. */
if (op == PLUS)
*v1 = (int32_t) ((uint32_t) *v1 + (uint32_t) v2);
else
*v1 = (int32_t) ((uint32_t) *v1 - (uint32_t) v2);
}
if (op == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
mult_term (const call_info *me, eval_token et, int32_t *v1)
{
eval_token op;
int32_t v2;
eval_error er;
if ((er = exp_term (me, et, v1)) != NO_ERROR)
return er;
while ((op = eval_lex (&v2)) == TIMES || op == DIVIDE || op == MODULO)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = exp_term (me, et, &v2)) != NO_ERROR)
return er;
/* Minimize undefined C behavior on overflow. This code assumes
that the implementation-defined overflow when casting
unsigned to signed is a silent twos-complement
wrap-around. */
switch (op)
{
case TIMES:
*v1 = (int32_t) ((uint32_t) *v1 * (uint32_t) v2);
break;
case DIVIDE:
if (v2 == 0)
return DIVIDE_ZERO;
else if (v2 == -1)
/* Avoid overflow, and the x86 SIGFPE on INT_MIN / -1. */
*v1 = (int32_t) -(uint32_t) *v1;
else
*v1 /= v2;
break;
case MODULO:
if (v2 == 0)
return MODULO_ZERO;
else if (v2 == -1)
/* Avoid the x86 SIGFPE on INT_MIN % -1. */
*v1 = 0;
else
*v1 %= v2;
break;
default:
assert (!"mult_term");
abort ();
}
}
if (op == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
exp_term (const call_info *me, eval_token et, int32_t *v1)
{
uint32_t result;
int32_t v2;
eval_error er;
if ((er = unary_term (me, et, v1)) != NO_ERROR)
return er;
while ((et = eval_lex (&v2)) == EXPONENT)
{
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = exp_term (me, et, &v2)) != NO_ERROR)
return er;
/* Minimize undefined C behavior on overflow. This code assumes
that the implementation-defined overflow when casting
unsigned to signed is a silent twos-complement
wrap-around. */
result = 1;
if (v2 < 0)
return NEGATIVE_EXPONENT;
if (*v1 == 0 && v2 == 0)
return DIVIDE_ZERO;
while (v2-- > 0)
result *= (uint32_t) *v1;
*v1 = result;
}
if (et == ERROR)
return UNKNOWN_INPUT;
eval_undo ();
return NO_ERROR;
}
static eval_error
unary_term (const call_info *me, eval_token et, int32_t *v1)
{
eval_token et2 = et;
eval_error er;
if (et == PLUS || et == MINUS || et == NOT || et == LNOT)
{
et2 = eval_lex (v1);
if (et2 == ERROR)
return UNKNOWN_INPUT;
if ((er = unary_term (me, et2, v1)) != NO_ERROR)
return er;
/* Minimize undefined C behavior on overflow. This code assumes
that the implementation-defined overflow when casting
unsigned to signed is a silent twos-complement
wrap-around. */
if (et == MINUS)
*v1 = (int32_t) -(uint32_t) *v1;
else if (et == NOT)
*v1 = ~*v1;
else if (et == LNOT)
*v1 = *v1 == 0 ? 1 : 0;
}
else if ((er = simple_term (me, et, v1)) != NO_ERROR)
return er;
return NO_ERROR;
}
static eval_error
simple_term (const call_info *me, eval_token et, int32_t *v1)
{
int32_t v2;
eval_error er;
switch (et)
{
case LEFTP:
et = eval_lex (v1);
if (et == ERROR)
return UNKNOWN_INPUT;
if ((er = logical_or_term (me, et, v1)) != NO_ERROR)
return er;
et = eval_lex (&v2);
if (et == ERROR)
return UNKNOWN_INPUT;
if (et != RIGHTP)
return MISSING_RIGHT;
break;
case NUMBER:
break;
case BADOP:
return INVALID_OPERATOR;
default:
return SYNTAX_ERROR;
}
return NO_ERROR;
}