blob: 4f62dcf16c38bb2d73a04f587942a66e860fc4da [file] [log] [blame]
/* GNU m4 -- A simple macro processor
Copyright (C) 1989-1994, 2003, 2006-2014, 2016-2017, 2020-2025 Free
Software Foundation, Inc.
This file is part of GNU M4.
GNU M4 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GNU M4 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* This file handles all the low level work around the symbol table. The
symbol table is a simple chained hash table. Each symbol is described
by a struct symbol, which is placed in the hash table based upon the
symbol name. Symbols that hash to the same entry in the table are
kept on a list, sorted by hash. */
#include "m4.h"
#include "bitrotate.h"
#include "hash.h"
#ifdef DEBUG_SYM
/* When evaluating hash table performance, this profiling code shows
how many collisions were encountered. */
struct profile
{
int entry; /* Number of times lookup_symbol called
with this mode. */
int allocations; /* Number of times a symbol is malloc'd. */
int hits; /* Number of times a symbol is found. */
int checks; /* Number of times a hash is checked. */
int comparisons; /* Number of times memcmp was called. */
int misses; /* Number of times memcmp did not return 0. */
long long bytes_hashed; /* Number of bytes hashed. */
long long bytes_compared; /* Number of bytes compared. */
};
static struct profile profiles[5];
static symbol_lookup current_mode;
static unsigned long long hash_entry;
static unsigned long long comparator_entry;
static size_t current_size;
static unsigned int resizes;
/* On exit, show a profile of symbol table performance. */
static void
show_profile (void)
{
int i;
FILE *f = fopen ("/dev/tty", "w");
for (i = 0; i < 5; i++)
{
xfprintf (f, "m4debug: lookup mode %d called %d times, %d hits:\n"
"m4debug: symbols: %d allocs, %d checks, %lld bytes hashed\n"
"m4debug: str: %d compares, %d misses, %lld bytes compared\n",
i, profiles[i].entry, profiles[i].hits,
profiles[i].allocations, profiles[i].checks,
profiles[i].bytes_hashed, profiles[i].comparisons,
profiles[i].misses, profiles[i].bytes_compared);
}
xfprintf (f, "m4: %llu hash callbacks, %llu compare callbacks, "
"%zu buckets, %u resizes\n",
hash_entry, comparator_entry, current_size, resizes - 1);
fclose (f);
}
/* Like memcmp (S1, S2, L), but also track profiling statistics. */
static int
profile_memcmp (const char *s1, const char *s2, size_t l)
{
int i = 0;
int result;
while (l && *s1 == *s2)
{
s1++;
s2++;
i++;
l--;
}
result = l ? (unsigned char) *s1 - (unsigned char) *s2 : 0;
profiles[current_mode].comparisons++;
if (result != 0)
profiles[current_mode].misses++;
profiles[current_mode].bytes_compared += i;
return result;
}
# define memcmp profile_memcmp
#endif /* DEBUG_SYM */
/* Pointer to symbol table. */
static Hash_table *symtab;
/* Return a hashvalue for a string S of length LEN. */
static size_t ATTRIBUTE_PURE
hash (const char *s, size_t len)
{
size_t val = len;
/* This algorithm was originally borrowed from GNU Emacs, but has
been modified to allow embedded NUL. */
while (len--)
val = rotl_sz (val, 7) + to_uchar (*s++);
return val;
}
/* Wrap our hash inside signature expected by hash.h. */
static size_t
symtab_hasher (const void *entry, size_t buckets)
{
#ifdef DEBUG_SYM
hash_entry++;
if (buckets != current_size)
{
resizes++;
current_size = buckets;
}
#endif /* DEBUG_SYM */
const symbol *sym = (const symbol *) entry;
return sym->hash % buckets;
}
/* Compare two hash table entries for equality. */
static bool
symtab_comparator (const void *entry_a, const void *entry_b)
{
#ifdef DEBUG_SYM
comparator_entry++;
profiles[current_mode].checks++;
#endif /* DEBUG_SYM */
const symbol *sym_a = (const symbol *) entry_a;
const symbol *sym_b = (const symbol *) entry_b;
return (sym_a->hash == sym_b->hash
&& SYMBOL_NAME_LEN (sym_a) == SYMBOL_NAME_LEN (sym_b)
&& memcmp (SYMBOL_NAME (sym_a), SYMBOL_NAME (sym_b),
SYMBOL_NAME_LEN (sym_a)) == 0);
}
/* Reclaim ENTRY on program exit. */
static void
symtab_free_entry (void *entry)
{
symbol *sym = (symbol *) entry;
while (sym->stack != sym)
{
symbol *old = sym->stack;
sym->stack = old->stack;
assert (!SYMBOL_PENDING_EXPANSIONS (old));
free_symbol (old);
}
assert (!SYMBOL_PENDING_EXPANSIONS (sym));
free_symbol (sym);
}
/* Initialize the symbol table, with SIZE as a hint for expected
number of entries. */
void
symtab_init (size_t size)
{
symtab = hash_initialize (size, NULL, symtab_hasher, symtab_comparator,
symtab_free_entry);
if (!symtab)
xalloc_die ();
#ifdef DEBUG_SYM
atexit (show_profile); /* Ignore failure, since this is debug code. */
#endif /* DEBUG_SYM */
}
/* Clean up entire table. */
void
symtab_free (void)
{
hash_free (symtab);
}
/* Free all storage associated with a symbol. */
void
free_symbol (symbol *sym)
{
if (SYMBOL_PENDING_EXPANSIONS (sym) > 0)
SYMBOL_DELETED (sym) = true;
else
{
if (sym->stack == NULL)
free (SYMBOL_NAME (sym));
if (SYMBOL_TYPE (sym) == TOKEN_TEXT)
free (SYMBOL_TEXT (sym));
free (sym);
}
}
/* Searches and manipulation of the symbol table are all done by
lookup_symbol (). It basically hashes NAME, of length LEN, to a
list in the symbol table, and searches this list for the first
occurrence of a symbol with the name.
The MODE parameter determines what lookup_symbol () will do. It
can either just do a lookup, do a lookup and insert if not present,
do an insertion even if the name is already in the list, delete the
first occurrence of the name on the list, or delete all occurrences
of the name on the list. The return value when requesting deletion
is non-NULL if deletion occurred, but must not be dereferenced. */
symbol *
lookup_symbol (const char *name, size_t len, symbol_lookup mode)
{
symbol *sym;
symbol *entry;
symbol tmp;
#if DEBUG_SYM
current_mode = mode;
profiles[mode].entry++;
profiles[mode].bytes_hashed += len;
#endif /* DEBUG_SYM */
tmp.name = (char *) name;
tmp.len = len;
tmp.hash = hash (name, len);
entry = (symbol *) hash_lookup (symtab, &tmp);
#if DEBUG_SYM
if (entry)
profiles[mode].hits++;
#endif /* DEBUG_SYM */
switch (mode)
{
case SYMBOL_LOOKUP:
return entry ? entry->stack : NULL;
case SYMBOL_INSERT:
/* If the name was found in the table, check whether it is still in
use by a pending expansion. If so, replace the table element with
a new one; if not, just return the symbol. If not found, just
insert the name, and return the new symbol. */
if (entry)
{
sym = entry->stack;
if (SYMBOL_PENDING_EXPANSIONS (sym) > 0)
{
symbol *old = sym;
SYMBOL_DELETED (old) = true;
#ifdef DEBUG_SYM
profiles[mode].allocations++;
#endif
sym = (symbol *) xmalloc (sizeof *sym);
SYMBOL_TYPE (sym) = TOKEN_VOID;
SYMBOL_TRACED (sym) = SYMBOL_TRACED (old);
sym->hash = old->hash;
SYMBOL_NAME (sym) = old->name;
old->name = xmemdup0 (name, len);
SYMBOL_NAME_LEN (sym) = len;
SYMBOL_MACRO_ARGS (sym) = false;
SYMBOL_BLIND_NO_ARGS (sym) = false;
SYMBOL_DELETED (sym) = false;
SYMBOL_PENDING_EXPANSIONS (sym) = 0;
if (old == entry)
{
old = (symbol *) hash_remove (symtab, entry);
assert (entry == old);
sym->stack = sym;
entry = (symbol *) hash_insert (symtab, sym);
if (entry)
assert (sym == entry);
else
xalloc_die ();
}
else
{
entry->stack = sym;
sym->stack = old->stack;
}
old->stack = NULL;
}
return sym;
}
FALLTHROUGH;
case SYMBOL_PUSHDEF:
/* Insert a name in the symbol table. If there is already a
symbol with the name, add it to the pushdef stack. Since the
hash table does not allow the insertion of duplicates, the
pushdef stack is a circular chain; the hash entry is the
oldest entry, which points to the newest entry; all other
entries point to the next older entry. */
#ifdef DEBUG_SYM
profiles[mode].allocations++;
#endif
sym = (symbol *) xmalloc (sizeof *sym);
SYMBOL_TYPE (sym) = TOKEN_VOID;
SYMBOL_TRACED (sym) = false;
sym->hash = tmp.hash;
SYMBOL_NAME_LEN (sym) = len;
SYMBOL_MACRO_ARGS (sym) = false;
SYMBOL_BLIND_NO_ARGS (sym) = false;
SYMBOL_DELETED (sym) = false;
SYMBOL_PENDING_EXPANSIONS (sym) = 0;
if (entry)
{
assert (mode == SYMBOL_PUSHDEF);
sym->stack = entry->stack;
entry->stack = sym;
SYMBOL_TRACED (sym) = SYMBOL_TRACED (sym->stack);
SYMBOL_NAME (sym) = entry->name;
}
else
{
SYMBOL_NAME (sym) = xmemdup0 (name, len);
sym->stack = sym;
entry = (symbol *) hash_insert (symtab, sym);
if (entry)
assert (sym == entry);
else
xalloc_die ();
}
return sym;
case SYMBOL_DELETE:
case SYMBOL_POPDEF:
/* Delete occurrences of symbols with NAME. SYMBOL_DELETE kills
all definitions, SYMBOL_POPDEF kills only the first.
However, if the last instance of a symbol is marked for
tracing, reinsert a placeholder in the table. And if the
definition is still in use, let the caller free the memory
after it is done with the symbol. */
if (!entry
|| (SYMBOL_TYPE (entry) == TOKEN_VOID && entry->stack == entry
&& SYMBOL_TRACED (entry)))
return NULL;
{
bool traced = false;
symbol *result = sym = entry->stack;
if (sym != entry && mode == SYMBOL_POPDEF)
{
SYMBOL_TRACED (sym->stack) = SYMBOL_TRACED (sym);
entry->stack = sym->stack;
sym->stack = NULL;
sym->name = NULL;
free_symbol (sym);
}
else
{
traced = SYMBOL_TRACED (sym);
while (sym != entry)
{
symbol *old = sym;
sym = sym->stack;
old->stack = NULL;
old->name = NULL;
free_symbol (old);
}
sym = (symbol *) hash_remove (symtab, entry);
assert (sym == entry);
sym->stack = NULL;
free_symbol (sym);
}
if (traced)
{
#ifdef DEBUG_SYM
profiles[mode].allocations++;
#endif
sym = (symbol *) xmalloc (sizeof *sym);
SYMBOL_TYPE (sym) = TOKEN_VOID;
SYMBOL_TRACED (sym) = true;
sym->hash = tmp.hash;
SYMBOL_NAME (sym) = xmemdup0 (name, len);
SYMBOL_NAME_LEN (sym) = len;
SYMBOL_MACRO_ARGS (sym) = false;
SYMBOL_BLIND_NO_ARGS (sym) = false;
SYMBOL_DELETED (sym) = false;
SYMBOL_PENDING_EXPANSIONS (sym) = 0;
sym->stack = sym;
entry = (symbol *) hash_insert (symtab, sym);
if (entry)
assert (sym == entry);
else
xalloc_die ();
}
return result;
}
default:
assert (!"symbol_lookup");
abort ();
}
}
/* The following function is used for the cases where we want to do
something to each and every symbol in the table. The function
hack_all_symbols () traverses the symbol table, and calls a
specified function FUNC for each symbol in the table. FUNC is
called with a pointer to the symbol, and the DATA argument.
FUNC may safely call lookup_symbol with mode SYMBOL_POPDEF or
SYMBOL_LOOKUP, but any other mode can break the iteration. */
void
hack_all_symbols (hack_symbol *func, void *data)
{
symbol *sym = (symbol *) hash_get_first (symtab);
symbol *next;
while (sym)
{
/* We allow func to call SYMBOL_POPDEF, which can invalidate
sym, so we must grab the next element to traverse before
calling func. */
next = (symbol *) hash_get_next (symtab, sym);
func (sym->stack, data);
sym = next;
}
}
#ifdef DEBUG_SYM
static void symtab_print_list (int i);
static void MAYBE_UNUSED
symtab_debug (void)
{
token_data td;
const char *text;
symbol *s;
int delete;
size_t len;
int line;
static int i;
while (next_token (&td, &line, NULL, false, NULL) == TOKEN_WORD)
{
text = TOKEN_DATA_TEXT (&td);
len = TOKEN_DATA_LEN (&td);
if (*text == '_')
{
delete = 1;
text++;
len--;
}
else
delete = 0;
s = lookup_symbol (text, len, SYMBOL_LOOKUP);
if (s == NULL)
xprintf ("Name `%s' is unknown\n", text);
lookup_symbol (text, len, delete ? SYMBOL_DELETE : SYMBOL_INSERT);
}
symtab_print_list (i++);
}
static void
symtab_print_list (int i)
{
symbol *sym = (symbol *) hash_get_first (symtab);
symbol *stack;
xprintf ("Symbol dump #%d:\n", i);
while (sym)
{
stack = sym->stack;
do
{
xprintf ("\tname %s, len %zu, hash %zu, addr %p, "
"stack %p, flags%s%s, pending %d\n",
SYMBOL_NAME (stack), SYMBOL_NAME_LEN (stack), stack->hash,
stack, stack->stack,
SYMBOL_TRACED (stack) ? " traced" : "",
SYMBOL_DELETED (stack) ? " deleted" : "",
SYMBOL_PENDING_EXPANSIONS (stack));
stack = stack->stack;
}
while (stack != sym);
sym = (symbol *) hash_get_next (symtab, sym);
}
}
#endif /* DEBUG_SYM */