blob: 5eb0296256e280040dd790d2942773421a37b139 [file] [log] [blame]
/* GNU m4 -- A simple macro processor
Copyright (C) 1989-1994, 2004-2014, 2016-2017, 2020-2025 Free
Software Foundation, Inc.
This file is part of GNU M4.
GNU M4 is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
GNU M4 is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
/* We use <config.h> instead of "config.h" so that a compilation
using -I. -I$srcdir will use ./config.h rather than $srcdir/config.h
(which it would do because it found this file in $srcdir). */
#include <config.h>
#include <assert.h>
#include <c-ctype.h>
#include <errno.h>
#include <error.h>
#include <limits.h>
#include <locale.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "attribute.h"
#include "binary-io.h"
#include "clean-temp.h"
#include "cloexec.h"
#include "close-stream.h"
#include "closein.h"
#include "dirname.h"
#include "exitfail.h"
#include "filenamecat.h"
#include "ignore-value.h"
#include "intprops.h"
#include "obstack.h"
#include "quotearg.h"
#include "stdio--.h"
#include "stdlib--.h"
#include "unistd--.h"
#include "vasnprintf.h"
#include "verify.h"
#include "xalloc.h"
#include "xmemdup0.h"
#include "xprintf.h"
#include "xvasprintf.h"
/* Canonicalize UNIX recognition macros. UNIX is defined to 1 when any
of the compiler- or POSIX-provided macros tested below is defined. */
#if defined unix || defined __unix || defined __unix__ \
|| defined _POSIX_VERSION || defined _POSIX2_VERSION \
|| defined __NetBSD__ || defined __OpenBSD__ \
|| defined __APPLE__ || defined __APPLE_CC__
# define UNIX 1
#endif
/* Canonicalize Windows recognition macros. Cygwin is explicitly
excluded, so W32_NATIVE is never defined there. */
#if (defined _WIN32 || defined __WIN32__) && !defined __CYGWIN__
# define W32_NATIVE 1
#endif
/* Canonicalize OS/2 recognition macro. When __EMX__ is defined, OS2
is defined and any UNIX definition made above is removed, so the two
are never defined together. */
#ifdef __EMX__
# define OS2 1
# undef UNIX
#endif
/* Exit status used if any programmer error is detected (not possible,
right?) */
#define EXIT_INTERNAL_ERROR 2
/* Exit status used for version mismatch, when -R detects a frozen file
it can't parse. */
#define EXIT_MISMATCH 63
/* NLS. When ENABLE_NLS is zero or undefined, textdomain and
bindtextdomain are redefined as function-like macros that expand to
nothing, so uses of them compile to empty statements. */
#include "gettext.h"
#if ! ENABLE_NLS
# undef textdomain
# define textdomain(Domainname) /* empty */
# undef bindtextdomain
# define bindtextdomain(Domainname, Dirname) /* empty */
#endif
/* NOTE(review): presumably holds a combination of the REF_* masks
below, selected by the --makedep-gen-missing-* options -- confirm
against its definition. */
extern int makedep_gen_missing; /* --makedep-gen-missing-* */
/* Bit masks indicating places a file is referenced from. Each source
has its own bit; REF_ALL is the bitwise OR of the three individual
masks and REF_NONE has no bit set. */
#define REF_CMD_LINE 0x01 /* File referenced from command line */
#define REF_INCLUDE 0x02 /* File referenced from m4_include() */
#define REF_SINCLUDE 0x04 /* File referenced from m4_sinclude() */
#define REF_ALL 0x07 /* All of the above */
#define REF_NONE 0x00 /* None of the above */
/* Shorthand that expands to gettext (MSGID). */
#define _(msgid) gettext (msgid)
/* Various declarations. */
/* Describes a pair of strings, such as begin and end quotes. Each
string is stored together with an explicit length. */
struct string_pair
{
char *str1; /* First string of the pair. */
size_t len1; /* Length that goes with str1. */
char *str2; /* Second string of the pair. */
size_t len2; /* Length that goes with str2. */
};
typedef struct string_pair string_pair;
/* Memory allocation. These are the chunk allocation hooks used by the
obstack macros from "obstack.h": chunks are obtained with xmalloc and
released with free. */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* These must come first: they are forward declarations of types that
the prototypes in the rest of this header refer to. */
typedef struct token_data token_data;
typedef struct macro_arguments macro_arguments;
/* Function type shared by builtin implementations (see the func member
of struct builtin).
NOTE(review): presumably the obstack collects the expansion and the
int is the argument count -- confirm against builtin.c. */
typedef void builtin_func (struct obstack *, int, macro_arguments *);
/* Gnulib's stdbool doesn't work with bool bitfields. For nicer
debugging, use bool when we know it works, but use the more
portable unsigned int elsewhere. Either way the type is unsigned, so
a one-bit bool_bitfield member holds 0 or 1. */
#if _GL_GNUC_PREREQ (2, 0)
typedef bool bool_bitfield;
#else
typedef unsigned int bool_bitfield;
#endif /* ! _GL_GNUC_PREREQ (2, 0) */
/* File: m4.c --- global definitions. */
/* Option flags. The trailing comment on each variable names the
command-line option it corresponds to. All are int except
max_debug_argument_length, which is a size_t. */
extern int sync_output; /* -s */
extern int debug_level; /* -d */
extern int no_gnu_extensions; /* -G */
extern int prefix_all_builtins; /* -P */
extern size_t max_debug_argument_length; /* -l */
extern int suppress_warnings; /* -Q */
extern int warning_status; /* -E */
extern int nesting_limit; /* -L */
/* Error handling. */
/* A structure containing context that was valid when a macro call
   started collecting arguments; used for tracing and error messages
   even when the global context changes in the meantime.  */
struct call_info
{
  const char *file;     /* The file containing the macro invocation.  */
  int line;             /* The line the macro was called on.  */
  int call_id;          /* The unique sequence call id of the macro.  */
  /* The flag is an unsigned bit-field on purpose: a signed one-bit
     field can only represent 0 and -1, so storing 1 in it would not
     read back as 1.  */
  unsigned int trace:1; /* True to trace this macro.  */
  int debug_level:31;   /* The debug level when the macro started.  */
  unsigned int start;   /* The start offset of the trace header.  */
  unsigned int rest;    /* The offset after the header.  */
  const char *name;     /* The macro name.  */
  size_t name_len;      /* The length of name.  */
};
typedef struct call_info call_info;
/* NOTE(review): presumably the exit status m4 will eventually report --
confirm against its definition. */
extern int retcode;
/* Diagnostic routines. Both take an optional call_info context and a
printf-style format followed by its arguments; ATTRIBUTE_FORMAT lets
the compiler check those arguments against the format (parameter 4 for
m4_error, parameter 3 for m4_warn). m4_error is additionally marked
ATTRIBUTE_COLD. */
/* *INDENT-OFF* */
extern void m4_error (int, int, const call_info *, const char *, ...)
ATTRIBUTE_COLD ATTRIBUTE_FORMAT ((__printf__, 4, 5));
extern void m4_warn (int, const call_info *, const char *, ...)
ATTRIBUTE_FORMAT ((__printf__, 3, 4));
/* *INDENT-ON* */
/* File: debug.c --- debugging and tracing function. */
/* NOTE(review): presumably the stream that receives debug and trace
output -- confirm against debug.c. */
extern FILE *debug;
/* The value of debug_level is a bitmask of the following. Each flag
is labelled with its flag letter and occupies its own bit. */
/* a: show arglist in trace output */
#define DEBUG_TRACE_ARGS 0x001
/* e: show expansion in trace output */
#define DEBUG_TRACE_EXPANSION 0x002
/* q: quote args and expansion in trace output */
#define DEBUG_TRACE_QUOTE 0x004
/* t: trace all macros -- overrides trace{on,off} */
#define DEBUG_TRACE_ALL 0x008
/* l: add line numbers to trace output */
#define DEBUG_TRACE_LINE 0x010
/* f: add file name to trace output */
#define DEBUG_TRACE_FILE 0x020
/* p: trace path search of include files */
#define DEBUG_TRACE_PATH 0x040
/* c: show macro call before args collection */
#define DEBUG_TRACE_CALL 0x080
/* i: trace changes of input files */
#define DEBUG_TRACE_INPUT 0x100
/* x: add call id to trace output */
#define DEBUG_TRACE_CALLID 0x200
/* d: warn if dereferencing undefined macro */
#define DEBUG_TRACE_DEREF 0x400
/* o: output dumpdef to stderr, not debug file */
#define DEBUG_TRACE_OUTPUT_DUMPDEF 0x800
/* V: very verbose -- print everything. This is the bitwise OR of all
twelve individual flags above. */
#define DEBUG_TRACE_VERBOSE 0xFFF
/* default flags -- equiv: adeq, that is DEBUG_TRACE_ARGS
| DEBUG_TRACE_DEREF | DEBUG_TRACE_EXPANSION | DEBUG_TRACE_QUOTE. */
#define DEBUG_TRACE_DEFAULT 0x407
/* Entry points of the debug module. debug_message takes a
printf-style format whose arguments the compiler checks through
ATTRIBUTE_FORMAT. The trace_* functions take the call_info or
macro_arguments of the macro call being traced; only trace_pre
receives a non-const call_info. */
extern void debug_init (void);
extern int debug_decode (const char *, size_t);
extern void debug_dump (struct obstack *obs);
extern void debug_flush_files (void);
extern bool debug_set_output (const call_info *, const char *);
/* *INDENT-OFF* */
extern void debug_message (const char *, ...)
ATTRIBUTE_FORMAT ((__printf__, 1, 2));
/* *INDENT-ON* */
extern void trace_pre (call_info *);
extern void trace_args (macro_arguments *);
extern void trace_post (const call_info *);
/* File: input.c --- lexical definitions. */
/* Forward declaration; struct token_chain is defined further down. */
typedef struct token_chain token_chain;
/* Various different token types. Avoid overlap with token_data_type,
since the shared prefix of the enumerators is a bit confusing: the
four token_data_type enumerators take the values 0 through 3, so
numbering starts at 4 here. The comment on each enumerator names the
token_data_type value(s) that go with that kind of token. */
enum token_type
{
TOKEN_EOF = 4,/* End of file, TOKEN_VOID. */
TOKEN_STRING, /* Quoted string, TOKEN_TEXT or TOKEN_COMP. */
TOKEN_COMMENT,/* Comment, TOKEN_TEXT or TOKEN_COMP. */
TOKEN_WORD, /* An identifier, TOKEN_TEXT. */
TOKEN_OPEN, /* Active character `(', TOKEN_TEXT. */
TOKEN_COMMA, /* Active character `,', TOKEN_TEXT. */
TOKEN_CLOSE, /* Active character `)', TOKEN_TEXT. */
TOKEN_SIMPLE, /* Any other single character, TOKEN_TEXT. */
TOKEN_MACDEF, /* A builtin macro, TOKEN_FUNC or TOKEN_COMP. */
TOKEN_ARGV /* A series of parameters, TOKEN_COMP. */
};
/* The kind of data held for a token, a macro argument, or a macro
definition. The value selects which member of the union u in struct
token_data is valid. */
enum token_data_type
{
TOKEN_VOID, /* Token still being constructed, u is invalid. */
TOKEN_TEXT, /* Straight text, u.u_t is valid. */
TOKEN_FUNC, /* Builtin function definition, u.func is valid. */
TOKEN_COMP /* Composite argument, u.u_c is valid. */
};
/* The kind of payload held by a link in a chain of token data. The
value selects which member of the union u in struct token_chain is
valid. */
enum token_chain_type
{
CHAIN_STR, /* Link contains a string, u.u_s is valid. */
CHAIN_FUNC, /* Builtin function definition, u.func is valid. */
CHAIN_ARGV, /* Link contains a $@ reference, u.u_a is valid. */
CHAIN_LOC /* Link contains location of m4wrap, u.u_l is valid. */
};
/* Composite tokens are built of a linked list of chains. Each link
of the chain is either a single text reference (i.e. $1), or an argv
reference (i.e. $@). The type member records which member of the
union u is valid, as noted on each enumerator of token_chain_type. */
struct token_chain
{
token_chain *next; /* Pointer to next link of chain. */
enum token_chain_type type; /* Type of this link. */
unsigned int quote_age; /* Quote_age of this link of chain, or 0. */
union
{
/* Valid when type is CHAIN_STR. */
struct
{
const char *str; /* Pointer to text. */
size_t len; /* Remaining length of str. */
int level; /* Expansion level of link content, or -1. */
}
u_s;
/* Valid when type is CHAIN_FUNC. */
builtin_func *func; /* Builtin token from defn. */
/* Valid when type is CHAIN_ARGV. */
struct
{
macro_arguments *argv; /* Reference to earlier $@. */
unsigned int index; /* Argument index within argv. */
bool_bitfield flatten : 1; /* True to treat builtins as text. */
bool_bitfield comma : 1; /* True when `,' is next input. */
bool_bitfield skip_last : 1; /* True if last argument omitted. */
bool_bitfield has_func : 1; /* True if argv includes func. */
const string_pair *quotes; /* NULL for $*, quotes for $@. */
}
u_a;
/* Valid when type is CHAIN_LOC. */
struct
{
const char *file; /* File where subsequent links originate. */
int line; /* Line where subsequent links originate. */
}
u_l;
}
u;
};
/* The content of a token or macro argument. The type member records
which member of the union u is valid, as noted on each enumerator of
token_data_type; for TOKEN_VOID none of them is. */
struct token_data
{
enum token_data_type type; /* Selects the valid member of u. */
union
{
/* Valid when type is TOKEN_TEXT. */
struct
{
/* We don't support NUL in text, yet. So len is just a
cache for now. But it will be essential if we ever DO
support NUL. */
size_t len;
char *text; /* The contents of the token. */
/* The value of quote_age when this token was scanned. If
this token is later encountered in the context of
scanning a quoted string, and quote_age has not changed,
then rescanning this string is provably unnecessary. If
zero, then this string potentially contains content that
might change the parse on rescan. Ignored for 0 len. */
unsigned int quote_age;
}
u_t;
/* Valid when type is TOKEN_FUNC. */
builtin_func *func;
/* Valid when type is TOKEN_COMP. Composite text: a linked list of
straight text and $@ placeholders. */
struct
{
token_chain *chain; /* First link of the chain. */
token_chain *end; /* Last link of the chain. */
bool_bitfield wrapper : 1; /* True if this is a $@ ref. */
bool_bitfield has_func : 1; /* True if chain includes func. */
}
u_c;
}
u;
};
/* Accessors for the members of a token_data, given a pointer Td. Each
expands to a plain member access. The LEN, TEXT and QUOTE_AGE
accessors reach into u.u_t, which struct token_data documents as valid
for TOKEN_TEXT; the FUNC accessor reaches u.func, valid for
TOKEN_FUNC. */
#define TOKEN_DATA_TYPE(Td) ((Td)->type)
#define TOKEN_DATA_LEN(Td) ((Td)->u.u_t.len)
#define TOKEN_DATA_TEXT(Td) ((Td)->u.u_t.text)
#define TOKEN_DATA_QUOTE_AGE(Td) ((Td)->u.u_t.quote_age)
#define TOKEN_DATA_FUNC(Td) ((Td)->u.func)
/* Short names for the enumeration types declared above. */
typedef enum token_type token_type;
typedef enum token_data_type token_data_type;
/* Entry points of the input module. peek_token and next_token report
the kind of token as a token_type; next_token additionally takes a
token_data to work with. Several functions take a call_info pointer
describing the macro call on whose behalf they run. */
extern void input_init (void);
extern token_type peek_token (void);
extern token_type next_token (token_data *, int *, struct obstack *, bool,
const call_info *);
extern void skip_line (const call_info *);
/* push back input */
extern void make_text_link (struct obstack *, token_chain **, token_chain **);
extern void push_file (FILE *, const char *, bool);
extern void append_macro (struct obstack *, builtin_func *, token_chain **,
token_chain **);
extern void push_macro (struct obstack *, builtin_func *);
/* NOTE(review): the push_string_* and push_wrapup_* functions look like
init/finish pairs bracketing data written to the returned obstack --
confirm against input.c. */
extern struct obstack *push_string_init (const char *, int);
extern bool push_token (token_data *, int, bool);
extern void push_quote_wrapper (void);
extern void push_string_finish (void);
extern struct obstack *push_wrapup_init (const call_info *, token_chain ***);
extern void push_wrapup_finish (void);
extern bool pop_wrapup (void);
extern void input_print (struct obstack *);
/* current input file, and line */
extern const char *current_file;
extern int current_line;
/* left and right quote, begin and end comment; each is stored as a
string_pair, so both strings carry an explicit length */
extern string_pair curr_comm;
extern string_pair curr_quote;
/* Default delimiters: a backquote and an apostrophe for quoting, and a
hash mark through the next newline for comments. */
#define DEF_LQUOTE "`"
#define DEF_RQUOTE "\'"
#define DEF_BCOMM "#"
#define DEF_ECOMM "\n"
/* set_quotes and set_comment each take two (string, length) pairs.
NOTE(review): presumably the begin delimiter followed by the end
delimiter -- confirm against input.c. */
extern void set_quotes (const char *, size_t, const char *, size_t);
extern void set_comment (const char *, size_t, const char *, size_t);
extern unsigned int quote_age (void);
extern bool safe_quotes (void);
extern const string_pair *quote_cache (struct obstack *, unsigned int,
const string_pair *);
/* File: output.c --- output functions. */
/* State and entry points of the output module. make_diversion and
insert_diversion identify a diversion by an int, the same type as
current_diversion. */
extern int current_diversion;
extern int output_current_line;
extern void output_init (void);
extern void output_exit (void);
extern void output_text (const char *, int);
extern void divert_text (struct obstack *, const char *, int, int);
extern bool shipout_string_trunc (struct obstack *, const char *, size_t,
size_t *);
extern void make_diversion (int);
extern void insert_diversion (int);
extern void insert_file (FILE *);
extern void freeze_diversions (FILE *);
/* File: symtab.c --- symbol table definitions. */
/* Operation modes for lookup_symbol (), passed as its last
argument. */
enum symbol_lookup
{
SYMBOL_LOOKUP,
SYMBOL_INSERT,
SYMBOL_DELETE,
SYMBOL_PUSHDEF,
SYMBOL_POPDEF
};
/* Symbol table entry. The SYMBOL_* macros that follow this structure
are the accessors for its members. */
struct symbol
{
struct symbol *stack; /* Circular list for pushdef stack of symbol. */
bool_bitfield traced : 1; /* See SYMBOL_TRACED. */
bool_bitfield macro_args : 1; /* See SYMBOL_MACRO_ARGS. */
bool_bitfield blind_no_args : 1; /* See SYMBOL_BLIND_NO_ARGS. */
bool_bitfield deleted : 1; /* See SYMBOL_DELETED. */
int pending_expansions; /* See SYMBOL_PENDING_EXPANSIONS. */
size_t hash; /* NOTE(review): presumably the hash of name -- confirm. */
char *name; /* See SYMBOL_NAME. */
size_t len; /* Length that goes with name; see SYMBOL_NAME_LEN. */
token_data data; /* Type should be only TOKEN_TEXT or TOKEN_FUNC. */
};
/* Accessors for the members of a symbol, given a pointer S. The first
group expands to a direct member access; the second group applies the
TOKEN_DATA_* accessors to the embedded data member. */
#define SYMBOL_TRACED(S) ((S)->traced)
#define SYMBOL_MACRO_ARGS(S) ((S)->macro_args)
#define SYMBOL_BLIND_NO_ARGS(S) ((S)->blind_no_args)
#define SYMBOL_DELETED(S) ((S)->deleted)
#define SYMBOL_PENDING_EXPANSIONS(S) ((S)->pending_expansions)
#define SYMBOL_NAME(S) ((S)->name)
#define SYMBOL_NAME_LEN(S) ((S)->len)
#define SYMBOL_TYPE(S) (TOKEN_DATA_TYPE (&(S)->data))
#define SYMBOL_TEXT(S) (TOKEN_DATA_TEXT (&(S)->data))
#define SYMBOL_TEXT_LEN(S) (TOKEN_DATA_LEN (&(S)->data))
#define SYMBOL_FUNC(S) (TOKEN_DATA_FUNC (&(S)->data))
/* Short names for the symbol table types. */
typedef enum symbol_lookup symbol_lookup;
typedef struct symbol symbol;
/* Callback type accepted by hack_all_symbols: receives a symbol and an
opaque pointer. */
typedef void hack_symbol (symbol *, void *);
#define HASHMAX 65537 /* default, overridden by -Hsize */
/* Entry points of the symbol table module. lookup_symbol takes a name
with its length plus the operation mode to perform, and returns a
symbol pointer. hack_all_symbols takes a hack_symbol callback and an
opaque pointer.
NOTE(review): presumably the size_t given to symtab_init is the table
size (HASHMAX by default), and the opaque pointer is handed through to
the callback -- confirm against symtab.c. */
extern void free_symbol (symbol *);
extern void symtab_init (size_t);
extern void symtab_free (void);
extern symbol *lookup_symbol (const char *, size_t, symbol_lookup);
extern void hack_all_symbols (hack_symbol *, void *);
/* File: macro.c --- macro expansion. */
extern int expansion_level;
extern void expand_input (void);
extern void call_macro (symbol *, macro_arguments *, struct obstack *);
extern size_t adjust_refcount (int, bool);
extern bool arg_adjust_refcount (macro_arguments *, bool);
/* Accessors for a macro_arguments object. Those that take an unsigned
int select one argument by index (arg_equal takes two indices). The
ARG and ARG_LEN macros below wrap arg_text and arg_len. */
extern unsigned int arg_argc (macro_arguments *);
extern const call_info *arg_info (macro_arguments *);
extern token_data_type arg_type (macro_arguments *, unsigned int);
extern const char *arg_text (macro_arguments *, unsigned int, bool);
extern bool arg_equal (macro_arguments *, unsigned int, unsigned int);
extern bool arg_empty (macro_arguments *, unsigned int);
extern size_t arg_len (macro_arguments *, unsigned int, bool);
extern builtin_func *arg_func (macro_arguments *, unsigned int);
extern struct obstack *arg_scratch (void);
extern bool arg_print (struct obstack *, macro_arguments *, unsigned int,
const string_pair *, bool, token_chain **, const char *,
size_t *, bool);
extern macro_arguments *make_argv_ref (macro_arguments *, const char *, size_t,
bool, bool);
/* Functions that push arguments; each takes the obstack and the
macro_arguments to work with. push_arg_quote additionally takes a
string_pair of quotes. */
extern void push_arg (struct obstack *, macro_arguments *, unsigned int);
extern void push_arg_quote (struct obstack *, macro_arguments *, unsigned int,
const string_pair *);
extern void push_args (struct obstack *, macro_arguments *, bool, bool);
extern void wrap_args (macro_arguments *);
/* Grab the text at argv index I. Assumes macro_arguments *argv is in
scope, and aborts if the argument is not text. Expands to a call of
arg_text with false as its last argument. */
#define ARG(i) arg_text (argv, i, false)
/* Grab the text length at argv index I. Assumes macro_arguments *argv
is in scope, and aborts if the argument is not text. Expands to a
call of arg_len with false as its last argument. */
#define ARG_LEN(i) arg_len (argv, i, false)
/* File: builtin.c --- builtins. */
/* Describes one builtin: its name, three one-bit flags, and the
function that implements it.
NOTE(review): the flag names suggest they relate to the -G option and
to the macro_args / blind_no_args bits of struct symbol -- confirm
against builtin.c. */
struct builtin
{
const char *name;
bool_bitfield gnu_extension : 1;
bool_bitfield groks_macro_args : 1;
bool_bitfield blind_if_no_args : 1;
builtin_func *func; /* Implementation, with the builtin_func signature. */
};
/* Describes one predefined macro by three strings.
NOTE(review): presumably the name used in traditional mode, the name
used with GNU extensions, and the definition text -- confirm against
builtin.c. */
struct predefined
{
const char *unix_name;
const char *gnu_name;
const char *func;
};
typedef struct builtin builtin;
typedef struct predefined predefined;
/* Forward declarations of the regex types, so init_pattern_buffer can
be declared below without this header defining them. */
struct re_pattern_buffer;
struct re_registers;
/* The default sequence detects multi-digit parameters (obsolete after
1.4.x), and any use of extended arguments with the default ${}
syntax (new in 2.0). Written as a C string literal, so every
backslash of the regular expression is doubled. */
#define DEFAULT_MACRO_SEQUENCE "\\$\\({[^}]*}\\|[0-9][0-9]+\\)"
/* Entry points of the builtin module. define_builtin and
define_user_macro take a name with its length and a symbol_lookup
mode; define_user_macro also takes a second (string, length) pair.
m4_placeholder has the builtin_func signature and is marked
ATTRIBUTE_COLD. */
extern void builtin_init (void);
extern bool bad_argc (const call_info *, int, unsigned int, unsigned int);
extern void define_builtin (const char *, size_t, const builtin *,
symbol_lookup);
extern void set_macro_sequence (const char *);
extern void free_regex (void);
extern void define_user_macro (const char *, size_t, const char *, size_t,
symbol_lookup);
extern void undivert_all (void);
extern void expand_user_macro (struct obstack *, symbol *, int,
macro_arguments *);
extern void m4_placeholder (struct obstack *, int, macro_arguments *)
ATTRIBUTE_COLD;
extern void init_pattern_buffer (struct re_pattern_buffer *,
struct re_registers *);
/* Look up a builtin description by implementation address or by
name. */
extern const builtin *find_builtin_by_addr (builtin_func *);
extern const builtin *find_builtin_by_name (const char *);
extern void func_print (struct obstack *, const builtin *, bool, token_chain **,
const string_pair *);
/* File: path.c --- path search for include files. */
extern void include_init (void);
extern void include_env_init (void);
extern void add_include_directory (const char *);
/* Returns a FILE pointer for the named file.
NOTE(review): presumably NULL when the search fails, with the char **
receiving the name actually opened -- confirm against path.c. */
extern FILE *m4_path_search (const char *, bool, char **);
/* NOTE(review): the int arguments below are presumably REF_* masks --
confirm against path.c. */
extern void record_dependency (const char *, int);
extern void generate_make_dependencies (const char *, const char *, int);
/* File: eval.c --- expression evaluation. */
/* Takes an expression with its length and a pointer to an int32_t.
NOTE(review): presumably the int32_t receives the result and the bool
return reports failure -- confirm against eval.c. */
extern bool evaluate (const call_info *, const char *, size_t, int32_t *);
/* File: format.c --- printf like formatting. */
/* Has the builtin_func parameter list. */
extern void expand_format (struct obstack *, int, macro_arguments *);
/* File: freeze.c --- frozen state files. */
/* Each takes a single string, presumably the frozen file's name. */
extern void produce_frozen_state (const char *);
extern void reload_frozen_state (const char *);
/* Debugging the memory allocator. When WITH_DMALLOC is defined,
DMALLOC_FUNC_CHECK is defined before <dmalloc.h> is included. */
#ifdef WITH_DMALLOC
# define DMALLOC_FUNC_CHECK
# include <dmalloc.h>
#endif
/* Other debug stuff. Defining DEBUG turns on every per-area DEBUG_*
switch listed here at once. */
#ifdef DEBUG
# define DEBUG_INCL 1
# define DEBUG_INPUT 1
# define DEBUG_MACRO 1
# define DEBUG_OUTPUT 1
# define DEBUG_REGEX 1
# define DEBUG_STKOVF 1
# define DEBUG_SYM 1
#endif
/* Generic utilities.  */
/* Turn a char, whose signedness varies between platforms, into the
   matching unsigned char value.  Where inline functions are available
   this is a real function, so passing something that is not a char
   is diagnosed where a bare cast would silently accept it.  */
#if ! HAVE_INLINE
# define to_uchar(Ch) ((unsigned char) (Ch))
#else
static inline unsigned char
to_uchar (char value)
{
  return (unsigned char) value;
}
#endif
/* Yield 1 when the two NUL-terminated strings compare equal and 0
   otherwise, so callers can test string equality without the
   inverted-looking comparison of strcmp against zero.  */
#define STREQ(s1, s2) (!strcmp ((s1), (s2)))
/* fwrite wrapper that deliberately discards the item count returned
   by fwrite.  Write failures are not lost: the stream's error
   indicator is faithfully checked with ferror later on.  */
#if ! HAVE_INLINE
# define xfwrite(B, S, N, F) (ignore_value (fwrite (B, S, N, F)))
#else
static inline void
xfwrite (const void *data, size_t item_size, size_t count, FILE *stream)
{
  ignore_value (fwrite (data, item_size, count, stream));
}
#endif