blob: 52d1affee810d6fd4370d77390dfae630962a914 [file] [log] [blame]
/*
* Copyright (c) 2021-2025 Symas Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of the Symas Corporation nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "config.h"
#include <ext/stdio_filebuf.h>
#include "cobol-system.h"
#include "cbldiag.h"
#include "util.h"
#include "copybook.h"
#include "lexio.h"
extern int yy_flex_debug;
source_format_t& cdf_source_format();
void
source_format_t::infer( const char *bol, bool want_reference_format ) {
if( bol ) {
left = 7;
if( want_reference_format ) {
right = 73;
}
}
dbgmsg("%s:%d: %s format detected", __func__, __LINE__,
description());
}
// public source format test functions
bool is_fixed_format() { return cdf_source_format().is_fixed(); }
bool is_reference_format() { return cdf_source_format().is_reffmt(); }
static bool debug_mode = false;
/*
* The "debug mode" is a little odd, because we have to make sure a
* leading "D" doesn't start the verb DISPLAY (for example). If
* debug_mode is on, debug lines are included in the parse. If
* debug_mode is off but we're not in fixed_format, lines starting
* with "D" are also included.
*
* So, the line is excluded if: fixed format and not debug mode
* Else, it's included.
*/
static inline int left_margin() {
return cdf_source_format().left_margin();
}
static inline int right_margin() {
return cdf_source_format().right_margin();
}
/*
* When setting the indicator column explicity:
* To get strict fixed 72-column lines, use a negative column number.
* When setting back to 0 (free), the right margin is also reset to 0.
*/
void
cobol_set_indicator_column( int column );
bool include_debug() { return is_fixed_format() && debug_mode; }
bool set_debug( bool tf ) { return debug_mode = tf && is_fixed_format(); }
static bool nonblank( const char ch ) { return !isblank(ch); }
static inline char *
start_of_line( char *bol, char *eol ) {
bol = std::find_if(bol, eol, nonblank);
gcc_assert(bol < eol); // must exist
return bol;
}
static inline char *
continues_at( char *bol, char *eol ) {
if( cdf_source_format().is_free() ) return NULL; // cannot continue in free format
bol += left_margin();
if( *bol != '-' ) return NULL; // not a continuation line
return start_of_line(++bol, eol);
}
// Return pointer to indicator column. Test ch if provided.
// NULL means no indicator column or tested value not present.
static inline char *
indicated( char *bol, const char *eol, char ch = '\0' ) {
if( cdf_source_format().left_margin() == 0 && *bol != '*' ) {
return NULL; // no indicator column in free format, except for comments
}
gcc_assert(bol != NULL);
auto ind = bol + left_margin();
if( eol <= ind ) return NULL; // left margin would be after end of line
// If TAB is in the line-number region, nothing is in the indicator column.
bool has_tab = std::any_of(bol, ind,
[](const char ch) { return ch == '\t'; } );
if( has_tab ) return NULL;
if( (bol += left_margin()) > eol ) return NULL;
return ch == '\0' || ch == *bol? bol : NULL;
}
static char *
remove_inline_comment( char *bol, char *eol ) {
char *nl = std::find(bol, eol, '\n');
if( bol < nl ) {
static char ends = '\0';
std::swap(*nl, ends);
char *comment = strstr(bol, "*>");
if( comment ) {
std::fill(comment, nl, SPACE);
}
std::swap(*nl, ends);
}
return eol;
}
static void
erase_line( char *src, char *esrc ) {
dbgmsg( "%s: erasing %.*s from input", __func__, int(esrc-src), src);
erase_source(src, esrc);
}
static size_t
count_newlines( const char *beg, const char *end ) {
return std::count(beg, end, '\n');
}
size_t
filespan_t::tab_check( const char *src, const char *esrc ) {
static const char tab = '\t';
const char *data = src + left_margin();
if( data < esrc ) { // not a blank line
const char *tab_at = std::find(src, data, tab);
if( tab_at < data ) {
return (tab_at - src) + 1;
}
}
return 0;
}
static const auto extended_icase = regex::extended | regex::icase;
std::stack< std::list<replace_t> > replace_directives;
static bool
is_word_or_quote( char ch ) {
return ch == '"' || ch == '\'' || ISALNUM(ch);
}
/*
* If the replacement is not leading/trailing, the edges of the
* matched pattern must delimit a Cobol word. If not, add a space to
* the replacement.
*/
static void
maybe_add_space(const span_t& pattern, replace_t& recognized) {
static const char blank[] = " ";
const char *befter[2] = { "", "" };
gcc_assert(0 < recognized.before.size());
// start of pattern and end of preceding text
if( pattern.p[0] == '.' && is_word_or_quote(recognized.before.p[-1]) ) {
befter[0] = blank;
}
// end of pattern, and start of succeeding text
if( pattern.pend[-1] == '.' && is_word_or_quote(recognized.before.pend[0]) ) {
befter[1] = blank;
}
if( befter[0] == blank || befter[1] == blank ) {
const char *s = xasprintf( "%s%.*s%s",
befter[0],
recognized.after.size(), recognized.after.p,
befter[1] );
recognized.after = span_t(s, s + strlen(s));
}
}
/*
* Keep track of the next pending replacement for each active REPLACE
* directive. For the current line, apply patterns that begins on the
* line. (It may match input extending beyond the current eol.)
*
* As each replacement is identified, append it to the passsed list of
* pending replacements. For these elements:
*
* before is a span in mfile
* after is dynamically allocated
*/
static void
recognize_replacements( filespan_t mfile, std::list<replace_t>& pending_replacements ) {
static const char *top_of_stack_cache, *applies_to;
struct future_replacement_t {
replace_t directive;
span_t found;
future_replacement_t( const replace_t& replace, span_t found )
: directive(replace), found(found)
{}
bool operator<( const future_replacement_t& that ) const {
return found.p < that.found.p;
}
};
static std::list<future_replacement_t> futures;
if( replace_directives.empty() ) return;
if( ! (top_of_stack_cache == replace_directives.top().front().before.p
&&
applies_to == mfile.data) )
{
futures.clear();
top_of_stack_cache = replace_directives.top().front().before.p;
applies_to = mfile.data;
}
if( futures.empty() ) {
/*
* From the current point in the file, find the next match for each
* pattern at the top of the replacement stack.
*/
for( const auto& directive : replace_directives.top() ) {
regex re(directive.before.p, extended_icase);
cmatch cm;
span_t found(mfile.eodata, mfile.eodata);
if( regex_search( mfile.ccur(),
const_cast<const char *>(mfile.eodata),
cm, re) ) {
gcc_assert(cm[1].matched);
found = span_t( cm[1].first, cm[1].second );
if( yy_flex_debug ) {
size_t n = count_newlines(mfile.data, found.p);
dbgmsg("%s:%d first '%.*s' is on line " HOST_SIZE_T_PRINT_UNSIGNED
" (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__,
directive.before.size(), directive.before.p,
(fmt_size_t)++n, (fmt_size_t)(found.p - mfile.data));
}
} else {
dbgmsg("%s:%d not found: '%s' in \n'%.*s'", __func__, __LINE__,
directive.before.p, int(strlen(directive.before.p)), mfile.cur);
}
futures.push_back( future_replacement_t(directive, found) );
}
}
gcc_assert(!futures.empty());
gcc_assert(futures.size() == replace_directives.top().size());
replace_t recognized;
auto pnext = std::min_element(futures.begin(), futures.end());
for( const char *bol = mfile.cur; // more than one replacement may apply to a line
bol <= pnext->found.p && pnext->found.p < mfile.eol; ) {
auto& next(*pnext);
recognized = replace_t( next.found, span_t(strlen(next.directive.after.p),
next.directive.after.p) );
maybe_add_space(next.directive.before, recognized);
pending_replacements.push_back(recognized);
bol = next.found.pend;
if( yy_flex_debug ) {
size_t n = std::count(const_cast<const char *>(mfile.data),
recognized.before.p, '\n');
dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
" @ " HOST_SIZE_T_PRINT_UNSIGNED ": '%s'\n/%.*s/%.*s/",
__func__, __LINE__,
(fmt_size_t)++n, (fmt_size_t)(next.found.p - mfile.data),
next.directive.before.p,
int(recognized.before.size()), recognized.before.p,
int(recognized.after.size()), recognized.after.p );
}
// Update the futures element for this pattern
cmatch cm;
next.found = span_t(mfile.eodata, mfile.eodata);
regex re(next.directive.before.p, extended_icase);
if( regex_search(bol, const_cast<const char *>(mfile.eodata), cm, re) ) {
gcc_assert(cm[1].matched);
next.found = span_t( cm[1].first, cm[1].second );
size_t n = std::count(const_cast<const char *>(mfile.data),
next.found.p, '\n');
if( false )
dbgmsg("%s:%d next '%.*s' will be on line " HOST_SIZE_T_PRINT_UNSIGNED
" (offset " HOST_SIZE_T_PRINT_UNSIGNED ")", __func__, __LINE__,
next.directive.before.size(), next.directive.before.p,
(fmt_size_t)++n, (fmt_size_t)(next.found.p - mfile.data));
}
pnext = std::min_element(futures.begin(), futures.end());
}
}
static void
check_push_pop_directive( filespan_t& mfile ) {
char eol = '\0';
const char *p = std::find(mfile.cur, mfile.eol, '>');
if( ! (p < mfile.eol && p[1] == *p ) ) return;
const char pattern[] =
">>[[:blank:]]*(push|pop)[[:blank:]]+"
"("
"all|"
"call-convention|"
"cobol-words|"
"define|"
"source[[:blank:]]+format|"
"turn"
")";
static regex re(pattern, extended_icase);
// show contents of marked subexpressions within each match
cmatch cm;
std::swap(*mfile.eol, eol); // see implementation for excuses
bool ok = regex_search(p, const_cast<const char *>(mfile.eol), cm, re);
std::swap(*mfile.eol, eol);
if( ok ) {
gcc_assert(cm.size() > 1);
bool push = TOUPPER(cm[1].first[1]) == 'U';
switch( TOUPPER(cm[2].first[0]) ) {
case 'A': // ALL
push? cdf_push() : cdf_pop();
break;
case 'C':
switch( TOUPPER(cm[2].first[1]) ) {
case 'A': // CALL-CONVENTION
push? cdf_push_call_convention() : cdf_pop_call_convention();
break;
case 'O': // COBOL-WORDS
push? cdf_push_current_tokens() : cdf_pop_current_tokens();
break;
default:
gcc_unreachable();
}
break;
case 'D': // DEFINE
push? cdf_push_dictionary() : cdf_pop_dictionary();
break;
case 'S': // SOURCE FORMAT
push? cdf_push_source_format() : cdf_pop_source_format();
break;
case 'T': // TURN
push? cdf_push_enabled_exceptions() : cdf_pop_enabled_exceptions();
break;
default:
gcc_unreachable();
}
erase_line(const_cast<char*>(cm[0].first),
const_cast<char*>(cm[0].second));
}
}
static void
check_source_format_directive( filespan_t& mfile ) {
char eol = '\0';
const char *p = std::find(mfile.cur, mfile.eol, '>');
if( ! (p < mfile.eol && p[1] == *p ) ) return;
const char pattern[] =
">>[[:blank:]]*source[[:blank:]]+"
"(format[[:blank:]]+)?"
"(is[[:blank:]]+)?"
"(fixed|free)";
static regex re(pattern, extended_icase);
// show contents of marked subexpressions within each match
cmatch cm;
std::swap(*mfile.eol, eol); // see implementation for excuses
bool ok = regex_search(p, const_cast<const char *>(mfile.eol), cm, re);
std::swap(*mfile.eol, eol);
if( ok ) {
gcc_assert(cm.size() > 1);
switch( cm[3].length() ) {
case 4:
cobol_set_indicator_column(0);
break;
case 5:
cobol_set_indicator_column(-7);
break;
default:
gcc_assert(cm[3].length() == 4 || cm[3].length() == 5);
break;
}
dbgmsg( "%s:%d: %s format set, on line " HOST_SIZE_T_PRINT_UNSIGNED,
__func__, __LINE__,
cdf_source_format().description(),
(fmt_size_t)mfile.lineno() );
char *bol = cdf_source_format().is_fixed()? mfile.cur : const_cast<char*>(cm[0].first);
gcc_assert(cm[0].second <= mfile.eol);
erase_line(bol, const_cast<char*>(cm[0].second));
}
}
struct buffer_t : public bytespan_t {
char *pos; // current output position
buffer_t( char *data, char *eodata )
: bytespan_t(data, eodata)
, pos(data)
{
if(pos) *pos = '\0';
}
size_t nline() const {
gcc_assert(data <= pos);
return std::count(data, pos, '\n');
}
size_t free_space() const { gcc_assert(pos <= eodata); return eodata - pos; }
bool pad_lines( size_t goal ) {
while( nline() < goal ) {
if( pos == eodata ) return false;
*pos++ = '\n';
}
return true;
}
void show() const {
gcc_assert(data <= pos);
dbgmsg("flex input buffer: '%.*s'\n[xelf]", int(pos - data), data);
}
void dump() const {
#ifdef GETENV_OK
if( getenv("lexer_input") ) show();
#endif
}
};
static inline bool is_p( char ch ) { return TOUPPER(ch) == 'P'; }
static bool
is_program_id( const char *p, const char *eol ) {
static const std::string program_id("PROGRAM-ID");
auto eop = p + program_id.size();
if( eop < eol ) {
// PROGRAM-ID must be followed by a dot, perhaps with intervening whitespace.
for( const char *dot=eop; dot < eol && *dot != '.'; dot++ ) {
if( !ISSPACE(*dot) ) return false;
}
std::string line (p, eop);
std::transform(line.begin(), line.end(), line.begin(), ::toupper);
return line == program_id;
}
return false;
}
const char * esc( size_t len, const char input[] );
static bool
is_word_char( char ch ) {
switch(ch) {
case '$':
case '-':
case '_':
return true;
}
return ISALNUM(ch);
}
static bool
is_numeric_char( char ch ) {
return ISDIGIT(ch)
|| TOUPPER(ch) == 'E'
|| ch == '.'
|| ch == ','
;
}
static bool
is_numeric_term( span_t term ) {
gcc_assert(term.p);
if( term.p[0] == '+' || term.p[0] == '-' ) term.p++;
auto p = std::find_if( term.p, term.pend,
[]( char ch ) {
return ! is_numeric_char(ch);
} );
return p == term.pend;
}
struct replacing_term_t {
bool matched, done;
span_t leading_trailing, term, stmt;
explicit replacing_term_t(const char input[])
: matched(false), done(false), stmt(span_t(input, input))
{}
};
extern YYLTYPE yylloc;
static const char *
last_newline (const char *p, const char *pend ) {
size_t len = pend - p;
return static_cast<const char *>( memrchr( p, '\n', len ) );
}
/*
* For some statement parsed with regex_search, set yyloc to indicate the line
* and column spans of the term. Assume stmt begins at the start of a line.
*/
static void
update_yylloc( const csub_match& stmt, const csub_match& term ) {
gcc_assert(stmt.first <= term.first && term.second <= stmt.second);
class dump_loc_on_exit {
public:
dump_loc_on_exit() {
if( gcobol_getenv( "update_yylloc" ) )
location_dump( "update_yylloc", __LINE__, "begin", yylloc);
}
~dump_loc_on_exit() {
if( gcobol_getenv( "update_yylloc" ) )
location_dump( "update_yylloc", __LINE__, "end ", yylloc);
}
} dloe;
size_t nline = std::count( stmt.first, term.second, '\n' );
size_t n = std::count( term.first, term.second, '\n' );
if( nline ) {
yylloc.last_line += nline;
yylloc.first_line = yylloc.last_line - n;
}
/*
* Set the column span for the term.
*/
const char *p = last_newline(stmt.first, stmt.second);
if( !p ) { // no newlines in entire statement
yylloc.first_column = (term.first - stmt.first) + 1;
yylloc.last_column = (term.second - stmt.first) + 1;
return;
}
p = last_newline(stmt.first, term.first);
if( !p ) { // no newlines before term
yylloc.first_column = (term.first - stmt.first) + 1;
p = last_newline(term.first, term.second);
gcc_assert(p); // newline must be in term
yylloc.last_column = (term.second - p) + 1;
return;
}
const char *bol = p; // bol points to last newline before term
yylloc.first_column = term.first - bol;
p = last_newline(term.first, term.second);
if( p ) { // term has newlines, too
yylloc.last_column = (p - term.first);
} else {
yylloc.last_column = yylloc.first_column + term.length();
}
}
static replacing_term_t
parse_replacing_term( const char *stmt, const char *estmt ) {
gcc_assert(stmt); gcc_assert(estmt); gcc_assert(stmt <= estmt);
replacing_term_t output(stmt);
static const char pattern[] =
"^([[:space:];,]+(LEADING|TRAILING|BY))?" // 1, 2
"[[:space:];,]+" // leading space between pairs
"(" // 3
"(\"" "([\"]{2}|[^\"])*" "\")" // 4, 5
"|" "('" "([']{2}|[^'])*" "')" // 6, 7
"|" "(" "[+-]?[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")" // 8, 9
"|" "(==(" "(=?[^=]+)*" ")==)" // 10, 11, 12
")"
"(([[:space:];,]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:];,]*([.]))?" // 13, 14, 15
;
static regex re(pattern, extended_icase);
cmatch cm;
if( ! regex_search( stmt, estmt, cm, re) ) return output;
bool replacing_term = cm[2].matched && TOUPPER(cm[2].first[0]) == 'B';
if( cm[2].matched && ! replacing_term ) {
output.leading_trailing = cm[2];
}
// Apply such that quoted matches supersede word matches.
if( cm[11].matched ) output.term = cm[11];
if( cm[ 8].matched ) output.term = cm[ 8];
if( cm[ 6].matched ) output.term = cm[ 6];
if( cm[ 4].matched ) output.term = cm[ 4];
// The matched segment extends to the end of the matched term, or to
// the dot at end of statement. Include the pseudotext ==, if found.
output.stmt = span_t(cm[0].first, output.term.pend);
if( cm[10].matched ) output.stmt.pend = cm[10].second;
if( cm[15].matched && ISSPACE(cm[15].second[0]) ) { // matched end of statement
output.done = output.matched = true;
output.stmt = cm[0];
gcc_assert(output.stmt.pend[-1] == '.');
dbgmsg("%s:%d: done at '%.*s'", __func__, __LINE__,
output.term.size(), output.term.p);
return output;
}
if( is_numeric_term(output.term) ) {
output.matched = output.stmt.p < output.term.p;
gcc_assert(output.matched);
// look for fractional part
if( is_numeric_char(*output.term.pend) && ISDIGIT(output.term.pend[1]) ) {
gcc_assert(!ISDIGIT(*output.term.pend));
auto p = std::find_if(++output.term.pend, estmt,
[]( char ch ) { return !ISDIGIT(ch); } );
output.stmt.pend = output.term.pend = p;
output.done = '.' == output.stmt.pend[0] && ISSPACE(output.stmt.pend[1]);
if( output.done ) output.stmt.pend++;
}
dbgmsg("%s:%d: %s '%.*s'", __func__, __LINE__,
output.done? "done at" : "term is",
output.term.size(), output.term.p);
return output;
}
if( yy_flex_debug ) { // should be looking only for words
dbgmsg("%s:%d: not done, working with '%.*s'", __func__, __LINE__,
cm[0].length(), cm[0].first);
int i=0;
for( auto m : cm ) {
if( m.matched )
dbgmsg("%4d) '%.*s'", i, m.length(), m.first);
i++;
}
}
if( !cm[8].matched ) {
output.matched = output.stmt.p < output.term.p;
gcc_assert(output.matched);
dbgmsg("%s:%d: term is '%.*s'", __func__, __LINE__,
output.term.size(), output.term.p);
return output;
}
bool extraneous_replacing = 'R' == TOUPPER(cm[8].first[0]); // maybe
if( extraneous_replacing ) { // prove it
static const char replacing[] = "REPLACING";
for( size_t i=0; i < strlen(replacing); i++ ) {
if( replacing[i] != TOUPPER(cm[8].first[i]) ) {
extraneous_replacing = false;
break;
}
}
if( extraneous_replacing ) {
update_yylloc( cm[0], cm[8] );
yywarn("syntax error: invalid '%.*s'", cm[8].length(), cm[8].first);
output.matched = false;
return output;
}
}
gcc_assert(cm[8].matched);
gcc_assert(0 < output.term.size());
dbgmsg("%s:%d: more words starting at '%.80s'", __func__, __LINE__,
output.term.pend);
static const char term_pattern[] =
"^[[:space:]]+"
"(" "(IN|OF)[[:space:]]+" ")" // 1, 2
"(" "[+-]?[[:alnum:]]+([$_-]+[[:alnum:]]+)*" ")" // 3, 4
"(" "[[:space:]]*[(]" ")?" // 5
"(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 6, 7, 8
;
static const char paren_pattern[] =
"^[[:space:]]*"
"(" "[()][^()]*[()]" ")" // 1
"(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 2, 3, 4
;
regex term_re(term_pattern, extended_icase);
regex paren_re(paren_pattern, extended_icase);
ssize_t nsub = 0;
while( regex_search( output.term.pend, estmt, cm, term_re) ) {
output.stmt.pend = output.term.pend = cm[3].second; // found a word
if( cm[5].matched ) break; // found left parenthesis
const csub_match& done(cm[8]);
if( done.matched ) {
output.done = output.matched = output.stmt.p < output.term.p;
gcc_assert(output.done);
goto matched;
}
}
// match subscripts, if any
while( regex_search( output.term.pend, estmt, cm, paren_re) ) {
output.stmt.pend = output.term.pend = cm[1].second;
if( cm[1].first[0] == '(' ) nsub++;
if( cm[1].first[0] == ')' ) nsub--;
if( cm[1].second[-1] == '(' ) nsub++;
if( cm[1].second[-1] == ')' ) nsub--;
const csub_match& done(cm[4]);
if( done.matched ) {
output.matched = output.stmt.p < output.term.p;
output.stmt.pend = done.second;
output.done = output.stmt.pend[-1] == '.';
goto matched;
}
if( nsub == 0 ) break;
}
matched:
output.matched = output.stmt.p < output.term.p;
if( yy_flex_debug ) {
const char *status = "unmatched";
if( output.matched ) status = output.done? "done" : "matched";
dbgmsg("%s:%d: %s term is '%.*s'", __func__, __LINE__, status,
output.term.size(), output.term.p? output.term.p : "");
}
return output;
}
struct replacing_pair_t {
span_t leading_trailing, stmt;
replace_t replace;
bool matched() const { return 0 < stmt.size(); }
bool done() const { return matched() && stmt.pend[-1] == '.'; }
};
static replacing_pair_t
parse_replacing_pair( const char *stmt, const char *estmt ) {
replacing_pair_t pair;
pair.replace = replace_t();
auto parsed = parse_replacing_term( stmt, estmt ); // before
if( parsed.matched ) {
if( parsed.term.size() == 0 ) return pair; // failure: empty before string
pair.leading_trailing = parsed.leading_trailing;
pair.stmt = parsed.stmt;
pair.replace.before = parsed.term;
if( !parsed.done ) {
parsed = parse_replacing_term( pair.stmt.pend, estmt ); // after
if( parsed.matched ) {
pair.stmt.pend = parsed.stmt.pend;
pair.replace.after = parsed.term;
} else {
dbgmsg("%s:%d: not matched '%.*s'", __func__, __LINE__,
pair.stmt.size(), pair.stmt.p);
}
}
if( yy_flex_debug ) {
const char *status = "unmatched";
if( pair.matched() ) status = pair.done()? "done" : "matched";
dbgmsg("%s:%d: [%s] replacing '%.*s' with '%.*s'", __func__, __LINE__,
status,
pair.replace.before.size(), pair.replace.before.p,
pair.replace.after.size(), pair.replace.after.p);
}
} else {
for( auto p = stmt; (p = std::find(p, estmt, '.')) < estmt; p++ ) {
if( ISSPACE(p[1]) ) {
pair.stmt = span_t(stmt, ++p);
break;
}
}
if( pair.stmt.p ) {
yywarn("CDF syntax error '%.*s'", (int)pair.stmt.size(), pair.stmt.p);
}
else {
// This eliminated a compiler warning about "format-overflow"
yywarn("CDF syntax error");
}
pair.stmt = span_t(size_t(0), stmt);
pair.replace = replace_t();
}
return pair;
}
static std::pair<std::list<replace_t>, char *>
parse_replace_pairs( const char *stmt, const char *estmt, bool is_copy_stmt ) {
std::list<replace_t> pairs ;
static const char any_ch[] = ".";
static const char word_ch[] = "[[:alnum:]$_-]";
static const char nonword_ch[] = "[^[:alnum:]\"'$_-]";
// Pattern to find one REPLACE pseudo-text pair
static const char replace_pattern[] =
"([[:space:]]+(LEADING|TRAILING))?" // 1, 2
"[[:space:]]+"
"==(" "(=?[^=]+)+" ")==" // 3, 4
"[[:space:]]+BY[[:space:]]+"
"==(" "(=?[^=]+)*" ")==" // 5, 6
"(([[:space:]]+[\"'=[:alnum:]+-]{1,2})" "|" "[[:space:]]*([.]))?" // 7, 8, 9
;
regex pair_re(replace_pattern, extended_icase);
cmatch cm;
replacing_pair_t parsed;
bool end_of_stmt = false;
for( auto p = stmt; p < estmt && !end_of_stmt; p = parsed.stmt.pend ) {
if( is_copy_stmt ) {
parsed = parse_replacing_pair(p, estmt);
if( parsed.replace.before.size() == 0 ) break; // empty before
if( parsed.replace.after.p == NULL ) break; // invalid after
end_of_stmt = parsed.done();
} else {
if( ! regex_search( p, estmt, cm, pair_re) ) break;
// Report findings.
if( false && yy_flex_debug ) {
for( size_t i=0; i < cm.size(); i++ ) {
dbgmsg("%s: %s " HOST_SIZE_T_PRINT_UNSIGNED ": '%.*s'", __func__,
cm[i].matched? "Pair" : "pair",
(fmt_size_t)i,
cm[i].matched? int(cm[i].length()) : 0,
cm[i].matched? cm[i].first : "");
}
}
gcc_assert(cm[3].matched);
gcc_assert(cm[5].matched);
parsed.leading_trailing = cm[2];
parsed.replace.before = cm[3];
parsed.replace.after = cm[5];
parsed.stmt = cm[0];
// If not done, exclude trailing portion from statement match.
if( !parsed.done() && cm[8].matched ) {
gcc_assert(!cm[9].matched);
parsed.stmt.pend = cm[8].first;
}
}
span_t& before(parsed.replace.before);
const span_t& after(parsed.replace.after);
const char *befter[2] = { nonword_ch, nonword_ch };
gcc_assert(before.p < before.pend);
if( !is_word_char(before.p[0]) ) befter[0] = any_ch;
if( !is_word_char(before.pend[-1]) ) befter[1] = any_ch;
const char *src = esc(before.size(), before.p);
if( parsed.leading_trailing.size() > 0 ) {
switch( TOUPPER(parsed.leading_trailing.p[0]) ) {
case 'L': // leading
befter[1] = word_ch;
break;
case 'T': // trailing
befter[0] = word_ch;
break;
default:
gcc_unreachable();
}
dbgmsg("%s:%d: dealing with %.*s", __func__, __LINE__,
int(parsed.leading_trailing.size()), parsed.leading_trailing.p);
}
src = xasprintf("%s(%s)%s", befter[0], src, befter[1]);
struct { span_t before, after; } output;
output.before = span_t(strlen(src), src);
output.after = after.dup();
gcc_assert(!before.has_nul());
pairs.push_back( replace_t( output.before, output.after ) );
// COPY REPLACING matches end-of-statment here
// REPLACE matched end-of-statement in caller, and estmt[-1] == '.'
if( is_copy_stmt && parsed.stmt.pend[-1] == '.' ) break;
}
if( yy_flex_debug ) {
dbgmsg( "%s:%d: %s: " HOST_SIZE_T_PRINT_UNSIGNED " pairs parsed from '%.*s'",
__func__, __LINE__,
parsed.done() ? "done" : "not done",
(fmt_size_t)pairs.size(), parsed.stmt.size(), parsed.stmt.p );
int i = 0;
for( const auto& replace : pairs ) {
dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__,
++i, replace.before.p, replace.after.p);
}
}
if( !parsed.done() ) {
pairs.clear();
return std::make_pair(pairs, const_cast<char*>(stmt));
}
return std::make_pair(pairs, const_cast<char*>(parsed.stmt.pend));
}
struct copy_descr_t {
bool parsed;
int fd;
size_t nreplace;
span_t partial_line, erased_lines;
copy_descr_t( const char *line, const char *eol)
: parsed(false), fd(-1), nreplace(0), partial_line(line, eol) {}
};
static YYLTYPE
location_in( const filespan_t& mfile, const csub_match& cm ) {
YYLTYPE loc {
int(mfile.lineno() + 1), int(mfile.colno() + 1),
int(mfile.lineno() + 1), int(mfile.colno() + 1)
};
gcc_assert(mfile.cur <= cm.first && cm.second <= mfile.eodata);
auto nline = std::count(cm.first, cm.second, '\n');
if( nline ) {
gcc_assert(nline < loc.first_line);
loc.first_line -= nline;
auto p = static_cast<const char*>(memrchr(cm.first, '\n', cm.length()));
loc.last_column = (cm.second) - p;
}
location_dump(__func__, __LINE__, "copy?", loc);
return loc;
}
static copy_descr_t
parse_copy_directive( filespan_t& mfile ) {
static const char *most_recent_buffer;
static span_t copy_stmt(mfile.eodata, mfile.eodata);
static const char pattern[] =
"COPY" "[[:space:]]+"
/* 1 */ "("
/*2,3*/ "\"(" "([\"]{2}|[^\"])+" ")\""
/*4,5*/ "|" "'(" "([']{2}|[^'])+" ")[']"
/*6,7*/ "|" "(" "[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")"
/* */ ")"
/* 8 */ "("
/* 9 */ "[[:space:]]+(OF|IN)[[:space:]]+"
/* 10*/ "("
/*11,12*/ "(\"" "([\"]{2}|[^\"])+" "\")"
/*13,14*/ "|" "('" "([']{2}|[^'])+" "')"
/*15,16*/ "|" "(" "[[:alnum:]]+([_-]+[[:alnum:]]+)*" ")"
/* */ ")"
/* */ ")?"
/*17,18*/ "([[:space:]]+SUPPRESS([[:space:]]+PRINTING)?)?"
/*19,20 */ "(" "([[:space:]]*[.])" "|" "[[:space:]]+REPLACING" ")"
;
static regex re(pattern, extended_icase);
cmatch cm;
copy_descr_t outcome(mfile.cur, mfile.cur);
// COPY appears in current buffer?
if( most_recent_buffer != mfile.data || copy_stmt.p < mfile.cur ) {
most_recent_buffer = mfile.data;
copy_stmt.p = mfile.eodata;
if( regex_search(mfile.ccur(),
const_cast<const char *>(mfile.eodata), cm, re) ) {
copy_stmt = span_t( cm[0].first, cm[0].second );
if( yy_flex_debug ) {
size_t nnl = 1 + count_newlines(mfile.data, copy_stmt.p);
size_t nst = 1 + count_newlines(copy_stmt.p, copy_stmt.pend);
dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
": COPY directive is " HOST_SIZE_T_PRINT_UNSIGNED " lines '%.*s'",
__func__, __LINE__,
(fmt_size_t)nnl, (fmt_size_t)nst, copy_stmt.size(), copy_stmt.p);
}
}
}
// If COPY appears on the current line, parse it completely this time.
if( mfile.cur <= copy_stmt.p &&
copy_stmt.p < mfile.eol ) {
outcome.parsed = regex_search(copy_stmt.p, copy_stmt.pend, cm, re);
gcc_assert(outcome.parsed);
outcome.partial_line = span_t(mfile.cur, copy_stmt.p);
if( yy_flex_debug ) {
dbgmsg(HOST_SIZE_T_PRINT_UNSIGNED " expressions",
(fmt_size_t)std::count(pattern, pattern + sizeof(pattern), '('));
int i = 0;
for( const auto& m : cm ) {
if( m.matched )
dbgmsg("%s:%d: %2d: '%.*s'", __func__, __LINE__,
i, int(m.length()), m.first);
i++;
}
}
auto& copybook_name = cm[1];
auto& library_name = cm[10];
bool replacing = !cm[20].matched;
if( library_name.matched ) {
YYLTYPE loc = location_in( mfile, library_name );
copybook.library( loc, xstrndup(library_name.first, library_name.length()) );
}
YYLTYPE loc = location_in( mfile, copybook_name );
outcome.fd = copybook.open( loc, xstrndup(copybook_name.first,
copybook_name.length()) );
if( outcome.fd == -1 ) { // let parser report missing copybook
dbgmsg("%s:%d: (no copybook '%s' found)", __func__, __LINE__, copybook.source());
return outcome;
}
if( replacing ) {
std::pair<std::list<replace_t>, char*>
result = parse_replace_pairs( cm[0].second, mfile.eodata, true );
const std::list<replace_t>& replacements(result.first);
outcome.parsed = (outcome.nreplace = replacements.size()) > 0;
if( outcome.parsed ) {
replace_directives.push(replacements);
}
copy_stmt.pend = result.second;
// Maybe we don't need these. We'll see.
for( const auto& r : replacements ) {
copybook.replacement(pseudo_e, r.before.dup().p, r.after.dup().p);
}
}
// If the parse failed, pass it through to the parser for analysis.
if( outcome.parsed ) {
erase_line( const_cast<char*>(copy_stmt.p),
const_cast<char*>(copy_stmt.pend));
outcome.erased_lines = copy_stmt;
}
mfile.eol = const_cast<char*>(copy_stmt.pend);
mfile.next_line();
}
return outcome;
}
static char *
parse_replace_last_off( const filespan_t& mfile ) {
static const char pattern[] =
"REPLACE" "[[:space:]]+"
"(LAST[[:space:]]+)?OFF[[:space:]]*[.]"
;
static regex re(pattern, extended_icase);
cmatch cm;
// REPLACE [LAST] OFF?
bool found = regex_search(mfile.ccur(),
const_cast<const char *>(mfile.eodata), cm, re);
gcc_assert(found); // caller ensures
gcc_assert(cm.size() == 2);
// LAST OFF removes most recent REPLACE
if( cm[1].matched ) {
gcc_assert(TOUPPER(cm[1].first[0]) == 'L');
if( ! replace_directives.empty() ) {
replace_directives.pop();
}
} else { // OFF clears the REPLACE stack
while( ! replace_directives.empty() ) {
replace_directives.pop();
}
}
dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": parsed '%.*s', ",
__func__, __LINE__,
(fmt_size_t)mfile.lineno(), int(cm[0].length()), cm[0].first );
// Remove statement from input
erase_line(const_cast<char*>(cm[0].first),
const_cast<char*>(cm[0].second));
return const_cast<char*>(cm[0].second);
}
static span_t
parse_replace_text( filespan_t& mfile ) {
static const char pattern[] =
/* 0 */ "REPLACE"
/* 1 */ "([[:space:]]+ALSO)?"
/* 2 */ "("
/*3,4*/ "([[:space:]]+(LEADING|TRAILING))?"
/* 5 */ "([[:space:]]+"
/* 6 */ "==" "(=?[^=]+)+" "=="
/* */ "[[:space:]]+BY[[:space:]]+"
/* 7 */ "==" "(=?[^=]+)*" "=="
/* */ ")"
/* */ ")+[[:space:]]*[.]"
;
static regex re(pattern, extended_icase);
cmatch cm;
const size_t current_lineno(mfile.lineno());
if( false && yy_flex_debug ) {
auto pend = mfile.eol;
gcc_assert(mfile.line_length() > 2);
if( pend[-1] == '\n' ) pend -= 2;
auto len = int(pend - mfile.cur);
dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": parsing '%.*s",
__func__, __LINE__,
(fmt_size_t)current_lineno, len, mfile.cur);
}
if( ! regex_search(mfile.ccur(), mfile.eodata, cm, re) ) {
dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED
": not a REPLACE statement:\n'%.*s'",
__func__, __LINE__, (fmt_size_t)current_lineno,
int(mfile.line_length()), mfile.cur );
return span_t();
}
// Report findings.
if( yy_flex_debug ) {
dbgmsg(HOST_SIZE_T_PRINT_UNSIGNED " expressions",
(fmt_size_t)std::count(pattern, pattern + sizeof(pattern), '('));
int i = 0;
for( const auto& m : cm ) {
if( m.matched )
dbgmsg("%s:%d: %2d: '%.*s'", __func__, __LINE__,
i, int(m.length()), m.first);
i++;
}
}
gcc_assert(cm.size() > 7);
// Update active REPLACE stack
if( ! cm[1].matched ) { // ALSO pushes, else clear stack and push one.
while( !replace_directives.empty() ) {
replace_directives.pop();
}
} else {
gcc_assert(TOUPPER(cm[1].first[0]) == 'A');
}
span_t replace_stmt(cm[0].first, cm[0].second);
std::pair<std::list<replace_t>, char*>
result = parse_replace_pairs(replace_stmt.p, replace_stmt.pend, false);
const std::list<replace_t>& replacements(result.first);
replace_directives.push( replacements );
if( yy_flex_debug ) {
dbgmsg( "%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ": " HOST_SIZE_T_PRINT_UNSIGNED
" pairs parsed from '%.*s'", __func__, __LINE__,
(fmt_size_t)current_lineno, (fmt_size_t)replacements.size(),
int(replace_stmt.size()), replace_stmt.p );
for( const auto& replace : replacements ) {
int i = 0;
dbgmsg("%s:%d:%4d: '%s' => '%s'", __func__, __LINE__,
++i, replace.before.p, replace.after.p);
}
}
// Remove statement from input
erase_line(const_cast<char*>(replace_stmt.p),
const_cast<char*>(replace_stmt.pend));
return replace_stmt;
}
static span_t
parse_replace_directive( filespan_t& mfile ) {
static const char *most_recent_buffer, *next_directive;
static bool off_coming_up;
static const char pattern[] =
"REPLACE" "[[:space:]]+" "(LAST|OFF|ALSO|LEADING|TRAILING|==)";
static regex re(pattern, extended_icase);
cmatch cm;
// REPLACE appears in current buffer?
if( most_recent_buffer != mfile.data || next_directive < mfile.cur ) {
most_recent_buffer = mfile.data;
next_directive = mfile.eodata;
if( regex_search(mfile.ccur(),
const_cast<const char *>(mfile.eodata), cm, re) ) {
gcc_assert(cm[1].matched);
next_directive = cm[0].first;
switch( TOUPPER(cm[1].first[0]) ) {
case 'L':
off_coming_up = 'A' == TOUPPER(cm[1].first[1]); // LAST OFF, else LEADING
break;
case 'O': // OFF
off_coming_up = true;
break;
case 'A': case 'T': case '=': // [ALSO] [ eading/Trailing] == ...
off_coming_up = false;
break;
default:
gcc_unreachable();
}
}
}
span_t erased;
// REPLACE appears on current line?
if( mfile.cur <= next_directive &&
next_directive < mfile.eol ) {
if( off_coming_up ) {
parse_replace_last_off(mfile);
} else {
erased = parse_replace_text(mfile);
}
}
return erased;
}
/*
* Maintain the number of newlines by counting those that will be
* overwritten, and appending them to the appended line. Return the
* new EOL pointer.
*
* The newlines accumulate past eodata, at the start of the blank
* lines created by the caller.
*/
char *
bytespan_t::append( const char *input, const char *eoinput ) {
gcc_assert(data < eodata);
#define LEXIO 0
#if LEXIO
auto nq = std::count_if(data, eodata, isquote);
dbgmsg("%s:%3d: input ------ '%.*s'", __func__, __LINE__, int(eoinput - input), input);
dbgmsg("%s:%3d: precondition '%.*s' (" HOST_SIZE_T_PRINT_UNSIGNED ": %s)",
__func__, __LINE__,
int(size()), data, (fmt_size_t)nq, in_string()? "in string" : "not in string");
#endif
if( !in_string() ) { // Remove trailing space unless it's part of a literal.
while(data < eodata && ISSPACE(eodata[-1])) eodata--;
gcc_assert(ISSPACE(eodata[0]));
gcc_assert(data == eodata || !ISSPACE(eodata[-1]));
}
// skip leading blanks
while( input < eoinput && ISSPACE(*input) ) input++;
if( isquote(*input) ) input++;
size_t len = eoinput - input;
char * pend = eodata + len;
int nnl = std::count(eodata, pend, '\n'); // newlines to be overwritten
gcc_assert(0 == std::count(input, eoinput, '\n')); // newlines in input
memmove(eodata, input, len);
nnl += std::count(pend, pend + nnl, '\n'); // other newlines to be overwritten
std::fill(pend, pend + nnl, '\n');
eodata = pend;
#if LEXIO
dbgmsg("%s:%3d: postcondition '%.*s'", __func__, __LINE__, int(size() + len) - 1, data);
#endif
return eodata;
}
const char * cobol_filename();
static filespan_t&
mapped_file( FILE *input ) {
static std::map<int, filespan_t> inputs;
int fd = fileno(input);
gcc_assert(fd > 0);
filespan_t& mfile = inputs[fd];
if( mfile.data ) {
return mfile;
}
struct stat sb;
if( 0 != fstat(fd, &sb) ) {
cbl_err( "%s: could not stat fd %d", __func__, fd );
}
mfile.use_nada();
if( sb.st_size > 0 ) {
static const int flags = MAP_PRIVATE;
void *p = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, flags, fd, 0);
if( p == MAP_FAILED ) {
cbl_err( "%s: could not map fd %d", __func__, fd );
}
mfile.lineno_reset();
mfile.data = mfile.cur = mfile.eol = mfile.eodata = static_cast<char*>(p);
mfile.eodata += sb.st_size;
}
return mfile;
}
char filespan_t::empty_file[8] = " \n";
static void unmap_file( filespan_t& mfile ) {
if( ! mfile.nada() ) {
munmap(mfile.data, mfile.size() - 1);
}
mfile = filespan_t();
}
extern int yylineno;
static void
print_lexer_input( const char *buf, const char *ebuf ) {
const char *eol, *lexio = getenv("lexio");
int i;
static int nbuf = 1;
static FILE *output = NULL;
if( !lexio ) return;
if( !output ) {
output = fopen( lexio, "w" );
if( !output ) output = stderr;
}
fprintf( output, "*> buffer %d\n", nbuf );
for( i = 0, eol = std::find(buf, ebuf, '\n');
eol != ebuf; buf = eol, eol = std::find(buf, ebuf, '\n'), i++ ) {
eol++;
fprintf( output, "%5d %.*s", yylineno + i, int(eol - buf), buf );
}
if( buf < ebuf ) {
fprintf( output, "%5d %.*s", yylineno + i, int(eol - buf), buf );
}
fprintf( output, "*> endbuf %d\n", nbuf++ );
fflush(output);
}
/*
* Fill about as much of the lexer's buffer as possible, except skip
* leading blanks on blank lines.
*/
int
lexer_input( char buf[], int max_size, FILE *input ) {
filespan_t& mfile( mapped_file(input) );
if( mfile.cur == mfile.eodata ) {
if( mfile.cur ) unmap_file(mfile);
return 0;
}
gcc_assert( mfile.data <= mfile.cur && mfile.cur < mfile.eodata );
char *next = std::min(mfile.eodata, mfile.cur + max_size);
buffer_t output(buf, buf + max_size); // initializes pos
// Fill output, keeping only NL for blank lines.
for( auto p = mfile.cur; p < next; *output.pos++ = *p++ ) {
static bool at_bol = false;
if( at_bol ) {
auto nonblank_l = std::find_if( p, next,
[]( char ch ) {
return !isblank(ch); } );
if( nonblank_l + 1 < next ) {
if( *nonblank_l == '\r' ) nonblank_l++; // Windows
if( *nonblank_l == '\n' ) {
p = nonblank_l;
continue;
}
}
}
at_bol = *p == '\n';
}
gcc_assert( output.pos <= output.eodata );
output.eodata = output.pos;
mfile.cur = next;
gcc_assert(mfile.cur <= mfile.eodata);
// Buffer full or input exhausted.
print_lexer_input(output.data, output.eodata);
return output.size();
}
static const char *
find_filter( const char filter[] ) {
if( 0 == access(filter, X_OK) ) {
return filter;
}
const char *path = getenv("PATH");
if( ! path ) return NULL;
char *p = xstrdup(path), *eopath = p + strlen(p);
while( *p != '\0' ) {
auto pend = std::find( p, eopath, ':' );
if( *pend == ':' ) *pend++ = '\0';
char *name = xasprintf( "%s/%s", p, filter );
if( 0 == access(name, X_OK) ) {
return name;
}
p = pend;
}
return NULL;
}
bool verbose_file_reader = false;
typedef std::pair <char *, std::list<std::string> > preprocessor_filter_t;
static std::list<preprocessor_filter_t> preprocessor_filters;
static std::list<const char *> included_files;
/*
* Keep a list of files added with -include on the command line.
*/
bool
include_file_add(const char filename[]) {
struct stat sb;
if( -1 == stat(filename, &sb) ) return false;
included_files.push_back(filename);
return true;
}
bool
preprocess_filter_add( const char input[] ) {
std::list <std::string> options;
std::string filter(input);
size_t pos = filter.find(",");
if( pos != filter.npos ) {
std::vector<char> others( filter.size() - pos, '\0' );
std::copy( filter.begin() + pos + 1, filter.end(), others.begin() );
filter.resize(pos);
char *optstr = others.data();
for( char *opt = optstr + 1; (opt = strtok(opt, ",")); opt = NULL ) {
options.push_back(opt);
}
}
auto filename = find_filter(filter.c_str());
if( !filename ) {
yywarn("preprocessor '%s/%s' not found", getcwd(NULL, 0), filter.c_str());
return false;
}
preprocessor_filters.push_back( std::make_pair(xstrdup(filename), options) );
return true;
}
void
cdftext::echo_input( int input, const char filename[] ) {
int fd;
if( -1 == (fd = dup(input)) ) {
yywarn( "could not open preprocessed file %s to echo to standard output",
filename );
return;
}
auto mfile = map_file(fd);
if( -1 == write(STDOUT_FILENO, mfile.data, mfile.size()) ) {
yywarn( "could not write preprocessed file %s to standard output",
filename );
}
if( -1 == munmap(mfile.data, mfile.size()) ) {
yywarn( "could not release mapped file" );
}
if( -1 == close(fd) ) {
yywarn( "could not close mapped file" );
}
}
static inline ino_t
inode_of( int fd ) {
struct stat sb;
if( -1 == fstat(fd, &sb) ) {
cbl_err( "could not stat fd %d", fd);
}
return sb.st_ino;
}
FILE *
cdftext::lex_open( const char filename[] ) {
int input = open_input( filename );
if( input == -1 ) return NULL;
int output = open_output();
// Process any files supplied by the -include command-line option.
for( auto name : included_files ) {
if( -1 == (input = open(name, O_RDONLY)) ) {
yyerrorvl(1, "", "cannot open -include file %s", name);
continue;
}
cobol_filename(name, inode_of(input));
filespan_t mfile( free_form_reference_format( input ) );
process_file( mfile, output );
cobol_filename_restore(); // process_file restores only for COPY
}
included_files.clear();
cobol_filename(filename, inode_of(input));
filespan_t mfile( free_form_reference_format( input ) );
process_file( mfile, output );
if( lexer_echo() ) {
echo_input(output, filename);
}
for( auto filter_pair : preprocessor_filters ) {
input = output;
output = open_output();
char *filter = filter_pair.first;
std::list<std::string>& options = filter_pair.second;
std::vector <char*> argv(2 + options.size(), NULL);
argv[0] = filter;
auto last_argv = std::transform( options.begin(), options.end(), argv.begin() + 1,
[]( const std::string& opt ) {
return xstrdup(opt.c_str());
} );
*last_argv = NULL;
pid_t pid = fork();
switch(pid){
case -1: cbl_err( "%s", __func__);
break;
case 0: // child
if( -1 == dup2(input, STDIN_FILENO) ) {
cbl_err( "%s: could not dup input", __func__);
}
if( -1 == dup2(output, STDOUT_FILENO) ) {
cbl_err( "%s: could not dup output", __func__);
}
if( -1 == lseek(STDIN_FILENO, SEEK_SET, 0) ) {
cbl_err( "%s: could not seek to start of file", __func__);
}
int erc;
if( -1 == (erc = execv(filter, argv.data())) ) {
yywarn("could not execute %s", filter);
}
_exit(erc);
}
int status;
auto kid = wait(&status);
gcc_assert(pid == kid);
if( kid == -1 ) cbl_err( "failed waiting for pid %ld", static_cast<long>(pid));
if( WIFSIGNALED(status) ) {
cbl_errx( "%s pid %ld terminated by %s",
filter, static_cast<long>(kid), strsignal(WTERMSIG(status)) );
}
if( WIFEXITED(status) ) {
if( (status = WEXITSTATUS(status)) != 0 ) {
cbl_errx( "%s exited with status %d",
filter, status);
}
}
yywarn( "applied %s", filter );
}
return fdopen( output, "r");
}
int
cdftext::open_input( const char filename[] ) {
int fd = open(filename, O_RDONLY);
if( fd == -1 ) {
dbgmsg( "could not open '%s': %s", filename, xstrerror(errno) );
}
verbose_file_reader = NULL != getenv("GCOBOL_TEMPDIR");
if( verbose_file_reader ) {
yywarn("verbose: opening %s for input", filename);
}
return fd;
}
int
cdftext::open_output() {
char *name = getenv("GCOBOL_TEMPDIR");
if( name && 0 != strcmp(name, "/") ) {
int fd;
char * stem = xasprintf("%sXXXXXX", name);
if( -1 == (fd = mkstemp(stem)) ) {
cbl_err( "could not open temporary file '%s' (%s)",
name, realpath(name, stem));
}
return fd;
}
FILE *fh = tmpfile();
if( !fh ) {
cbl_err("could not create temporary file");
}
return fileno(fh);
}
filespan_t
cdftext::map_file( int fd ) {
gcc_assert(fd > 0);
filespan_t mfile;
mfile.use_nada();
struct stat sb;
do {
if( 0 != fstat(fd, &sb) ) {
cbl_err( "%s: could not stat fd %d", __func__, fd );
}
if( S_ISFIFO(sb.st_mode) ) {
// Copy FIFO to regular file that can be mapped.
int input = open_output();
std::swap(fd, input); // fd will continue to be the input
static char block[4096 * 4];
ssize_t n;
while( (n = read(input, block, sizeof(block))) != 0 ) {
ssize_t nout = write(fd, block, n);
if( nout != n ) {
cbl_err( "%s: could not prepare map file from FIFO %d",
__func__, input);
}
if( false ) dbgmsg("%s: copied " HOST_SIZE_T_PRINT_DEC " bytes from FIFO",
__func__, (fmt_size_t)nout);
}
}
} while( S_ISFIFO(sb.st_mode) );
if( sb.st_size > 0 ) {
static const int flags = MAP_PRIVATE;
void *p = mmap(0, sb.st_size, PROT_READ|PROT_WRITE, flags, fd, 0);
if( p == MAP_FAILED ) {
cbl_err( "%s: could not map fd %d", __func__, fd );
}
mfile.lineno_reset();
mfile.data = mfile.cur = mfile.eol = mfile.eodata = static_cast<char*>(p);
mfile.eodata += sb.st_size;
}
return mfile;
}
bool lexio_dialect_mf();
/*
* A valid sequence area is 6 digits or blanks at the begining of the line that
* contains PROGRAM-ID. Return NULL if no valid sequence area, else return
* pointer to BOL.
*/
static const char *
valid_sequence_area( const char *data, const char *eodata ) {
for( const char *p = data;
(p = std::find_if(p, eodata, is_p)) != eodata;
p++ )
{
auto eol = std::find(p, eodata, '\n');
if( p == data || ISSPACE(p[-1]) ) {
if( is_program_id(p, eol) ) { // found program-id token
const char *bol = p;
for( ; data <= bol-1 && bol[-1] != '\n'; --bol )
;
if( 6 < p - bol ) {
if( std::all_of(bol, bol+6, ::isdigit) ) {
return bol;
}
if( std::all_of(bol, bol+6, ::isblank) ) {
return bol;
}
break;
}
}
}
}
return nullptr;
}
/*
* Reference Format -- valid COBOL between columns 8 and 72 -- has data after
* column 72 on the PROGRAM-ID line. Extended Reference Format (that allows
* longer lines) has no reason to follow the PROGRAM-ID with more stuff.
*/
static bool
infer_reference_format( const char *bol, const char *eodata ) {
assert(bol);
auto eol = std::find(bol, eodata, '\n');
if( 72 < eol - bol ) {
return ! std::all_of(bol + 72, eol, ::isspace);
}
return false;
}
filespan_t
cdftext::free_form_reference_format( int input ) {
filespan_t source_buffer = map_file(input);
filespan_t mfile(source_buffer);
/*
* current_line_t describes the segment of mapped file that is the
* "current line" being processed. Its only use is for line
* continuation, whether string literals or not.
*/
struct current_line_t {
size_t lineno;
bytespan_t line;
// construct with length zero
explicit current_line_t( char data[] ) : lineno(0), line(data, data) {}
} current( mfile.data );
/*
* Infer source code format.
*/
if( cdf_source_format().inference_pending() ) {
const char *bol = valid_sequence_area(mfile.data, mfile.eodata);
if( bol ) {
cdf_source_format().infer( bol, infer_reference_format(bol, mfile.eodata) );
}
}
while( mfile.next_line() ) {
check_push_pop_directive(mfile);
check_source_format_directive(mfile);
remove_inline_comment(mfile.cur, mfile.eol);
if( mfile.is_blank_line() ) continue;
char *indcol = indicated(mfile.cur, mfile.eol); // true only for fixed
// // format
if( is_fixed_format() && !indcol ) { // short line
erase_source(mfile.cur, mfile.eol);
}
if( indcol ) {
// Set to blank columns 1-6 and anything past the right margin.
erase_source(mfile.cur, indcol);
if( is_reference_format() ) {
if( mfile.cur + right_margin() < mfile.eol ) {
auto p = std::find(mfile.cur + right_margin(), mfile.eol, '\n');
erase_source(mfile.cur + right_margin(), p);
}
}
switch( TOUPPER(*indcol) ) {
case '-':
gcc_assert(0 < current.line.size());
/*
* The "current line" -- the line being continued -- may be many
* lines earlier (with many intervening newlines) or may intrude
* on its succeeding line. Erase the continuation line.
*/
{
char *pend = mfile.eol;
if( right_margin() ) {
pend = std::min(mfile.cur + right_margin(), mfile.eol);
}
// The appended segment has no newline because the erased line retains
// one.
pend = std::find(indcol + 1, pend, '\n');
char *p = current.line.append(indcol + 1, pend );
if( (p = std::max(p, mfile.cur)) < mfile.eol ) {
erase_source(p, mfile.eol);
}
}
continue;
case SPACE:
break;
case 'D':
/*
* Pass the D to the lexer, because WITH DEBUGGING MODE is
* parsed in the parser. This assumes too strict a rule: that
* all the source is in one format. In fact, DEBUGGING MODE
* could be set on, and >>SOURCE-FORMAT can switch back and
* forth. To solve that, we'd have to parse WITH DEBUGGING MODE
* in free_form_reference_format(), which is a lot of work for
* an obsolete feature.
*/
break;
case '*':
case '/':
if( indcol < mfile.eol - 1 ) {
erase_source(indcol, mfile.eol);
}
continue;
case '$':
if( lexio_dialect_mf() ) {
break;
}
__attribute__ ((fallthrough));
default: // flag other characters in indicator area
if( ! ISSPACE(indcol[0]) ) {
yyerrorvl( mfile.lineno(), cobol_filename(),
"error: stray indicator '%c' (0x%x): \"%.*s\"",
indcol[0], indcol[0],
int(mfile.line_length() - 1), mfile.cur );
*indcol = SPACE;
}
break;
}
}
current.line.update(mfile.cur, mfile.eol, right_margin());
current.lineno = mfile.lineno();
} // next line
return source_buffer;
}
/*
* process_file is a recursive routine that opens and processes
* included files. It uses the input file stack in two ways: to check
* copybook uniqueness, and (via the lexer) to keep track filenames
* and line numbers.
*
* When reading copybook files, the copybook object enforces the rule
* that no copybook may include itself, even indirectly. It does that
* by relying on the unique_stack to deny a push. Because the reader
* makes no attempt to count lines, line numbers in the input stack
* are all 1 at this point.
*
* When returning from the top-level recursion, the input stack has
* the original file's name on top, with depth 1. At that point, the
* lexer begins tokenizing the input.
*
* The input stream sent to the lexer is delimited by #FILE tokens
* denoting the source filename. As far as the lexer is concerned,
* there's only ever one file: the name passed to lex_open() when we
* kicked things off. But messages and the debugger need to know
* which file and line each statment appeared in.
*
* The lexer uses the input stack to keep track of names and
* numbers. The top of the input file stack is the current file
* context, initially set to line 1. When the lexer sees a push, it
* updates the top-of-stack with the current line number, yylineno,
* and then pushes the copybook filename with line 1. When it sees a
* pop, the current file is popped, of course; its line number no
* longer matters. Then the top-of-stack is used to update the current
* cobol filename and yylineno.
*/
void
cdftext::process_file( filespan_t mfile, int output, bool second_pass ) {
static size_t nfiles = 0;
__gnu_cxx::stdio_filebuf<char> outbuf(fdopen(output, "a"), std::ios::out);
std::ostream out(&outbuf);
std::ostream_iterator<char> ofs(out);
// indicate current file
static const char file_push[] = "\f#FILE PUSH ", file_pop[] = "\f#FILE POP\f";
if( !included_files.empty() ) { ++nfiles; }; // force push/pop of included filename
if( !second_pass && nfiles++ ) {
static const char delimiter[] = "\f";
const char *filename = cobol_filename();
std::copy(file_push, file_push + strlen(file_push), ofs);
std::copy(filename, filename + strlen(filename), ofs);
std::copy(delimiter, delimiter + strlen(delimiter), ofs);
out.flush();
}
// parse CDF directives
while( mfile.next_line() ) {
yylloc = mfile.as_location();
auto copied = parse_copy_directive(mfile);
if( copied.parsed && copied.fd != -1 ) {
gcc_assert(copied.erased_lines.p);
std::copy_if(copied.erased_lines.p, copied.erased_lines.pend, ofs,
[]( char ch ) { return ch == '\n'; } );
struct { int in, out; filespan_t mfile; } copy;
dbgmsg("%s:%d: line " HOST_SIZE_T_PRINT_UNSIGNED ", opening %s on fd %d",
__func__, __LINE__, (fmt_size_t)mfile.lineno(),
copybook.source(), copybook.current()->fd);
copy.in = copybook.current()->fd;
copy.mfile = free_form_reference_format( copy.in );
if( copied.partial_line.size() ) {
std::copy(copied.partial_line.p, copied.partial_line.pend, ofs);
}
out.flush();
if( copied.nreplace == 0 ) {
// process with extant REPLACE directive
process_file(copy.mfile, output);
} else {
copy.out = open_output();
// process to intermediate, applying COPY ... REPLACING
process_file(copy.mfile, copy.out);
copy.mfile = map_file(copy.out);
replace_directives.pop();
// process intermediate with extant REPLACE directive
process_file(copy.mfile, output, true);
// COPY statement is erased from input if processed successfully
}
cobol_filename_restore();
}
auto erased = parse_replace_directive(mfile);
if( erased.p ) {
std::copy_if( erased.p, erased.pend, ofs,
[]( char ch ) { return ch == '\n'; } );
}
if( replace_directives.empty() ) {
std::copy(mfile.cur, mfile.eol, ofs);
continue; // No active REPLACE directive.
}
std::list<span_t> segments = segment_line(mfile);
for( const auto& segment : segments ) {
std::copy(segment.p, segment.pend, ofs);
}
out.flush();
}
// end of file
if( !second_pass && --nfiles ) {
std::copy(file_pop, file_pop + strlen(file_pop), ofs);
out.flush();
}
if( !included_files.empty() ) { --nfiles; };
}
std::list<span_t>
cdftext::segment_line( filespan_t& mfile ) {
std::list<span_t> output;
gcc_assert( ! replace_directives.empty() );
std::list<replace_t> pending;
recognize_replacements( mfile, pending );
if( pending.empty() ) {
output.push_back( span_t(mfile.cur, mfile.eol) );
return output;
}
/*
* If the replacement changes the number of lines in the replaced text, we
* need to reset the line number, because the next statement is on a
* different line in the manipulated text than in the original. Before each
* replacement, set the original line number. After each replacement, set
* the line number after the elided text on the next line.
*/
for( const replace_t& segment : pending ) {
gcc_assert(mfile.cur <= segment.before.p);
gcc_assert(segment.before.pend <= mfile.eodata);
struct { unsigned long ante, post; } lineno = {
gb4(mfile.lineno()), gb4(mfile.lineno() + segment.after.nlines())
};
const char *directive = lineno.ante == lineno.post?
nullptr : xasprintf("\n#line %lu \"%s\"\n",
lineno.ante, cobol_filename());
if( directive )
output.push_back( span_t(strlen(directive), directive) );
output.push_back( span_t(mfile.cur, segment.before.p) );
output.push_back( span_t(segment.after.p, segment.after.pend ) );
if( directive )
output.push_back( span_t(strlen(directive), directive) );
mfile.cur = const_cast<char*>(segment.before.pend);
}
if( mfile.eol < mfile.cur ) {
if( (mfile.eol = std::find(mfile.cur, mfile.eodata, '\n')) < mfile.eodata ) {
mfile.eol++;
}
}
// last segment takes to EOL
output.push_back( span_t(mfile.cur, mfile.eol) );
return output;
}