| /* |
| * Copyright (c) 2021-2025 Symas Corporation |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions are |
| * met: |
| * |
| * * Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * * Redistributions in binary form must reproduce the above |
| * copyright notice, this list of conditions and the following disclaimer |
| * in the documentation and/or other materials provided with the |
| * distribution. |
| * * Neither the name of the Symas Corporation nor the names of its |
| * contributors may be used to endorse or promote products derived from |
| * this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| /* |
| * Flex override |
| */ |
| static void /* yynoreturn */ yy_fatal_error ( const char* msg ); |
| |
| static void inline |
| die_fatal_error( const char msg[] ) { |
| cbl_internal_error("scan.o: %s", msg); |
| yy_fatal_error(msg); |
| } |
| |
| #define YY_FATAL_ERROR(msg) die_fatal_error((msg)) |
| |
| /* |
| * External functions |
| */ |
| |
| void parser_enter_file(const char *filename); |
| void parser_leave_file(); |
| |
| bool is_fixed_format(); |
| bool include_debug(); |
| int lexer_input( char buf[], int max_size, FILE *input ); |
| |
| const char * keyword_str( int token ); |
| |
| int repository_function_tok( const char name[] ); |
| |
| void cobol_set_indicator_column( int column ); |
| |
| void next_sentence_label(cbl_label_t*); |
| |
| int repeat_count( const char picture[] ); |
| |
| size_t program_level(); |
| |
| int ydfparse(void); |
| |
| FILE * copy_mode_start(); |
| |
| /* |
| * Public functions and data |
| */ |
| |
| cbl_label_t *next_sentence; |
| |
| static bool echo_on = false; |
| |
| void |
| lexer_echo( bool tf ) { |
| echo_on = tf; |
| } |
| |
| bool |
| lexer_echo() { |
| return echo_on; |
| } |
| |
| // IBM says a picture can be up to 50 bytes, not 1000 words. |
| // ISO says a picture can be up to 63 bytes. We allow for a NUL terminator. |
| static char orig_picture[PICTURE_MAX]; |
| static char orig_number[80]; |
| |
| const char * |
| original_picture() { |
| const char *out = xstrdup(orig_picture); |
| assert(orig_picture[0] != '\0'); |
| return out; |
| } |
| |
| char * |
| original_number( char input[] = NULL ) { |
| if( input ) { |
| if(sizeof(orig_number) < strlen(input) ) return NULL; |
| strcpy(orig_number, input); |
| return input; |
| } |
| char *out = xstrdup(orig_number); |
| assert(orig_number[0] != '\0'); |
| return out; |
| } |
| |
| /* |
| * Local functions |
| */ |
| static const char * start_condition_str( int sc ); |
| static const char * start_condition_is(); |
| |
| static bool nonspace( char ch ) { return !ISSPACE(ch); } |
| |
| static int |
| numstr_of( const char string[], radix_t radix = decimal_e ) { |
| yylval.numstr.radix = radix; |
| ydflval.string = yylval.numstr.string = xstrdup(string); |
| char *comma = strchr(yylval.numstr.string, ','); |
| if( comma && comma[1] == '\0' ) *comma = '\0'; |
| if( ! original_number(yylval.numstr.string) ) { |
| error_msg(yylloc, "input inconceivably long"); |
| return NO_CONDITION; |
| } |
| |
| const char *input = yylval.numstr.string; |
| auto eoinput = input + strlen(input); |
| auto p = std::find_if( input, eoinput, |
| []( char ch ) { return ch == 'e' || ch == 'E';} ); |
| |
| if( p < eoinput ) { |
| if( eoinput == std::find(input, eoinput, symbol_decimal_point()) ) { |
| // no decimal point: 1E0 is a valid user-defined name |
| ydflval.string = yylval.string = yylval.numstr.string; |
| return NAME; |
| } |
| assert(input < p); |
| // "The literal to the left of the 'E' represents the significand. It may |
| // be signed and shall include a decimal point. The significand shall be |
| // from 1 to 36 digits in length." |
| if( p == std::find(input, p, symbol_decimal_point()) ) { |
| return NO_CONDITION; |
| } |
| auto nx = std::count_if(input, p, fisdigit); |
| if( 36 < nx ) { |
| error_msg(yylloc, "significand of %s has more than 36 digits (%td)", input, nx); |
| return NO_CONDITION; |
| } |
| |
| // "The literal to the right of the 'E' represents the exponent. It may be |
| // signed and shall have a maximum of four digits and no decimal point. " |
| // "The maximum permitted value and minimum permitted value of the |
| // exponent is implementor-defined." (We allow 9999.) |
| nx = std::count_if(p, eoinput, fisdigit); |
| if( 4 < nx ) { |
| error_msg(yylloc, "exponent %qs more than 4 digits", ++p); |
| return NO_CONDITION; |
| } |
| if( eoinput != std::find(p, eoinput, symbol_decimal_point()) ) { |
| error_msg(yylloc, "exponent %qs includes decimal point", ++p); |
| return NO_CONDITION; |
| } |
| |
| // "If all the digits in the significand are zero, then all the digits of |
| // the exponent shall also be zero and neither significand nor exponent |
| // shall have a negative sign." |
| bool zero_signficand = std::all_of( input, p, |
| []( char ch ) { |
| return !ISDIGIT(ch) || ch == '0'; } ); |
| if( zero_signficand ) { |
| if( p != std::find(input, p, '-') ) { |
| error_msg(yylloc, "zero significand of %s " |
| "cannot be negative", input); |
| return NO_CONDITION; |
| } |
| if( eoinput != std::find(p, eoinput, '-') ) { |
| error_msg(yylloc, "exponent of zero significand of %s " |
| "cannot be negative", input); |
| return NO_CONDITION; |
| } |
| } |
| } |
| if( 1 < std::count(input, eoinput, symbol_decimal_point()) ) { |
| error_msg(yylloc, "invalid numeric literal %qs", ++p); |
| return NO_CONDITION; |
| } |
| |
| return NUMSTR; |
| } |
| |
| static char * |
| null_trim( char name[] ) { |
| auto p = std::find_if( name, name + strlen(name), fisspace ); |
| if( p < name + strlen(name) ) *p = '\0'; |
| return name; |
| } |
| |
| /* |
| * CDF management |
| */ |
| static int final_token; |
| |
| static inline const char * |
| boolalpha( bool tf ) { return tf? "True" : "False"; } |
| |
| struct cdf_status_t { |
| int lineno; |
| const char *filename; |
| int token; |
| bool parsing; |
| cdf_status_t() |
| : lineno(yylineno), filename(cobol_filename()) |
| , token(0), parsing(true) |
| {} |
| cdf_status_t( int token, bool parsing ) |
| : lineno(yylineno), filename(cobol_filename()) |
| , token(token), parsing(parsing) |
| {} |
| bool toggle() { return parsing = ! parsing; } |
| |
| const char * str() const { |
| static char line[132]; |
| snprintf(line, sizeof(line), "%s:%d: %s, parsing %s", |
| filename, lineno, keyword_str(token), boolalpha(parsing)); |
| return line; |
| } |
| static const char * as_string( const cdf_status_t& status ) { |
| return status.str(); |
| } |
| }; |
| |
| /* |
| * Scanning status is true if tokens are being parsed and false if not (because |
| * CDF is skipping some code). Because CDF status is nested, status is true |
| * only if the whole stack is true. That is, if B is stacked on A, and A is |
| * false, then all of B is skipped, regardless of >>IF and >>ELSE for B. |
| */ |
| static bool run_cdf( int token ); |
| |
| static class parsing_status_t : public std::stack<cdf_status_t> { |
| typedef int (parser_t)(void); |
| struct parsing_state_t { |
| bool at_eof, expect_field_level; |
| int pending_token; |
| parser_t *parser; |
| parsing_state_t() |
| : at_eof(false) |
| , expect_field_level(true) |
| , pending_token(0) |
| , parser(yyparse) |
| {} |
| } state, shadow; |
| |
| public: |
| bool on() const { // true only if all true |
| bool parsing = std::all_of( c.begin(), c.end(), |
| []( const auto& status ) { return status.parsing; } ); |
| return parsing; |
| } |
| |
| bool feed_a_parser() const { |
| return on() || state.parser == ydfparse; |
| } |
| |
| void need_level( bool tf ) { state.expect_field_level = tf; } |
| bool need_level() const { return state.expect_field_level; } |
| |
| void parser_save( parser_t * new_parser ) { |
| shadow = state; |
| state.parser = new_parser; |
| } |
| void parser_restore() { |
| state.parser = shadow.parser; |
| } |
| |
| void inject_token( int token ) { state.pending_token = token; } |
| int pending_token() { |
| int token = state.pending_token; |
| state.pending_token = 0; |
| return token; |
| } |
| |
| void at_eof( bool tf ) { state.at_eof = shadow.at_eof = tf; assert(tf); } |
| bool at_eof() const { return state.at_eof; } |
| |
| bool in_cdf() const { return state.parser == ydfparse; } |
| bool normal() const { return on() && state.parser == yyparse; } |
| |
| void splat() const { |
| int i=0; |
| for( const auto& status : c ) { |
| yywarn( "%d %s", ++i, status.str() ); |
| } |
| } |
| } parsing; |
| |
| // Used only by parser, so scanner_normal() obviously true. |
| void field_done() { orig_picture[0] = '\0'; parsing.need_level(true); } |
| |
| static int scanner_token() { |
| if( parsing.empty() ) { |
| error_msg(yylloc, "%<>>ELSE%> or %<>>END-IF%> without %<>>IF%>"); |
| return NO_CONDITION; |
| } |
| return parsing.top().token; |
| } |
| |
| bool scanner_parsing() { return parsing.on(); } |
| bool scanner_normal() { return parsing.normal(); } |
| |
| void scanner_parsing( int token, bool tf ) { |
| parsing.push( cdf_status_t(token, tf) ); |
| if( yydebug ) { |
| yywarn("%s: parsing now %s, depth %zu", |
| keyword_str(token), boolalpha(parsing.on()), parsing.size()); |
| parsing.splat(); |
| } |
| } |
| void scanner_parsing_toggle() { |
| if( parsing.empty() ) { |
| error_msg(yylloc, "%<>>ELSE%> without %<>>IF%>"); |
| return; |
| } |
| parsing.top().toggle(); |
| if( yydebug ) { |
| yywarn("%s: parsing now %s", |
| keyword_str(CDF_ELSE), boolalpha(parsing.on())); |
| } |
| } |
| void scanner_parsing_pop() { |
| if( parsing.empty() ) { |
| error_msg(yylloc, "%<>>END-IF%> without %<>>IF%>"); |
| return; |
| } |
| parsing.pop(); |
| if( yydebug ) { |
| yywarn("%s: parsing now %s, depth %zu", |
| keyword_str(CDF_END_IF), boolalpha(parsing.on()), |
| parsing.size()); |
| parsing.splat(); |
| } |
| } |
| |
| |
| static bool level_needed() { |
| return scanner_normal() && parsing.need_level(); |
| } |
| |
| static void level_found() { |
| if( scanner_normal() ) parsing.need_level(false); |
| } |
| |
| /* |
| * Trim the scanned location by the amount about to re-scanned. |
| * Must be a macro because it expands yyless. |
| */ |
| #define myless(N) \ |
| do { \ |
| auto n(N); \ |
| trim_location(n); \ |
| yyless(n); \ |
| } while(0) |
| |
| class enter_leave_t { |
| typedef void( parser_enter_file_f)(const char *filename); |
| typedef void (parser_leave_file_f)(); |
| parser_enter_file_f *entering; |
| parser_leave_file_f *leaving; |
| const char *filename; |
| |
| public: |
| enter_leave_t() : entering(NULL), leaving(NULL), filename(NULL) {} |
| enter_leave_t( parser_enter_file_f *entering, const char *filename ) |
| : entering(entering), leaving(NULL), filename(filename) |
| {} |
| explicit enter_leave_t(parser_leave_file_f *leaving) |
| : entering(NULL), leaving(leaving), filename(NULL) {} |
| |
| void notify() { |
| if( entering ) { |
| cobol_filename(filename, 0); |
| if( yy_flex_debug ) dbgmsg("starting line %4d of %s", |
| yylineno, filename); |
| entering(filename); |
| gcc_assert(leaving == NULL); |
| } |
| if( leaving ) { |
| cobol_filename_restore(); |
| if( yy_flex_debug ) dbgmsg("resuming line %4d of %s", |
| yylineno, cobol_filename()); |
| leaving(); |
| gcc_assert(entering == NULL); |
| } |
| } |
| }; |
| |
| static class input_file_status_t { |
| std::queue <enter_leave_t> inputs; |
| public: |
| void enter(const char *filename) { |
| inputs.push( enter_leave_t(parser_enter_file, filename) ); |
| } |
| void leave() { |
| inputs.push( enter_leave_t(parser_leave_file) ); |
| } |
| void notify() { |
| while( ! inputs.empty() ) { |
| auto enter_leave = inputs.front(); |
| enter_leave.notify(); |
| inputs.pop(); |
| } |
| } |
| } input_file_status; |
| |
| void input_file_status_notify() { input_file_status.notify(); } |
| |
| /* |
| * parse.y and cdf.y each define a 4-integer struct to hold a token's location. |
| * parse.y uses YYLTYPE yylloc; |
| * cdf.y uses YDFLLTYPE ydflloc; |
| * |
| * The structs have identical definitions with different types and of course |
| * names. We define "conversion" between them for convenience. |
| * |
| * Each parser expects its location value to be updated whenever it calls |
| * yylex(). Therefore, here in the lexer we set both locations as each token |
| * is scanned, so that both parsers see the same location. |
| */ |
| static YDFLTYPE |
| ydfltype_of( const YYLTYPE& loc ) { |
| YDFLTYPE output { |
| loc.first_line, loc.first_column, |
| loc.last_line, loc.last_column }; |
| return output; |
| } |
| |
| /* |
| * After the input filename and yylineno are set, update the location of the |
| * scanned token. |
| */ |
| static void |
| update_location( const YYLTYPE *ploc = nullptr ) { |
| YYLTYPE loc = { |
| yylloc.last_line, yylloc.last_column, |
| yylineno, yylloc.last_column + yyleng |
| }; |
| if( ploc ) loc = *ploc; |
| |
| const char *p = static_cast<char*>(memrchr(yytext, '\n', yyleng)); |
| if( p ) { |
| loc.last_column = (yytext + yyleng) - p; |
| } |
| |
| yylloc = loc; |
| ydflloc = ydfltype_of(yylloc); |
| |
| dbgmsg(" SC: %s location (%d,%d) to (%d,%d)", |
| start_condition_is(), |
| yylloc.first_line, yylloc.first_column, |
| yylloc.last_line, yylloc.last_column); |
| } |
| |
| static void |
| reset_location() { |
| static const YYLTYPE loc { yylineno, 1, yylineno, 1 }; |
| update_location(&loc); |
| } |
| |
| #define YY_USER_ACTION update_location(); |
| |
| static void |
| trim_location( int nkeep) { |
| gcc_assert( 0 <= nkeep && nkeep <= yyleng ); |
| struct { char *p, *pend; |
| size_t size() const { return pend - p; } |
| } rescan = { yytext + nkeep, yytext + yyleng }; |
| |
| auto nline = std::count(rescan.p, rescan.pend, '\n'); |
| dbgmsg("%s:%d: yyless(%d), rescan '%.*s' (" HOST_SIZE_T_PRINT_UNSIGNED |
| " lines, " HOST_SIZE_T_PRINT_UNSIGNED " bytes)", |
| __func__, __LINE__, |
| nkeep, |
| int(rescan.size()), rescan.p, |
| (fmt_size_t)nline, (fmt_size_t)rescan.size()); |
| if( nline ) { |
| gcc_assert( yylloc.first_line + nline <= yylloc.last_line ); |
| yylloc.last_line -= int(nline); |
| gcc_assert( yylloc.first_line <= yylloc.last_line ); |
| char *p = static_cast<char*>(memrchr(rescan.p, '\n', rescan.size())); |
| yylloc.last_column = rescan.pend - ++p; |
| return; |
| } |
| |
| gcc_assert( int(rescan.size()) < yylloc.last_column ); |
| yylloc.last_column -= rescan.size(); |
| if( yylloc.last_column < yylloc.first_column ) { |
| yylloc.first_column = 1; |
| } |
| |
| location_dump(__func__, __LINE__, "yylloc", yylloc); |
| } |
| |
| static void |
| update_location_col( const char str[], int correction = 0) { |
| auto col = yylloc.last_column - strlen(str) + correction; |
| if( col > 0 ) { |
| yylloc.first_column = col; |
| } |
| location_dump(__func__, __LINE__, "yylloc", yylloc); |
| } |
| |
| #define not_implemented(...) cbl_unimplemented_at(yylloc, __VA_ARGS__) |
| |
| #define YY_USER_INIT do { \ |
| static YYLTYPE ones = {1,1, 1,1}; \ |
| yylloc = ones; \ |
| ydflloc = ydfltype_of(yylloc); \ |
| } while(0) |
| |
| /* |
| * YY_DECL is the generated lexer. The parser calls yylex(). yylex() invokes |
| * next_token(), which calls this lexer function. The Flex-generated code |
| * updates neither yylval nor yylloc. That job is left to the actions. |
| * |
| * The parser relies on yylex to set yylval and yylloc each time it is |
| * called. It maintains a separate copy for each term, and uses |
| * YYLLOC_DEFAULT() to update the location of nonterminals. |
| */ |
| #define YY_DECL int lexer(void) |
| |
| # define YY_INPUT(buf, result, max_size) \ |
| { \ |
| if( 0 == (result = lexer_input(buf, max_size, yyin)) ) \ |
| result = YY_NULL; \ |
| } |
| |
| #define bcomputable(T, C) \ |
| yylval.computational.type=T, \ |
| yylval.computational.capacity=C, \ |
| yylval.computational.signable=true, BINARY_INTEGER |
| #define scomputable(T, C) \ |
| yylval.computational.type=T, \ |
| yylval.computational.capacity=C, \ |
| yylval.computational.signable=true, COMPUTATIONAL |
| #define ucomputable(T, C) \ |
| yylval.computational.type=T, \ |
| yylval.computational.capacity=C, \ |
| yylval.computational.signable=false, COMPUTATIONAL |
| |
| static char *tmpstring = NULL; |
| |
| #define PROGRAM current_program_index() |
| |
| // map of alias => canonical |
| static std::map <std::string, std::string> keyword_aliases; |
| |
| const std::string& |
| keyword_alias_add( const std::string& keyword, const std::string& alias ) { |
| auto p = keyword_aliases.find(alias); |
| if( p != keyword_aliases.end() ) return p->second; // error: do not overwrite |
| return keyword_aliases[alias] = keyword; |
| } |
| |
| /* |
| * Because numeric USAGE types don't have distinct tokens and may have aliases, |
| * we keep a table of their canonical names, which we use if we encounter an |
| * alias. |
| */ |
| struct bint_t { |
| int token; |
| cbl_field_type_t type; |
| uint32_t capacity; |
| bool signable; |
| }; |
| static const std::map <std::string, bint_t > binary_integers { |
| { "COMP-X", { COMPUTATIONAL, FldNumericBin5, 0xFF, false } }, |
| { "COMP-6", { COMPUTATIONAL, FldPacked, 0, false } }, |
| { "COMP-5", { COMPUTATIONAL, FldNumericBin5, 0, false } }, |
| { "COMP-4", { COMPUTATIONAL, FldNumericBinary, 0, true } }, |
| { "COMP-2", { COMPUTATIONAL, FldFloat, 8, false } }, |
| { "COMP-1", { COMPUTATIONAL, FldFloat, 4, false } }, |
| { "COMP", { COMPUTATIONAL, FldNumericBinary, 0, false } }, |
| { "COMPUTATIONAL-X", { COMPUTATIONAL, FldNumericBin5, 0xFF, false } }, |
| { "COMPUTATIONAL-6", { COMPUTATIONAL, FldPacked, 0, false } }, |
| { "COMPUTATIONAL-5", { COMPUTATIONAL, FldNumericBin5, 0, false } }, |
| { "COMPUTATIONAL-4", { COMPUTATIONAL, FldNumericBinary, 0, true } }, |
| { "COMPUTATIONAL-2", { COMPUTATIONAL, FldFloat, 8, false } }, |
| { "COMPUTATIONAL-1", { COMPUTATIONAL, FldFloat, 4, false } }, |
| { "COMPUTATIONAL", { COMPUTATIONAL, FldNumericBinary, 0, false } }, |
| { "BINARY", { BINARY_INTEGER, FldNumericBinary, 0, true } }, |
| { "BINARY-CHAR", { BINARY_INTEGER, FldNumericBin5, 1, true } }, |
| { "BINARY-SHORT", { BINARY_INTEGER, FldNumericBin5, 2, true } }, |
| { "BINARY-LONG", { BINARY_INTEGER, FldNumericBin5, 4, true } }, |
| { "BINARY-DOUBLE", { BINARY_INTEGER, FldNumericBin5, 8, true } }, |
| { "BINARY-LONG-LONG", { BINARY_INTEGER, FldNumericBin5, 8, true } }, |
| { "FLOAT-BINARY-32", { COMPUTATIONAL, FldFloat, 4, false } }, |
| { "FLOAT-BINARY-64", { COMPUTATIONAL, FldFloat, 8, false } }, |
| { "FLOAT-BINARY-128", { COMPUTATIONAL, FldFloat, 16, false } }, |
| { "FLOAT-EXTENDED", { COMPUTATIONAL, FldFloat, 16, false } }, |
| { "FLOAT-LONG", { COMPUTATIONAL, FldFloat, 8, false } }, |
| { "FLOAT-SHORT", { COMPUTATIONAL, FldFloat, 4, false } }, |
| }; |
| |
| static int |
| binary_integer_usage( const char name[]) { |
| // uname can't be cbl_name_t, because at this point name[] might have more |
| // than sizeof(cbl_name_t) characters. The length check comes later. |
| char *uname = xstrdup(name); |
| std::transform(name, name + strlen(name), uname, ftoupper); |
| |
| dbgmsg("%s:%d: checking %s in %zu keyword_aliases", |
| __func__, __LINE__, uname, |
| keyword_aliases.size() ); |
| |
| std::string key = uname; |
| auto alias = keyword_aliases.find(key); |
| if( alias != keyword_aliases.end() ) key = alias->second; |
| |
| auto p = binary_integers.find(key); |
| if( p == binary_integers.end() ) return 0; |
| |
| yylval.computational.type = p->second.type; |
| yylval.computational.capacity = p->second.capacity; |
| yylval.computational.signable = p->second.signable; |
| dbgmsg("%s:%d: %s has type %d", __func__, __LINE__, |
| uname, p->second.type ); |
| free(uname); |
| return p->second.token; |
| } |
| |
| static void |
| verify_ws( const YYLTYPE& loc, const char input[], char ch ) { |
| if( ! fisspace(ch) ) { |
| if( ! (dialect_mf() || dialect_gnu()) ) { |
| dialect_error(loc, "separator space required in %qs", input); |
| } |
| } |
| } |
| #define verify_ws(C) verify_ws(yylloc, yytext, C) |
| |
| int |
| binary_integer_usage_of( const char name[] ) { |
| cbl_name_t uname = {}; |
| std::transform(name, name + strlen(name), uname, ftoupper); |
| |
| auto p = binary_integers.find(uname); |
| if( p != binary_integers.end() ) { |
| int token = p->second.token; |
| switch( token ) { |
| case COMPUTATIONAL: |
| case BINARY_INTEGER: |
| return token; |
| default: |
| gcc_unreachable(); |
| assert(false); |
| } |
| } |
| return 0; |
| } |
| |
| static uint32_t |
| level_of( const char input[] ) { |
| unsigned int output = 0; |
| |
| if( input[0] == '0' ) input++; |
| |
| if( 1 != sscanf(input, "%u", &output) ) { |
| yywarn( "%s:%d: invalid level '%s'", __func__, __LINE__, input ); |
| } |
| |
| return output; |
| } |
| |
| static inline int |
| ndigit(int len) { |
| char *input = TOUPPER(yytext[0]) == 'V'? yytext + 1 : yytext; |
| int n = repeat_count(input); |
| return n == -1? len : n; |
| } |
| |
| static int |
| picset( int token ) { |
| static const char * const eop = orig_picture + sizeof(orig_picture); |
| char *p = orig_picture + strlen(orig_picture); |
| |
| if( eop < p + yyleng ) { |
| error_msg(yylloc, "PICTURE exceeds maximum size of %zu bytes", |
| sizeof(orig_picture) - 1); |
| } |
| snprintf( p, eop - p, "%s", yytext ); |
| return token; |
| } |
| |
| /** |
| ## Script and data to produce picture_t::followers. |
| ## Based on ISO Table 10. |
| #! /usr/bin/awk -f |
| |
| BEGIN { |
| str = "B0/ , . + +- +- CR/DB cs cs Z* Z* + + cs cs 9 AX S V P P 1 N E" |
| split(str, cols) |
| } |
| |
| $1 ~ /CR|DB|cs/ { next } |
| |
| 0 && !nlines++ { |
| for( i=0; i < length(cols); i++ ) { |
| print i, cols[i], "'" $i "'" |
| } |
| } |
| |
| $field == "x" { |
| if( ! nout++ ) { |
| printf "%2d: %5s: \"", field, cols[field - 1] |
| } |
| |
| gsub(/^ +| +$/, "", $1) |
| printf "%s", $1 |
| } |
| |
| END { |
| if( ! nout++ ) { |
| printf "%2d: %5s: \"", field, cols[field - 1] |
| } |
| print "\"" |
| } |
| |
| B x x x - x - - x - x x x x x x x x - x - x - x |
| 0 x x x - x - - x - x x x x x x x x - x - x - x |
| / x x x - x - - x - x x x x x x x x - x - x - x |
| , x x x - x - - x - x x x x x x x - - x - x |
| . x x - - x - - x - x - x - x - x |
| + - - - - - - - - - - - - - - - - - - - - - - - x |
| + |
| – |
| + x x x - - - - x x x x - - x x x - - x x x |
| CR x x x - - - - x x x x - - x x x - - x x x |
| DB x x x - - - - x x x x - - x x x - - x x x |
| cs - - - - x |
| cs x x x - x - - - - x x - - - - x - - x x x |
| |
| Z x x - - x - - x - x |
| * x x - - x - - x - x |
| Z x x x - x - - x - x x - - - - - - - x - x |
| * x x x - x - - x - x x - - - - - - - x - x |
| + x x - - - - - x - - - x |
| – x x - - - - - x - - - x |
| + x x x - - - - x - - - x x - - - - - x |
| – x x x - - - - x - - - x x - - - - - x |
| cs x x - - x - - - - - - - - x |
| cs x x x - x - - - - - - - - x x - - - x |
| |
| 9 x x x x x - - x - x - x - x - x x x x - x - - x |
| A x - - - - - - - - - - - - - - x x |
| X x - - - - - - - - - - - - - - x x |
| S |
| V x x - - x - - x - x - x - x - x - x - x |
| P x x - - x - - x - x - x - x - x - x - x |
| P - - - - x - - x - - - - - - - - - x x - x |
| 1 - - - - - - - - - - - - - - - - - - - - - x |
| N x - - - - - - - - - - - - - - - - - - - - - x |
| E x x x - x - - - - - - - - - - x |
| **/ |
| |
| class picture_t { |
| static const char dot = '.', comma = ','; |
| |
| typedef std::vector<std::string> followings_t; |
| static const std::map <char, followings_t> followers; |
| |
| const char * const begin; |
| const char *p, *pend; |
| size_t pos; |
| struct exclusions_t { // Nonzero if set, > 1 is false. |
| // crdb means CR/DB or +/-. |
| // pluses means 2 or more consecutive '+'. |
| // minuses means 2 or more consecutive '-'. |
| // "21) The symbol 'Z' and the symbol '*' are mutually exclusive " |
| // stars means '*' or Z. |
| unsigned short int crdb, currency, dot, pluses, minuses, stars, zzz; |
| exclusions_t() |
| : crdb(0), currency(0), dot(0), pluses(0), minuses(0), stars(0) |
| {} |
| } exclusions; |
| YYLTYPE loc; |
| |
| bool is_crdb() const { // input must be uppercase for CR/DB |
| if( p[0] == 'C' || p[0] == 'D' ) { |
| char input[3] = { p[0], p[1] }; |
| return ( 0 == strcmp(input, "CR") || 0 == strcmp(input, "DB") ); |
| } |
| return false; |
| } |
| |
| const char * match_paren( const char *paren ) const { |
| gcc_assert(paren[0] == '('); // start with opening paren |
| paren = std::find_if( paren, pend, |
| []( char ch ) { |
| return ch == '(' || ch == ')'; |
| } ); |
| if( *paren == '(' ) return nullptr; // no nesting |
| if( paren == pend ) return nullptr; |
| return ++paren; |
| } |
| |
| const char * next_not( char ch ) const { |
| return std::find_if( p, pend, |
| [ch = TOUPPER(ch)]( char next ) { |
| return ch != next; |
| } ); |
| } |
| |
| const char * valid_next( const char *p, const std::string& valid ) const { |
| if( p == pend || p + 1 == pend ) return pend; |
| if( p[1] == '(' ) { |
| return match_paren(++p); |
| } |
| auto pv = std::find(valid.begin(), valid.end(), TOUPPER(p[1])); |
| return pv != valid.end()? ++p : nullptr; |
| } |
| const char * valid_next( const char *p, |
| bool first = true, char ch = '\0' ) const { |
| if( p == pend || p + 1 == pend ) return pend; |
| if( p[0] == '(' ) { |
| if( (p = match_paren(p)) == nullptr ) return nullptr; |
| } |
| if( p[0] == '(' ) return nullptr; // consecutive parentheses |
| |
| int index = first? 0 : 1; |
| if( !ch ) ch = *p; // use current character unless overridden |
| auto valid = followers.find(TOUPPER(ch)); |
| if( valid == followers.end() ) { |
| YYLTYPE loc(yylloc); |
| loc.first_column += int(p - begin); |
| error_msg( loc, "PICTURE: strange character %qc, giving up", ch ); |
| return nullptr; |
| } |
| return valid_next(p, valid->second[index]); |
| } |
| |
| const char * start() { // start modifies exclusions, but not p |
| auto pnext = p; |
| |
| switch(TOUPPER(p[0])) { |
| case comma: case dot: |
| // use decimal_is_comma() |
| // 4: .: "B0/,+Z*+-9E" |
| exclusions.dot++; |
| pnext = valid_next(p, "B0/,+Z*+-9E"); |
| break; |
| case '+': case '-': |
| // 6: +-: "B0/,.Z*Z*9VPPE" |
| exclusions.crdb++; |
| pnext = next_not(p[0]); |
| if( p + 1 < pnext ) { |
| exclusions.pluses++; |
| } |
| pnext = valid_next(--pnext, "B0/,.Z*Z*9VPPE"); |
| break; |
| case 'Z': case '*': |
| exclusions.stars++; |
| pnext = next_not(p[0]); |
| break; |
| case 'S': |
| // 19: S: "9VP" |
| pnext = valid_next(p, "9VP"); |
| break; |
| } |
| |
| /* |
| * "For fixed editing sign control, the currency symbol, when used, shall |
| * be either the leftmost symbol in character-string-1, optionally preceded |
| * by one of the symbols '+' or '-' " |
| */ |
| if( pnext ) { |
| if( p == pnext || p[0] == '+' || p[0] == '-' ) { |
| if( symbol_currency(*pnext) ) { |
| exclusions.currency++; |
| pnext = next_not(*pnext); |
| pnext = valid_next(--pnext, true, '$'); |
| } |
| } |
| } |
| |
| return pnext; |
| } |
| |
| const char * next() { // modify state; do not modify position |
| auto pnext = p; |
| auto loc(picture_t::loc); |
| loc.first_column += int(p - begin); |
| |
| if( is_crdb() ) { |
| if( exclusions.crdb++ ) { |
| error_msg( loc, "PICTURE: CR/DB and %c/%c may appear only once", '+', '-' ); |
| return nullptr; |
| } |
| if( p + 2 != pend ) { |
| error_msg( loc, "PICTURE: CR/DB must appear at the end" ); |
| return nullptr; |
| } |
| return pend; |
| } |
| |
| if( symbol_currency(p[0]) ) { |
| if( false && exclusions.currency++ ) { // not enforced |
| error_msg( loc, "PICTURE: CURRENCY SYMBOL sequence may appear at most once" ); |
| return nullptr; |
| } |
| return valid_next(p, ! exclusions.dot, '$'); |
| } |
| |
| switch(TOUPPER(p[0])) { |
| case '(': |
| return match_paren(p); |
| break; |
| case 'B': case '0': case '/': |
| pnext = valid_next(p); |
| break; |
| case comma: |
| if( decimal_is_comma() ) { |
| if( exclusions.dot++ ) { |
| error_msg( loc, "PICTURE: %qc: may appear at most once", p[0] ); |
| return nullptr; |
| } |
| pnext = valid_next(p, true, dot); |
| } else { |
| pnext = valid_next(p); |
| } |
| break; |
| case dot: |
| if( p + 1 == pend ) { |
| pnext = pend; |
| } else { |
| if( decimal_is_comma() ) { |
| pnext = valid_next(p, true, comma ); |
| } else { |
| if( exclusions.dot++ ) { |
| error_msg( loc, "PICTURE: %qc: may appear at most once", p[0] ); |
| return nullptr; |
| } |
| pnext = valid_next(p); |
| } |
| } |
| break; |
| |
| case '+': case '-': |
| // 7 is trailing sign; 13 & 14 are numeric. Leading sign handled by start(). |
| if( p + 1 == pend ) { |
| if( exclusions.crdb++ ) { |
| error_msg( loc, "PICTURE: %c/%c may appear at most once as a sign", '+', '-' ); |
| return nullptr; |
| } |
| pnext = pend; |
| } else { |
| pnext = next_not(p[0]); |
| if( p + 1 < pnext ) { |
| if( false && exclusions.pluses++ ) { // not enforced |
| error_msg( loc, "PICTURE: %qc: sequence may appear at most once", p[0] ); |
| return nullptr; |
| } |
| } |
| pnext = valid_next(pnext, ! exclusions.dot); |
| } |
| break; |
| |
| case 'Z': case '*': |
| if( false && exclusions.stars++ ) { // not enforced |
| error_msg( loc, "PICTURE: %qc: sequence may appear at most once", p[0] ); |
| return nullptr; |
| } |
| if( (pnext = next_not(p[0])) == nullptr ) return pnext; |
| pnext = valid_next(pnext, ! exclusions.dot); |
| break; |
| case 'P': |
| pnext = valid_next(pnext, ! exclusions.dot); |
| break; |
| case '9': |
| case 'A': case 'X': |
| case 'V': |
| case '1': |
| case 'N': |
| pnext = valid_next(p); |
| break; |
| case 'E': |
| pnext = valid_next(p, "+9"); |
| if( pnext && *pnext == '+' ) { |
| pnext = valid_next(p, "9"); |
| } |
| break; |
| default: |
| error_msg( loc, "PICTURE: %qc: invalid character", p[0] ); |
| return nullptr; |
| } |
| return pnext; |
| } |
| |
| public: |
| picture_t( const char *p, int len ) |
| : begin(p) |
| , p(p), pend(p + len) |
| , loc(yylloc) |
| { |
| assert(TOUPPER(*p) == 'P'); // as in PICTURE (or PICTURE IS) |
| // move p to start of picture string |
| while( (p = std::find_if(p, pend, fisspace)) != pend ) { |
| this->p = p = std::find_if(p, pend, |
| []( char ch ) { return ! fisspace(ch); } ); |
| } |
| assert(this->p != pend); |
| pos = this->p - begin; |
| } |
| |
| bool is_valid() { |
| if( !p ) return false; |
| if( (p = start()) == nullptr ) { |
| return false; |
| } |
| |
| while( p && p < pend) { |
| p = next(); |
| } |
| return p == pend; |
| } |
| |
| int starts_at() const { return pos; } |
| }; |
| |
| /* |
| * The Followers map gives 1 or 2 lists of valid characters following a |
| * character, the one in the key. If there are two lists, the correct one is |
| * determined by the caller based on the state of the picture string, i.e., |
| * what has been seen before. |
| */ |
| const std::map <char, picture_t::followings_t> picture_t::followers { |
| /* B0/ */ { 'B', {"B0/,.Z*+-9AXVPNE" } }, |
| /* B0/ */ { '0', {"B0/,.Z*+-9AXVPNE" } }, |
| /* B0/ */ { '/', {"B0/,.Z*+-9AXVPNE" } }, |
| /* , */ { ',', {"B0/,.Z*+-9VPE"} }, |
| /* . */ { '.', {"B0/,Z*+-9E"} }, |
| /* + { '+', "9" }, */ |
| /* +- */ { '+', {"B0/,.Z*9VPE", "" } }, |
| /* +- */ { '-', {"B0/,.Z*9VPE", "" } }, |
| /* CR/DB { 'C', "" }, */ |
| /* cs { 'c', "B0/,.Z*+-9VP" }, */ |
| /* cs { 'c', "+" }, */ |
| /* Z* */ { 'Z', {"B0/,.+Z*9VP", "B0/,+Z*"} }, |
| /* Z* */ { '*', {"B0/,.+Z*9VP", "B0/,+Z*"} }, |
| /* + */ { '+', {"B0/,.+-9VP", "B0/,+-"} }, |
| /* cs */ { '$', {"B0/,.+9VP", "B0/,+"} }, |
| /* 9 */ { '9', {"B0/,.+9AXVPE"} }, |
| /* AX */ { 'A', {"B0/9AX"} }, |
| /* AX */ { 'X', {"B0/9AX"} }, |
| /* S */ { 'S', {"9VP"} }, |
| /* V */ { 'V', {"B0/,+Z*+-9P"} }, |
| /* P */ { 'P', {"+VP", "B0/,+Z*9P"} }, |
| /* 1 */ { '1', {"1"} }, |
| /* N */ { 'N', {"B0/N"} }, |
| /* E */ { 'E', {"+9"} }, |
| }; |
| |
| /* |
| * Although picture_t::is_valid return a bool, it's not used. The validation |
| * routines emit messages where the error is detected. The entire string is |
| * subsequently parsed by the parser, which might otherwise accept an invalid |
| * string, but will usually emit a message of its own. |
| */ |
| static int |
| validate_picture() { |
| picture_t picture(yytext, yyleng); |
| picture.is_valid(); |
| return picture.starts_at(); |
| } |
| |
| static inline bool |
| is_integer_token( int *pvalue = NULL ) { |
| int v, n = 0; |
| if( pvalue == NULL ) pvalue = &v; |
| return 1 == sscanf(yytext, "%d%n", pvalue, &n) && n == yyleng; |
| } |
| |
| static bool need_nume = false; |
| bool need_nume_set( bool tf ) { |
| dbgmsg( "need_nume now %s", tf? "true" : "false" ); |
| return need_nume = tf; |
| } |
| |
| static int datetime_format_of( const char input[] ); |
| |
| static int symbol_function_token( const char name[] ) { |
| const auto e = symbol_function( 0, name ); |
| return e ? symbol_index(e) : 0; |
| } |
| |
| bool in_procedure_division(void ); |
| |
| static symbol_elem_t * |
| symbol_exists( const char name[] ) { |
| typedef std::map <std::string, size_t> name_cache_t; |
| static std::map <size_t, name_cache_t> cachemap; |
| |
| cbl_name_t lname; |
| std::transform( name, name + strlen(name) + 1, lname, tolower ); |
| auto& cache = cachemap[PROGRAM]; |
| |
| if( in_procedure_division() && cache.empty() ) { |
| for( auto e = symbols_begin(PROGRAM) + 1; |
| PROGRAM == e->program && e < symbols_end(); e++ ) { |
| if( e->type == SymFile ) { |
| cbl_file_t *f(cbl_file_of(e)); |
| cbl_name_t lname; |
| std::transform( f->name, f->name + strlen(f->name) + 1, lname, tolower ); |
| cache[lname] = symbol_index(e); |
| continue; |
| } |
| if( e->type == SymField ) { |
| auto f(cbl_field_of(e)); |
| cbl_name_t lname; |
| std::transform( f->name, f->name + strlen(f->name) + 1, lname, tolower ); |
| cache[lname] = symbol_index(e); |
| } |
| } |
| cache.erase(""); |
| } |
| auto p = cache.find(lname); |
| |
| if( p == cache.end() ) { |
| symbol_elem_t * e = symbol_field( PROGRAM, 0, name ); |
| return e; |
| } |
| |
| return symbol_at(p->second); |
| } |
| |
| static int |
| typed_name( const char name[] ) { |
| if( 0 == PROGRAM ) return NAME; |
| if( need_nume ) { need_nume_set(false); return NUME; } |
| |
| int token = repository_function_tok(name); |
| switch(token) { |
| case 0: |
| break; |
| case FUNCTION_UDF_0: |
| yylval.number = symbol_function_token(name); |
| __attribute__((fallthrough)); |
| default: |
| return token; |
| } |
| |
| struct symbol_elem_t *e = symbol_special( PROGRAM, name ); |
| if( e ) return cbl_special_name_of(e)->token; |
| |
| if( (token = redefined_token(name)) ) { return token; } |
| |
| e = symbol_exists( name ); |
| |
| auto type = e && e->type == SymField? cbl_field_of(e)->type : FldInvalid; |
| |
| switch(type) { |
| case FldLiteralA: |
| { |
| auto f = cbl_field_of(e); |
| if( is_constant(f) ) { |
| if( f->data.initial ) { |
| int token = cbl_figconst_tok(f->data.initial); |
| if( token ) return token; |
| } |
| int token = datetime_format_of(f->data.initial); |
| if( token ) { |
| yylval.string = xstrdup(f->data.initial); |
| return token; |
| } |
| } |
| } |
| __attribute__((fallthrough)); |
| case FldLiteralN: |
| { |
| const auto f = cbl_field_of(e); |
| if( type == FldLiteralN ) { |
| yylval.numstr.radix = |
| f->has_attr(hex_encoded_e)? hexadecimal_e : decimal_e; |
| yylval.numstr.string = xstrdup(f->data.initial); |
| return NUMSTR; |
| } |
| if( !f->has_attr(record_key_e) ) { // not a key-name literal |
| yylval.literal.set(f); |
| ydflval.string = yylval.literal.data; |
| return LITERAL; |
| } |
| } |
| __attribute__((fallthrough)); |
| case FldInvalid: |
| case FldGroup: |
| case FldForward: |
| case FldIndex: |
| case FldAlphanumeric: |
| case FldPacked: |
| case FldNumericDisplay: |
| case FldNumericEdited: |
| case FldAlphaEdited: |
| case FldNumericBinary: |
| case FldFloat: |
| case FldNumericBin5: |
| case FldPointer: |
| return NAME; |
| case FldSwitch: |
| return SWITCH; |
| case FldClass: |
| return cbl_field_of(e)->level == 88? NAME88 : CLASS_NAME; |
| break; |
| default: |
| yywarn("%s:%d: invalid symbol type %s for symbol %qs", |
| __func__, __LINE__, cbl_field_type_str(type), name); |
| return NAME; |
| } |
| return cbl_field_of(e)->level == 88? NAME88 : NAME; |
| } |
| |
| int |
| retype_name_token() { |
| return typed_name(ydflval.string); |
| } |
| |
| static char * |
| tmpstring_append( int len ) { |
| const char *extant = tmpstring == NULL ? "" : tmpstring; |
| char *s = xasprintf("%s%.*s", extant, len, yytext); |
| free(tmpstring); |
| return tmpstring = s; |
| } |
| |
| #define pop_return yy_pop_state(); return |
| |
| static bool is_not = false; |
| |
| static uint64_t |
| integer_of( const char input[], bool is_hex = false) { |
| uint64_t output = 0; |
| const char *fmt = is_hex? "%ul" : "%hl"; |
| |
| if( input[0] == '0' ) input++; |
| |
| if( 1 != sscanf(input, fmt, &output) ) { |
| yywarn( "%s:%d: invalid integer '%s'", __func__, __LINE__, input ); |
| } |
| |
| return output; |
| } |