gcc/cobol/scan_post.h - gcc.git - Git at Google

 /*
  * Copyright (c) 2021-2025 Symas Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of the Symas Corporation nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */

 /*
  * Translate a scanner start-condition number into its name, for use in
  * debug messages.  Any value not listed below yields "???".
  */
 static const char *
 start_condition_str( int sc ) {
   switch(sc) {
   case INITIAL:          return "INITIAL";
   case addr_of:          return "addr_of";
   case basis:            return "basis";
   case bool_state:       return "bool_state";
   case cdf_state:        return "cdf_state";
   case classify:         return "classify";
   case cobol_words:      return "cobol_words";
   case comment_entries:  return "comment_entries";
   case copy_state:       return "copy_state";
   case date_state:       return "date_state";
   case datetime_fmt:     return "datetime_fmt";
   case dot_state:        return "dot_state";
   case exception:        return "exception";
   case field_level:      return "field_level";
   case field_state:      return "field_state";
   case function:         return "function";
   case hex_state:        return "hex_state";
   case ident_state:      return "ident_state";
   case integer_count:    return "integer_count";
   case name_state:       return "name_state";
   case numeric_state:    return "numeric_state";
   case numstr_state:     return "numstr_state";
   case partial_name:     return "partial_name";
   case picture:          return "picture";
   case picture_count:    return "picture_count";
   case procedure_div:    return "procedure_div";
   case program_id_state: return "program_id_state";
   case quoted1:          return "quoted1";
   case quoted2:          return "quoted2";
   case quoteq:           return "quoteq";
   case raising:          return "raising";
   case sort_state:       return "sort_state";
   case subscripts:       return "subscripts";
   default:               break;
   }
   return "???";
 }

 /* Name of the start condition the scanner is in right now (YY_START). */
 static const char *
 start_condition_is() {
   const int current = YY_START;
   return start_condition_str( current );
 }

 /*
  * Match datetime constants.
  *
  * A 78 or CONSTANT could have a special literal for formatted
  * date/time functions.
  *
  * The first call builds three extended, case-insensitive regular expressions
  * (datetime, date, time) and compiles them.  A pattern that fails to compile
  * is reported with yywarn and is skipped when matching, because regexec must
  * not be applied to a regex_t that regcomp did not fill in.
  *
  * Matching is attempted only while the scanner is in the datetime_fmt start
  * condition, which is popped first.  Returns DATETIME_FMT, DATE_FMT, or
  * TIME_FMT for the first pattern, tried in that order, that matches input;
  * returns 0 if none matches, if input is NULL, or in any other start
  * condition.
  */
 static int
 datetime_format_of( const char input[] ) {

   static const char date_fmt_b[] = "YYYYMMDD|YYYYDDD|YYYYWwwD";
   static const char date_fmt_e[] = "YYYY-MM-DD|YYYY-DDD|YYYY-Www-D";

   // NOTE(review): time_fmt_b ends in '|', which leaves an empty alternative
   // wherever it is spliced into a larger pattern below.  POSIX leaves that
   // undefined for extended regular expressions -- confirm it is intended.
   static const char time_fmt_b[] =
     "hhmmss([.,]s+)?|hhmmss([.,]s+)?Z|hhmmss([.,]s+)?[+]hhmm|";
   static const char time_fmt_e[] =
     "hh:mm:ss([.,]s+)?|hh:mm:ss([.,]s+)?Z|hh:mm:ss([.,]s+)?[+]hh:mm";

   static char date_pattern[ 3 * sizeof(date_fmt_e) ];
   static char time_pattern[ 3 * sizeof(time_fmt_e) ];
   static char datetime_pattern[ 6 * sizeof(time_fmt_e) ];

   static struct pattern_t {
     regex_t re;
     const char *regex;
     int token;
     bool compiled;  // true only once regcomp has succeeded on re
   } patterns[] = {
     { {}, datetime_pattern, DATETIME_FMT, false },
     { {}, date_pattern, DATE_FMT, false },
     { {}, time_pattern, TIME_FMT, false },
   }, * eopatterns = patterns + COUNT_OF(patterns);

   // compile patterns, once: date_pattern is empty until the first call
   if( ! date_pattern[0] ) {
     snprintf(date_pattern, sizeof(date_pattern),
              "%s|%s", date_fmt_b, date_fmt_e);
     snprintf(time_pattern, sizeof(time_pattern),
              "%s|%s", time_fmt_b, time_fmt_e);

     snprintf(datetime_pattern, sizeof(datetime_pattern),
              "(%sT%s)|(%sT%s)",
              date_fmt_b, time_fmt_b,
              date_fmt_e, time_fmt_e);

     for( auto p = patterns; p < eopatterns; p++ ) {
       static const int cflags = REG_EXTENDED | REG_ICASE;
       const int erc = regcomp(&p->re, p->regex, cflags);

       p->compiled = (0 == erc);
       if( ! p->compiled ) {
         static char msg[80];
         regerror(erc, &p->re, msg, sizeof(msg));
         yywarn("%s:%d: %s: %s", __func__, __LINE__, keyword_str(p->token), msg);
       }
     }
   }

   // applies only in the datetime_fmt start condition
   if( datetime_fmt == YY_START ) {
     yy_pop_state();
     if( input == NULL ) return 0;

     // See if the input is a date, time, or datetime pattern string.
     static const int nmatch = 3;
     regmatch_t matches[nmatch];

     auto p = std::find_if( patterns, eopatterns,
                            [input, &matches]( auto& pattern ) {
                              // Never hand regexec an uncompiled regex_t.
                              if( ! pattern.compiled ) return false;
                              auto erc = regexec( &pattern.re, input,
                                                  COUNT_OF(matches), matches, 0 );
                              return erc == 0;
                            } );

     return p != eopatterns? p->token : 0;
   }
   return 0;
 }


 /*
  * >>DEFINE, >>IF, and >>EVALUATE
  */

 // True when token is a CDF directive keyword (DEFINE, DISPLAY, IF/ELSE/END-IF,
 // EVALUATE/WHEN/END-EVALUATE, PUSH, POP) or one of CALL_COBOL, CALL_VERBATIM,
 // COPY, TURN.
 static bool
 is_cdf_token( int token ) {
   switch(token) {
   case CDF_DEFINE:
   case CDF_DISPLAY:
   case CDF_IF:
   case CDF_ELSE:
   case CDF_END_IF:
   case CDF_EVALUATE:
   case CDF_WHEN:
   case CDF_END_EVALUATE:
   case CDF_PUSH:
   case CDF_POP:
   case CALL_COBOL:
   case CALL_VERBATIM:
   case COPY:
   case TURN:
     return true;
   default:
     return false;
   }
 }

 // True for the tokens of the conditional directives: the IF family
 // (IF, ELSE, END-IF) and the EVALUATE family (EVALUATE, WHEN, END-EVALUATE).
 static bool
 is_cdf_condition_token( int token ) {
   const bool if_family = token == CDF_IF
                       || token == CDF_ELSE
                       || token == CDF_END_IF;
   const bool evaluate_family = token == CDF_EVALUATE
                             || token == CDF_WHEN
                             || token == CDF_END_EVALUATE;
   return if_family || evaluate_family;
 }

 /*
  * IF and EVALUATE are partially parsed in cdf.y.  ELSE and WHEN, etc., are
  * valid only in context.
  */
 // ELSE and END-IF are accepted only when scanner_token() reports CDF_IF;
 // WHEN and END-EVALUATE only when it reports CDF_EVALUATE.  Everything else
 // is accepted unconditionally.
 static bool
 valid_conditional_context( int token ) {
   if( token == CDF_ELSE || token == CDF_END_IF ) {
     return scanner_token() == CDF_IF;
   }
   if( token == CDF_WHEN || token == CDF_END_EVALUATE ) {
     return scanner_token() == CDF_EVALUATE;
   }
   // CDF_DEFINE, CDF_IF, CDF_EVALUATE, and all other tokens need no context.
   return true;
 }

 /*
  * Parse one CDF directive that starts with token.
  *
  * Reports a syntax error and returns false if token is not valid in the
  * current conditional context.  Otherwise injects token for the CDF parser,
  * runs ydfparse between parser_save/parser_restore, pops the cdf_state start
  * condition if the scanner is still in it, and returns true when ydfparse
  * returned 0.
  */
 static bool
 run_cdf( int token ) {
   const bool context_ok = valid_conditional_context(token);
   if( ! context_ok ) {
     error_msg(yylloc, "CDF syntax error at '%s'", keyword_str(token));
     return false;
   }

   // The CDF parser needs to see the directive's first token.
   parsing.inject_token(token);

   if( yy_flex_debug ) {
     dbgmsg("CDF parser start with '%s'", keyword_str(token));
   }

   parsing.parser_save(ydfparse);
   const int status = ydfparse();
   parsing.parser_restore();

   if( YY_START == cdf_state ) {
     yy_pop_state();
   }

   if( yy_flex_debug ) {
     dbgmsg("CDF parser returned %d, scanner SC <%s>", status, start_condition_is());
   }

   return status == 0;
 }

 #include <queue>
 // A token number paired with a copy of a semantic value.
 struct pending_token_t {
   int token;
   YYSTYPE value;

   pending_token_t( int tok, YYSTYPE val )
     : token(tok)
     , value(val)
   {}
 };
 // Build a pending_token_t for token T carrying the current yylval.
 #define PENDING(T) pending_token_t( (T), yylval )

 // FIFO of pending tokens; not referenced by the other code visible here.
 static std::queue<pending_token_t> pending_tokens;

 // Fetch the next token straight from lexer().
 int next_token() {
   return lexer();
 }

 // Defined elsewhere.  prelex reuses it as the next token when it is > 0
 // (the CDF parser's discarded lookahead).
 extern int ydfchar;
 // Declared here, defined elsewhere; not called by the code visible here.
 bool in_procedure_division(void);

 // act on CDF tokens
 int
 prelex() {
   static bool in_cdf = false;
   int token = next_token();

   if( in_cdf ) { return token; }
   if( ! is_cdf_token(token) ) { return token; }

   in_cdf = true;

   assert(is_cdf_token(token));

   while( is_cdf_token(token) ) {

     if( ! run_cdf(token) ) {
       dbgmsg( ">>CDF parser failed, ydfchar %d", ydfchar );
     }
     // Return the CDF's discarded lookahead token, if extant.
     token = ydfchar > 0? ydfchar : next_token();
     if( token == NO_CONDITION && parsing.at_eof() ) {
       return YYEOF;
     }

     // Reenter cdf parser only if next token could affect parsing state.
     if( ! parsing.on() && ! is_cdf_condition_token(token) ) break;
   }

   if( yy_flex_debug ) {
     dbgmsg("scanner SC <%s>", start_condition_is());
   }

   if( YY_START == copy_state || YY_START == cdf_state ) {
     if( token == NAME ) {
       auto tok = keyword_tok(ydflval.string);
       if( tok ) token = tok;
     }
     yy_pop_state();
     dbgmsg("scanner SC <%s>, token now %s",
            start_condition_is(), keyword_str(token));
   }

   /*
    * The final, rejected CDF token might be a LEVEL number.
    */
   if( YY_START == field_state && level_needed() ) {
     switch( token ) {
     case NUMSTR:
       if( yy_flex_debug ) yywarn("final token is NUMSTR");
       yylval.number = level_of(yylval.numstr.string);
       token = LEVEL;
       break;
     case YDF_NUMBER:
       if( yy_flex_debug ) yywarn("final token is %<YDF_NUMBER%>");
       yylval.number = ydflval.number;
       token = LEVEL;
       break;
     }
     if( token == LEVEL ) {
       switch(yylval.number) {
       case 66:
         token = LEVEL66;
         break;
       case 78:
         token = LEVEL78;
         break;
       case 88:
         token = LEVEL78;
         break;
       }
     }
   }

   dbgmsg( ">>CDF parser done, %s returning "
           "%s (because final_token %s, lookhead %d) on line %d", __func__,
           keyword_str(token), keyword_str(final_token),
           ydfchar, yylineno );
   in_cdf = false;
   return token;
 }

 /* There are 2 parsers and one scanner.
  * yyparse calls yylex.
  * yylex calls prelex
  *             prelex calls lexer, the scanner produced by flex.
  *                          lexer reads input from yyin via lexer_input.
  *
  * prelex intercepts CDF statements, each of which it parses with ydfparse.
  * ydfparse affects CDF variables, which may affect how yylex treats
  * the input stream.
  *
  * Because the lexer is called recursively:
  *
  *   yyparse -> yylex -> ydfparse -> yylex
  *
  * the global state of the scanner has changed when ydfparse returns.  Part of
  * that state is the unused lookahead token that ydfparse discarded, stored in
  * final_token.  prelex then returns final_token as its own, which is duly
  * returned to yyparse.
  */

 /*
  * Token source for the parser.
  *
  * Returns, in order of precedence: YYEOF once parsing.at_eof() is true; a
  * nonzero token from parsing.pending_token(); otherwise the first token from
  * prelex() that is either zero or accepted by parsing.feed_a_parser().
  * A YYEOF token seen while parsing.in_cdf() is true is not returned as such:
  * parsing.at_eof(true) is recorded and NO_CONDITION is returned instead.
  */
 int
 yylex(void) {
   // Set when the previous call returned '.' while next_sentence was nonzero.
   static bool produce_next_sentence_target = false;
   // Fetched before the EOF test, so pending_token() is called on every entry.
   int token = parsing.pending_token();

   if( parsing.at_eof() ) return  YYEOF;
   if( token ) return token;

   /*
    * NEXT SENTENCE jumps to an implied CONTINUE at the next dot ('.').
    * Documentation says variously that the implied CONTINUE is before or after
    * that dot, but the meaning is one: after the statement that precedes the
    * dot.
    *
    * When the lexer encounters the dot, it returns it to the parser, which may
    * use it as a look-ahead token to decide the grammar production.  By the
    * time it returns to the lexer looking for its next token, the parser will
    * have taken whatever actions the dot decided.  At that point, the lexer
    * injects the label that NEXT SENTENCE jumps to.
    */
   if( produce_next_sentence_target ) {
     next_sentence_label(next_sentence);
     produce_next_sentence_target = false;
   }

   // Scan until a token is accepted by feed_a_parser(), or the token is zero.
   do {
     token = prelex();
     if( yy_flex_debug ) {
       if( parsing.in_cdf() ) {
         dbgmsg( "%s:%d: <%s> routing %s to CDF parser", __func__, __LINE__,
                start_condition_is(), keyword_str(token) );
       } else if( !parsing.on() ) {
         dbgmsg( "eating %s because conditional compilation is FALSE",
                  keyword_str(token) );
       }
     }

   } while( token && ! parsing.feed_a_parser() );

   // Emit the NEXT SENTENCE label on the following call, not this one.
   if( next_sentence && token == '.' ) {
     produce_next_sentence_target = true;
   }

   // Remember the last token returned while parsing.normal() is true.
   if( parsing.normal() ) {
     final_token = token;
   }

   // End of input while in_cdf(): record EOF, return NO_CONDITION this once.
   if( token == YYEOF && parsing.in_cdf() ) {
     if( yy_flex_debug) dbgmsg("deflecting EOF");
     parsing.at_eof(true);
     return NO_CONDITION;
   }

   return token;
 }
	/*
	* Copyright (c) 2021-2025 Symas Corporation
	*
	* Redistribution and use in source and binary forms, with or without
	* modification, are permitted provided that the following conditions are
	* met:
	*
	* * Redistributions of source code must retain the above copyright
	* notice, this list of conditions and the following disclaimer.
	* * Redistributions in binary form must reproduce the above
	* copyright notice, this list of conditions and the following disclaimer
	* in the documentation and/or other materials provided with the
	* distribution.
	* * Neither the name of the Symas Corporation nor the names of its
	* contributors may be used to endorse or promote products derived from
	* this software without specific prior written permission.
	*
	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	*/

	/*
	 * Translate a scanner start-condition number into its name, for use in
	 * debug messages.  Any value not listed below yields "???".
	 */
	static const char *
	start_condition_str( int sc ) {
	  switch(sc) {
	  case INITIAL:          return "INITIAL";
	  case addr_of:          return "addr_of";
	  case basis:            return "basis";
	  case bool_state:       return "bool_state";
	  case cdf_state:        return "cdf_state";
	  case classify:         return "classify";
	  case cobol_words:      return "cobol_words";
	  case comment_entries:  return "comment_entries";
	  case copy_state:       return "copy_state";
	  case date_state:       return "date_state";
	  case datetime_fmt:     return "datetime_fmt";
	  case dot_state:        return "dot_state";
	  case exception:        return "exception";
	  case field_level:      return "field_level";
	  case field_state:      return "field_state";
	  case function:         return "function";
	  case hex_state:        return "hex_state";
	  case ident_state:      return "ident_state";
	  case integer_count:    return "integer_count";
	  case name_state:       return "name_state";
	  case numeric_state:    return "numeric_state";
	  case numstr_state:     return "numstr_state";
	  case partial_name:     return "partial_name";
	  case picture:          return "picture";
	  case picture_count:    return "picture_count";
	  case procedure_div:    return "procedure_div";
	  case program_id_state: return "program_id_state";
	  case quoted1:          return "quoted1";
	  case quoted2:          return "quoted2";
	  case quoteq:           return "quoteq";
	  case raising:          return "raising";
	  case sort_state:       return "sort_state";
	  case subscripts:       return "subscripts";
	  default:               break;
	  }
	  return "???";
	}

	/* Name of the start condition the scanner is in right now (YY_START). */
	static const char *
	start_condition_is() {
	  const int current = YY_START;
	  return start_condition_str( current );
	}

	/*
	 * Match datetime constants.
	 *
	 * A 78 or CONSTANT could have a special literal for formatted
	 * date/time functions.
	 *
	 * The first call builds three extended, case-insensitive regular expressions
	 * (datetime, date, time) and compiles them.  A pattern that fails to compile
	 * is reported with yywarn and is skipped when matching, because regexec must
	 * not be applied to a regex_t that regcomp did not fill in.
	 *
	 * Matching is attempted only while the scanner is in the datetime_fmt start
	 * condition, which is popped first.  Returns DATETIME_FMT, DATE_FMT, or
	 * TIME_FMT for the first pattern, tried in that order, that matches input;
	 * returns 0 if none matches, if input is NULL, or in any other start
	 * condition.
	 */
	static int
	datetime_format_of( const char input[] ) {

	  // The alternation bars below are plain '|'; a backslash before them
	  // is not part of the pattern.
	  static const char date_fmt_b[] = "YYYYMMDD|YYYYDDD|YYYYWwwD";
	  static const char date_fmt_e[] = "YYYY-MM-DD|YYYY-DDD|YYYY-Www-D";

	  // NOTE(review): time_fmt_b ends in '|', which leaves an empty alternative
	  // wherever it is spliced into a larger pattern below.  POSIX leaves that
	  // undefined for extended regular expressions -- confirm it is intended.
	  static const char time_fmt_b[] =
	    "hhmmss([.,]s+)?|hhmmss([.,]s+)?Z|hhmmss([.,]s+)?[+]hhmm|";
	  static const char time_fmt_e[] =
	    "hh:mm:ss([.,]s+)?|hh:mm:ss([.,]s+)?Z|hh:mm:ss([.,]s+)?[+]hh:mm";

	  static char date_pattern[ 3 * sizeof(date_fmt_e) ];
	  static char time_pattern[ 3 * sizeof(time_fmt_e) ];
	  static char datetime_pattern[ 6 * sizeof(time_fmt_e) ];

	  static struct pattern_t {
	    regex_t re;
	    const char *regex;
	    int token;
	    bool compiled;  // true only once regcomp has succeeded on re
	  } patterns[] = {
	    { {}, datetime_pattern, DATETIME_FMT, false },
	    { {}, date_pattern, DATE_FMT, false },
	    { {}, time_pattern, TIME_FMT, false },
	  }, * eopatterns = patterns + COUNT_OF(patterns);

	  // compile patterns, once: date_pattern is empty until the first call
	  if( ! date_pattern[0] ) {
	    snprintf(date_pattern, sizeof(date_pattern),
	             "%s|%s", date_fmt_b, date_fmt_e);
	    snprintf(time_pattern, sizeof(time_pattern),
	             "%s|%s", time_fmt_b, time_fmt_e);

	    snprintf(datetime_pattern, sizeof(datetime_pattern),
	             "(%sT%s)|(%sT%s)",
	             date_fmt_b, time_fmt_b,
	             date_fmt_e, time_fmt_e);

	    for( auto p = patterns; p < eopatterns; p++ ) {
	      static const int cflags = REG_EXTENDED | REG_ICASE;
	      const int erc = regcomp(&p->re, p->regex, cflags);

	      p->compiled = (0 == erc);
	      if( ! p->compiled ) {
	        static char msg[80];
	        regerror(erc, &p->re, msg, sizeof(msg));
	        yywarn("%s:%d: %s: %s", __func__, __LINE__, keyword_str(p->token), msg);
	      }
	    }
	  }

	  // applies only in the datetime_fmt start condition
	  if( datetime_fmt == YY_START ) {
	    yy_pop_state();
	    if( input == NULL ) return 0;

	    // See if the input is a date, time, or datetime pattern string.
	    static const int nmatch = 3;
	    regmatch_t matches[nmatch];

	    auto p = std::find_if( patterns, eopatterns,
	                           [input, &matches]( auto& pattern ) {
	                             // Never hand regexec an uncompiled regex_t.
	                             if( ! pattern.compiled ) return false;
	                             auto erc = regexec( &pattern.re, input,
	                                                 COUNT_OF(matches), matches, 0 );
	                             return erc == 0;
	                           } );

	    return p != eopatterns? p->token : 0;
	  }
	  return 0;
	}


	/*
	* >>DEFINE, >>IF, and >>EVALUATE
	*/

	// True when token is a CDF directive keyword (DEFINE, DISPLAY, IF/ELSE/END-IF,
	// EVALUATE/WHEN/END-EVALUATE, PUSH, POP) or one of CALL_COBOL, CALL_VERBATIM,
	// COPY, TURN.
	static bool
	is_cdf_token( int token ) {
	  switch(token) {
	  case CDF_DEFINE:
	  case CDF_DISPLAY:
	  case CDF_IF:
	  case CDF_ELSE:
	  case CDF_END_IF:
	  case CDF_EVALUATE:
	  case CDF_WHEN:
	  case CDF_END_EVALUATE:
	  case CDF_PUSH:
	  case CDF_POP:
	  case CALL_COBOL:
	  case CALL_VERBATIM:
	  case COPY:
	  case TURN:
	    return true;
	  default:
	    return false;
	  }
	}

	// True for the tokens of the conditional directives: the IF family
	// (IF, ELSE, END-IF) and the EVALUATE family (EVALUATE, WHEN, END-EVALUATE).
	static bool
	is_cdf_condition_token( int token ) {
	  const bool if_family = token == CDF_IF
	                      || token == CDF_ELSE
	                      || token == CDF_END_IF;
	  const bool evaluate_family = token == CDF_EVALUATE
	                            || token == CDF_WHEN
	                            || token == CDF_END_EVALUATE;
	  return if_family || evaluate_family;
	}

	/*
	* IF and EVALUATE are partially parsed in cdf.y. ELSE and WHEN, etc., are
	* valid only in context.
	*/
	// ELSE and END-IF are accepted only when scanner_token() reports CDF_IF;
	// WHEN and END-EVALUATE only when it reports CDF_EVALUATE.  Everything else
	// is accepted unconditionally.
	static bool
	valid_conditional_context( int token ) {
	  if( token == CDF_ELSE || token == CDF_END_IF ) {
	    return scanner_token() == CDF_IF;
	  }
	  if( token == CDF_WHEN || token == CDF_END_EVALUATE ) {
	    return scanner_token() == CDF_EVALUATE;
	  }
	  // CDF_DEFINE, CDF_IF, CDF_EVALUATE, and all other tokens need no context.
	  return true;
	}

	/*
	 * Parse one CDF directive that starts with token.
	 *
	 * Reports a syntax error and returns false if token is not valid in the
	 * current conditional context.  Otherwise injects token for the CDF parser,
	 * runs ydfparse between parser_save/parser_restore, pops the cdf_state start
	 * condition if the scanner is still in it, and returns true when ydfparse
	 * returned 0.
	 */
	static bool
	run_cdf( int token ) {
	  const bool context_ok = valid_conditional_context(token);
	  if( ! context_ok ) {
	    error_msg(yylloc, "CDF syntax error at '%s'", keyword_str(token));
	    return false;
	  }

	  // The CDF parser needs to see the directive's first token.
	  parsing.inject_token(token);

	  if( yy_flex_debug ) {
	    dbgmsg("CDF parser start with '%s'", keyword_str(token));
	  }

	  parsing.parser_save(ydfparse);
	  const int status = ydfparse();
	  parsing.parser_restore();

	  if( YY_START == cdf_state ) {
	    yy_pop_state();
	  }

	  if( yy_flex_debug ) {
	    dbgmsg("CDF parser returned %d, scanner SC <%s>", status, start_condition_is());
	  }

	  return status == 0;
	}

	#include <queue>
	// A token number paired with a copy of a semantic value.
	struct pending_token_t {
	  int token;
	  YYSTYPE value;

	  pending_token_t( int tok, YYSTYPE val )
	    : token(tok)
	    , value(val)
	  {}
	};
	// Build a pending_token_t for token T carrying the current yylval.
	#define PENDING(T) pending_token_t( (T), yylval )

	// FIFO of pending tokens; not referenced by the other code visible here.
	static std::queue<pending_token_t> pending_tokens;

	// Fetch the next token straight from lexer().
	int next_token() {
	  return lexer();
	}

	// Defined elsewhere.  prelex reuses it as the next token when it is > 0
	// (the CDF parser's discarded lookahead).
	extern int ydfchar;
	// Declared here, defined elsewhere; not called by the code visible here.
	bool in_procedure_division(void);

	// act on CDF tokens
	int
	prelex() {
	static bool in_cdf = false;
	int token = next_token();

	if( in_cdf ) { return token; }
	if( ! is_cdf_token(token) ) { return token; }

	in_cdf = true;

	assert(is_cdf_token(token));

	while( is_cdf_token(token) ) {

	if( ! run_cdf(token) ) {
	dbgmsg( ">>CDF parser failed, ydfchar %d", ydfchar );
	}
	// Return the CDF's discarded lookahead token, if extant.
	token = ydfchar > 0? ydfchar : next_token();
	if( token == NO_CONDITION && parsing.at_eof() ) {
	return YYEOF;
	}

	// Reenter cdf parser only if next token could affect parsing state.
	if( ! parsing.on() && ! is_cdf_condition_token(token) ) break;
	}

	if( yy_flex_debug ) {
	dbgmsg("scanner SC <%s>", start_condition_is());
	}

	if( YY_START == copy_state \|\| YY_START == cdf_state ) {
	if( token == NAME ) {
	auto tok = keyword_tok(ydflval.string);
	if( tok ) token = tok;
	}
	yy_pop_state();
	dbgmsg("scanner SC <%s>, token now %s",
	start_condition_is(), keyword_str(token));
	}

	/*
	* The final, rejected CDF token might be a LEVEL number.
	*/
	if( YY_START == field_state && level_needed() ) {
	switch( token ) {
	case NUMSTR:
	if( yy_flex_debug ) yywarn("final token is NUMSTR");
	yylval.number = level_of(yylval.numstr.string);
	token = LEVEL;
	break;
	case YDF_NUMBER:
	if( yy_flex_debug ) yywarn("final token is %<YDF_NUMBER%>");
	yylval.number = ydflval.number;
	token = LEVEL;
	break;
	}
	if( token == LEVEL ) {
	switch(yylval.number) {
	case 66:
	token = LEVEL66;
	break;
	case 78:
	token = LEVEL78;
	break;
	case 88:
	token = LEVEL78;
	break;
	}
	}
	}

	dbgmsg( ">>CDF parser done, %s returning "
	"%s (because final_token %s, lookhead %d) on line %d", __func__,
	keyword_str(token), keyword_str(final_token),
	ydfchar, yylineno );
	in_cdf = false;
	return token;
	}

	/* There are 2 parsers and one scanner.
	* yyparse calls yylex.
	* yylex calls prelex
	* prelex calls lexer, the scanner produced by flex.
	* lexer reads input from yyin via lexer_input.
	*
	* prelex intercepts CDF statements, each of which it parses with ydfparse.
	* ydfparse affects CDF variables, which may affect how yylex treats
	* the input stream.
	*
	* Because the lexer is called recursively:
	*
	* yyparse -> yylex -> ydfparse -> yylex
	*
	* the global state of the scanner has changed when ydfparse returns. Part of
	* that state is the unused lookahead token that ydfparse discarded, stored in
	* final_token. prelex then returns final_token as its own, which is duly
	* returned to yyparse.
	*/

	/*
	* Token source for the parser.
	*
	* Returns, in order of precedence: YYEOF once parsing.at_eof() is true; a
	* nonzero token from parsing.pending_token(); otherwise the first token from
	* prelex() that is either zero or accepted by parsing.feed_a_parser().
	* A YYEOF token seen while parsing.in_cdf() is true is not returned as such:
	* parsing.at_eof(true) is recorded and NO_CONDITION is returned instead.
	*/
	int
	yylex(void) {
	// Set when the previous call returned '.' while next_sentence was nonzero.
	static bool produce_next_sentence_target = false;
	// Fetched before the EOF test, so pending_token() is called on every entry.
	int token = parsing.pending_token();

	if( parsing.at_eof() ) return YYEOF;
	if( token ) return token;

	/*
	* NEXT SENTENCE jumps to an implied CONTINUE at the next dot ('.').
	* Documentation says variously that the implied CONTINUE is before or after
	* that dot, but the meaning is one: after the statement that precedes the
	* dot.
	*
	* When the lexer encounters the dot, it returns it to the parser, which may
	* use it as a look-ahead token to decide the grammar production. By the
	* time it returns to the lexer looking for its next token, the parser will
	* have taken whatever actions the dot decided. At that point, the lexer
	* injects the label that NEXT SENTENCE jumps to.
	*/
	if( produce_next_sentence_target ) {
	next_sentence_label(next_sentence);
	produce_next_sentence_target = false;
	}

	// Scan until a token is accepted by feed_a_parser(), or the token is zero.
	do {
	token = prelex();
	if( yy_flex_debug ) {
	if( parsing.in_cdf() ) {
	dbgmsg( "%s:%d: <%s> routing %s to CDF parser", __func__, __LINE__,
	start_condition_is(), keyword_str(token) );
	} else if( !parsing.on() ) {
	dbgmsg( "eating %s because conditional compilation is FALSE",
	keyword_str(token) );
	}
	}

	} while( token && ! parsing.feed_a_parser() );

	// Emit the NEXT SENTENCE label on the following call, not this one.
	if( next_sentence && token == '.' ) {
	produce_next_sentence_target = true;
	}

	// Remember the last token returned while parsing.normal() is true.
	if( parsing.normal() ) {
	final_token = token;
	}

	// End of input while in_cdf(): record EOF, return NO_CONDITION this once.
	if( token == YYEOF && parsing.in_cdf() ) {
	if( yy_flex_debug) dbgmsg("deflecting EOF");
	parsing.at_eof(true);
	return NO_CONDITION;
	}

	return token;
	}