gcc/ada/scans.ads - gcc - Git at Google

 ------------------------------------------------------------------------------
 --                                                                          --
 --                         GNAT COMPILER COMPONENTS                         --
 --                                                                          --
 --                                S C A N S                                 --
 --                                                                          --
 --                                 S p e c                                  --
 --                                                                          --
 --          Copyright (C) 1992-2023, Free Software Foundation, Inc.         --
 --                                                                          --
 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
 -- terms of the  GNU General Public License as published  by the Free Soft- --
 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
 -- for  more details.  You should have  received  a copy of the GNU General --
 -- Public License  distributed with GNAT; see file COPYING3.  If not, go to --
 -- http://www.gnu.org/licenses for a complete copy of the license.          --
 --                                                                          --
 -- GNAT was originally developed  by the GNAT team at  New York University. --
 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
 --                                                                          --
 ------------------------------------------------------------------------------

 with Namet;  use Namet;
 with Types;  use Types;
 with Uintp;  use Uintp;
 with Urealp; use Urealp;

 package Scans is

 --  The scanner maintains a current state in the global variables defined
 --  in this package. The call to the Scan routine advances this state to
 --  the next token. The state is initialized by the call to one of the
 --  initialization routines in Sinput.

    --  The following type is used to identify token types returned by Scan.
    --  The class column in this table indicates the token classes which
    --  apply to the token, as defined by subsequent subtype declarations.
    --
    --  The order of the literals is significant: every token class below is
    --  declared as a contiguous range of this type, so adding, removing or
    --  moving a literal can silently change the membership of those classes
    --  (and of the ranges used in Is_Reserved_Keyword).

    type Token_Type is (

       --  Token name          Token type   Class(es)

       Tok_Integer_Literal, -- numeric lit  Literal, Lit_Or_Name

       Tok_Real_Literal,    -- numeric lit  Literal, Lit_Or_Name

       Tok_String_Literal,  -- string lit   Literal, Lit_Or_Name

       Tok_Char_Literal,    -- char lit     Name, Literal, Lit_Or_Name

       Tok_Operator_Symbol, -- op symbol    Name, Literal, Lit_Or_Name, Desig

       Tok_Identifier,      -- identifier   Name, Lit_Or_Name, Desig

       Tok_At_Sign,         -- @            Name, Desig
       --  Tok_At_Sign is the target name symbol of AI12-0125-3

       Tok_Double_Asterisk, -- **

       Tok_Ampersand,       -- &            Binary_Addop
       Tok_Minus,           -- -            Binary_Addop, Unary_Addop
       Tok_Plus,            -- +            Binary_Addop, Unary_Addop

       Tok_Asterisk,        -- *            Mulop
       Tok_Mod,             -- MOD          Mulop
       Tok_Rem,             -- REM          Mulop
       Tok_Slash,           -- /            Mulop

       Tok_New,             -- NEW

       Tok_Abs,             -- ABS
       Tok_Others,          -- OTHERS
       Tok_Null,            -- NULL

       --  Note: Tok_Raise is in no categories now, it used to be Cterm, Eterm,
       --  After_SM, but now that Ada 2012 has added raise expressions, the
       --  raise token can appear anywhere. Note in particular that Tok_Raise
       --  being in Eterm stopped the parser from recognizing "return raise
       --  exception-name". This degrades error recovery slightly, and perhaps
       --  we could do better, but not worth the effort.

       --  Ada 2022 introduces square brackets as delimiters for array and
       --  container aggregates.

       --  The left delimiter token of interpolated strings, and tokens { and }
       --  of interpolated expressions are currently placed in no category since
       --  they don't fit well in the existing categories.

       Tok_Left_Interpolated_String, -- f"
       Tok_Left_Curly_Bracket,       -- {
       Tok_Raise,                    -- RAISE
       Tok_Right_Curly_Bracket,      -- }

       Tok_Dot,             -- .            Namext
       Tok_Apostrophe,      -- '            Namext

       Tok_Left_Bracket,    -- [            Namext
       Tok_Left_Paren,      -- (            Namext, Consk

       Tok_Delta,           -- DELTA        Atkwd, Sterm, Consk
       Tok_Digits,          -- DIGITS       Atkwd, Sterm, Consk
       Tok_Range,           -- RANGE        Atkwd, Sterm, Consk

       Tok_Right_Paren,     -- )            Sterm
       Tok_Right_Bracket,   -- ]            Sterm
       Tok_Comma,           -- ,            Sterm

       Tok_And,             -- AND          Logop, Sterm
       Tok_Or,              -- OR           Logop, Sterm
       Tok_Xor,             -- XOR          Logop, Sterm

       Tok_Less,            -- <            Relop, Sterm
       Tok_Equal,           -- =            Relop, Sterm
       Tok_Greater,         -- >            Relop, Sterm
       Tok_Not_Equal,       -- /=           Relop, Sterm
       Tok_Greater_Equal,   -- >=           Relop, Sterm
       Tok_Less_Equal,      -- <=           Relop, Sterm

       Tok_In,              -- IN           Relop, Sterm
       Tok_Not,             -- NOT          Relop, Sterm

       Tok_Box,             -- <>           Relop, Sterm
       Tok_Colon_Equal,     -- :=           Eterm, Sterm
       Tok_Colon,           -- :            Eterm, Sterm
       Tok_Greater_Greater, -- >>           Eterm, Sterm

       Tok_Abstract,        -- ABSTRACT     Eterm, Sterm
       Tok_Access,          -- ACCESS       Eterm, Sterm
       Tok_Aliased,         -- ALIASED      Eterm, Sterm
       Tok_All,             -- ALL          Eterm, Sterm
       Tok_Array,           -- ARRAY        Eterm, Sterm
       Tok_At,              -- AT           Eterm, Sterm
       Tok_Body,            -- BODY         Eterm, Sterm
       Tok_Constant,        -- CONSTANT     Eterm, Sterm
       Tok_Do,              -- DO           Eterm, Sterm
       Tok_Is,              -- IS           Eterm, Sterm
       Tok_Interface,       -- INTERFACE    Eterm, Sterm
       Tok_Limited,         -- LIMITED      Eterm, Sterm
       Tok_Of,              -- OF           Eterm, Sterm
       Tok_Out,             -- OUT          Eterm, Sterm
       Tok_Record,          -- RECORD       Eterm, Sterm
       Tok_Renames,         -- RENAMES      Eterm, Sterm
       Tok_Reverse,         -- REVERSE      Eterm, Sterm
       Tok_Some,            -- SOME         Eterm, Sterm
       Tok_Tagged,          -- TAGGED       Eterm, Sterm
       Tok_Then,            -- THEN         Eterm, Sterm

       Tok_Less_Less,       -- <<           Eterm, Sterm, After_SM

       Tok_Abort,           -- ABORT        Eterm, Sterm, After_SM
       Tok_Accept,          -- ACCEPT       Eterm, Sterm, After_SM
       Tok_Case,            -- CASE         Eterm, Sterm, After_SM
       Tok_Delay,           -- DELAY        Eterm, Sterm, After_SM
       Tok_Else,            -- ELSE         Eterm, Sterm, After_SM
       Tok_Elsif,           -- ELSIF        Eterm, Sterm, After_SM
       Tok_End,             -- END          Eterm, Sterm, After_SM
       Tok_Exception,       -- EXCEPTION    Eterm, Sterm, After_SM
       Tok_Exit,            -- EXIT         Eterm, Sterm, After_SM
       Tok_Goto,            -- GOTO         Eterm, Sterm, After_SM
       Tok_If,              -- IF           Eterm, Sterm, After_SM
       Tok_Pragma,          -- PRAGMA       Eterm, Sterm, After_SM
       Tok_Requeue,         -- REQUEUE      Eterm, Sterm, After_SM
       Tok_Return,          -- RETURN       Eterm, Sterm, After_SM
       Tok_Select,          -- SELECT       Eterm, Sterm, After_SM
       Tok_Terminate,       -- TERMINATE    Eterm, Sterm, After_SM
       Tok_Until,           -- UNTIL        Eterm, Sterm, After_SM
       Tok_When,            -- WHEN         Eterm, Sterm, After_SM

       Tok_Begin,           -- BEGIN        Eterm, Sterm, After_SM, Labeled_Stmt
       Tok_Declare,         -- DECLARE      Eterm, Sterm, After_SM, Labeled_Stmt
       Tok_For,             -- FOR          Eterm, Sterm, After_SM, Labeled_Stmt
       Tok_Loop,            -- LOOP         Eterm, Sterm, After_SM, Labeled_Stmt
       Tok_While,           -- WHILE        Eterm, Sterm, After_SM, Labeled_Stmt

       Tok_Entry,           -- ENTRY        Eterm, Sterm, Declk, Deckn, After_SM
       Tok_Protected,       -- PROTECTED    Eterm, Sterm, Declk, Deckn, After_SM
       Tok_Task,            -- TASK         Eterm, Sterm, Declk, Deckn, After_SM
       Tok_Type,            -- TYPE         Eterm, Sterm, Declk, Deckn, After_SM
       Tok_Subtype,         -- SUBTYPE      Eterm, Sterm, Declk, Deckn, After_SM
       Tok_Overriding,      -- OVERRIDING   Eterm, Sterm, Declk, Deckn, After_SM
       Tok_Synchronized,    -- SYNCHRONIZED Eterm, Sterm, Declk, Deckn, After_SM
       Tok_Use,             -- USE          Eterm, Sterm, Declk, Deckn, After_SM

       Tok_Function,        -- FUNCTION     Eterm, Sterm, Cunit, Declk, After_SM
       Tok_Generic,         -- GENERIC      Eterm, Sterm, Cunit, Declk, After_SM
       Tok_Package,         -- PACKAGE      Eterm, Sterm, Cunit, Declk, After_SM
       Tok_Procedure,       -- PROCEDURE    Eterm, Sterm, Cunit, Declk, After_SM

       Tok_Private,         -- PRIVATE      Eterm, Sterm, Cunit, After_SM
       Tok_With,            -- WITH         Eterm, Sterm, Cunit, After_SM
       Tok_Separate,        -- SEPARATE     Eterm, Sterm, Cunit, After_SM

       Tok_EOF,             -- End of file  Eterm, Sterm, Cterm, After_SM

       Tok_Semicolon,       -- ;            Eterm, Sterm, Cterm

       Tok_Arrow,           -- =>           Sterm, Cterm, Chtok

       Tok_Vertical_Bar,    -- |            Cterm, Sterm, Chtok

       Tok_Dot_Dot,         -- ..           Sterm, Chtok

       Tok_Project,
       Tok_Extends,
       Tok_External,
       Tok_External_As_List,
       --  These four entries represent keywords for the project file language
       --  and can be returned only in the case of scanning project files.

       Tok_Comment,
       --  This entry is used when scanning project files (where it represents
       --  an entire comment), and in preprocessing with the -C switch set
       --  (where it represents just the "--" of a comment). For the project
       --  file case, the text of the comment is stored in Comment_Id.

       Tok_End_Of_Line,
       --  Represents an end of line. Not used during normal compilation scans
       --  where end of line is ignored. Active for preprocessor scanning.

       Tok_Special,
       --  Special character used by the preprocessor. The character itself is
       --  stored in Special_Character below.

       No_Token);
       --  No_Token is used for initializing Token values to indicate that
       --  no value has been set yet.

    function Keyword_Name (Token : Token_Type) return Name_Id;
    --  Given a token that is a reserved word, return the corresponding Name_Id
    --  in lower case. E.g. Keyword_Name (Tok_Begin) = Name_Find ("begin").
    --  It is an error to pass any other kind of token (for example a literal,
    --  a delimiter or No_Token); this spec does not state how such a call
    --  fails.

    --  Note: in the RM, operator symbol is a special case of string literal.
    --  We distinguish at the lexical level in this compiler, since there are
    --  many syntactic situations in which only an operator symbol is allowed.

    --  The following subtype declarations group the token types into classes.
    --  These are used for class tests in the parser.

    subtype Token_Class_Numeric_Literal is
      Token_Type range Tok_Integer_Literal .. Tok_Real_Literal;
    --  Numeric literal (integer literal or real literal)

    subtype Token_Class_Literal is
      Token_Type range Tok_Integer_Literal .. Tok_Operator_Symbol;
    --  Literal (integer, real, string or character literal, or operator
    --  symbol)

    subtype Token_Class_Lit_Or_Name is
      Token_Type range Tok_Integer_Literal .. Tok_Identifier;
    --  Literal or name: every token in Token_Class_Literal plus identifiers.
    --  Note that Tok_At_Sign, although a member of Token_Class_Name, lies
    --  just outside this range.

    subtype Token_Class_Binary_Addop is
      Token_Type range Tok_Ampersand .. Tok_Plus;
    --  Binary adding operator (& - +)

    subtype Token_Class_Unary_Addop is
      Token_Type range Tok_Minus .. Tok_Plus;
    --  Unary adding operator (- +)

    subtype Token_Class_Mulop is
      Token_Type range Tok_Asterisk .. Tok_Slash;
    --  Multiplying operator (* mod rem /)

    subtype Token_Class_Logop is
      Token_Type range Tok_And .. Tok_Xor;
    --  Logical operator (and, or, xor)

    subtype Token_Class_Relop is
      Token_Type range Tok_Less .. Tok_Box;
    --  Relational operator (< = > /= >= <=, together with IN and NOT, plus
    --  <> to catch misuse of Pascal style not equal operator).

    subtype Token_Class_Name is
    Token_Type range Tok_Char_Literal .. Tok_At_Sign;
    --  First token of name (4.1),
    --    (char literal, operator symbol, identifier)
    --  Also includes '@', the target name symbol (AI12-0125-3), added after
    --  the Ada 2012 corrigendum.

    subtype Token_Class_Desig is
      Token_Type range Tok_Operator_Symbol .. Tok_At_Sign;
    --  Token which can be a Designator (operator symbol, identifier). The
    --  range as declared also covers Tok_At_Sign.

    subtype Token_Class_Namext is
      Token_Type range Tok_Dot .. Tok_Left_Paren;
    --  Name extension tokens. These are tokens which can appear immediately
    --  after a name to extend it recursively (period, quote, left bracket,
    --  left paren)

    subtype Token_Class_Consk is
      Token_Type range Tok_Left_Paren .. Tok_Range;
    --  Tokens which can start a constraint
    --    (left paren, delta, digits, range)

    subtype Token_Class_Eterm is
      Token_Type range Tok_Colon_Equal .. Tok_Semicolon;
    --  Expression terminators. These tokens can never appear within a simple
    --  expression. This is used for error recovery purposes (if we encounter
    --  an error in an expression, we simply scan to the next Eterm token).
    --  The range starts at Tok_Colon_Equal, so Tok_Box (the literal just
    --  before it) is not an Eterm token.

    subtype Token_Class_Sterm is
      Token_Type range Tok_Delta .. Tok_Dot_Dot;
    --  Simple_Expression terminators. A Simple_Expression must be followed
    --  by a token in this class, or an error message is issued complaining
    --  about a missing binary operator. This range contains all of the
    --  Eterm, Cterm and Chtok ranges.

    subtype Token_Class_Atkwd is
      Token_Type range Tok_Delta .. Tok_Range;
    --  Attribute keywords. This class includes the keywords which can be
    --  used as an Attribute_Designator, namely DELTA, DIGITS and RANGE.

    subtype Token_Class_Cterm is
      Token_Type range Tok_EOF .. Tok_Vertical_Bar;
    --  Choice terminators (end of file, semicolon, arrow, vertical bar).
    --  These tokens terminate a choice. This is used for error recovery
    --  purposes (if we encounter an error in a Choice, we simply scan to the
    --  next Cterm token).

    subtype Token_Class_Chtok is
      Token_Type range Tok_Arrow .. Tok_Dot_Dot;
    --  Choice tokens (=> | ..). These tokens signal a choice when used in an
    --  Aggregate.

    subtype Token_Class_Cunit is
      Token_Type range Tok_Function .. Tok_Separate;
    --  Tokens which can begin a compilation unit (FUNCTION, GENERIC, PACKAGE,
    --  PROCEDURE, PRIVATE, WITH, SEPARATE)

    subtype Token_Class_Declk is
      Token_Type range Tok_Entry .. Tok_Procedure;
    --  Keywords which start a declaration (the Deckn keywords followed by
    --  FUNCTION, GENERIC, PACKAGE and PROCEDURE)

    subtype Token_Class_Deckn is
      Token_Type range Tok_Entry .. Tok_Use;
    --  Keywords which start a declaration but can't start a compilation unit
    --  (ENTRY, PROTECTED, TASK, TYPE, SUBTYPE, OVERRIDING, SYNCHRONIZED, USE)

    subtype Token_Class_After_SM is
      Token_Type range Tok_Less_Less .. Tok_EOF;
    --  Tokens which always, or almost always, appear after a semicolon. Used
    --  in the Resync_Past_Semicolon routine to avoid gobbling up stuff when
    --  a semicolon is missing. Of significance only for error recovery.
    --  The range runs from << through the end of file token.

    subtype Token_Class_Labeled_Stmt is
      Token_Type range Tok_Begin .. Tok_While;
    --  Tokens which start labeled statements (BEGIN, DECLARE, FOR, LOOP,
    --  WHILE)

    type Token_Flag_Array is array (Token_Type) of Boolean;
    --  Table holding one Boolean flag for each Token_Type value
    Is_Reserved_Keyword : constant Token_Flag_Array :=
                            Token_Flag_Array'
                              (Tok_Mod      .. Tok_Rem      => True,
                               Tok_New      .. Tok_Null     => True,
                               Tok_Delta    .. Tok_Range    => True,
                               Tok_And      .. Tok_Xor      => True,
                               Tok_In       .. Tok_Not      => True,
                               Tok_Abstract .. Tok_Then     => True,
                               Tok_Abort    .. Tok_Separate => True,
                               others                       => False);
    --  Flag array used to test for reserved word. An entry is True exactly
    --  when the token lies in one of the ranges listed above; these ranges
    --  depend on the declaration order of Token_Type. Delimiters, literals,
    --  the project file keywords and No_Token are all False.
    --
    --  NOTE(review): Tok_Raise is declared between Tok_Left_Curly_Bracket and
    --  Tok_Right_Curly_Bracket, outside every range above, so its entry is
    --  False although RAISE is a reserved word. Confirm that this is intended.

    procedure Initialize_Ada_Keywords;
    --  Set up Token_Type values in Names table entries for Ada reserved
    --  words. This ignores Ada_Version, i.e. the reserved words are set up
    --  regardless of the language version in use; Ada_Version is taken into
    --  account in Snames.Is_Keyword_Name.

    --------------------------
    -- Scan State Variables --
    --------------------------

    --  Note: these variables can only be referenced during the parsing of a
    --  file. Reference to any of them from Sem or the expander is wrong.

    --  These variables are initialized by Scn.Initialize_Scanner, and should
    --  not be referenced before such a call, except for saving and restoring
    --  them.

    Scan_Ptr : Source_Ptr := No_Location;
    --  Current scan pointer location. After a call to Scan, this points
    --  just past the end of the token just scanned.

    Token : Token_Type := No_Token;
    --  Type of current token (No_Token if no value has been set yet)

    Token_Ptr : Source_Ptr := No_Location;
    --  Pointer to first character of current token

    Current_Line_Start : Source_Ptr := No_Location;
    --  Pointer to first character of line containing current token

    Start_Column : Column_Number := No_Column_Number;
    --  Starting column number (zero origin) of the first non-blank character
    --  on the line containing the current token. This is used for error
    --  recovery circuits which depend on looking at how columns line up.

    Type_Token_Location : Source_Ptr := No_Location;
    --  Within a type declaration, gives the location of the TYPE keyword that
    --  opened the type declaration. Used in checking the end column of a record
    --  declaration, which can line up either with the TYPE keyword, or with the
    --  start of the line containing the RECORD keyword.

    Checksum : Word := 0;
    --  Used to accumulate a CRC representing the tokens in the source
    --  file being compiled. This CRC includes only program tokens, and
    --  excludes comments.

    Limited_Checksum : Word := 0;
    --  Used to accumulate a CRC representing significant tokens in the
    --  limited view of a package, i.e. visible type names and related
    --  tagged indicators.

    First_Non_Blank_Location : Source_Ptr := No_Location;
    --  Location of first non-blank character on the line containing the
    --  current token (i.e. the location of the character whose column number
    --  is stored in Start_Column).

    Token_Node : Node_Id := Empty;
    --  Node table Id for the current token. This is set only if the current
    --  token is one for which the scanner constructs a node (i.e. it is an
    --  identifier, operator symbol, or literal). For other token types,
    --  Token_Node is undefined.

    Token_Name : Name_Id := No_Name;
    --  For identifiers, this is set to the Name_Id of the identifier scanned.
    --  For all other tokens, Token_Name is set to Error_Name. Note that it
    --  would be possible for the caller to extract this information from
    --  Token_Node. We set Token_Name separately for two reasons. First it
    --  allows a quicker test for a specific identifier. Second, it allows
    --  a version of the parser to be built that does not build tree nodes,
    --  usable as a syntax checker.

    Prev_Token : Token_Type := No_Token;
    --  Type of previous token (No_Token if no value has been set yet)

    Prev_Token_Ptr : Source_Ptr;
    --  Pointer to first character of previous token.
    --  NOTE(review): unlike Token_Ptr, this variable has no default initial
    --  value in its declaration.

    Version_To_Be_Found : Boolean;
    --  This flag is True if the scanner is still looking for an RCS version
    --  number in a comment. Normally it is initialized to False so that this
    --  circuit is not activated. If the -dv switch is set, then this flag is
    --  initialized to True, and then reset when the version number is found.
    --  We do things this way to minimize the impact on comment scanning.
    --  NOTE(review): the declaration gives no default value, so the
    --  initialization described above must be done elsewhere; confirm.

    Character_Code : Char_Code;
    --  Valid only when Token is Tok_Char_Literal. Contains the value of the
    --  scanned literal.

    Real_Literal_Value : Ureal;
    --  Valid only when Token is Tok_Real_Literal. Contains the value of the
    --  scanned literal.

    Int_Literal_Value : Uint;
    --  Valid only when Token = Tok_Integer_Literal, and we are not in
    --  syntax-only mode. Contains the value of the scanned literal.

    Based_Literal_Uses_Colon : Boolean;
    --  Valid only when Token = Tok_Integer_Literal or Tok_Real_Literal. Set
    --  True only for the case of a based literal using ':' instead of '#'.

    String_Literal_Id : String_Id;
    --  Valid only when Token = Tok_String_Literal or Tok_Operator_Symbol.
    --  Contains the Id for currently scanned string value.

    Wide_Character_Found : Boolean := False;
    --  Valid only when Token = Tok_String_Literal. Set True if wide character
    --  found (i.e. a character that does not fit in Character, but fits in
    --  Wide_Wide_Character).

    Wide_Wide_Character_Found : Boolean := False;
    --  Valid only when Token = Tok_String_Literal. Set True if wide wide
    --  character found (i.e. a character that does not fit in Character or
    --  Wide_Character).

    subtype Special_Preprocessor_Character is Character with
      Predicate => Special_Preprocessor_Character in '#' | '$';
    --  The characters that can be stored in Special_Character: '#' and '$'
    Special_Character : Special_Preprocessor_Character;
    --  Holds the character itself when Token = Tok_Special, i.e. when the
    --  token is a special character used by the preprocessor.

    Comment_Id : Name_Id := No_Name;
    --  Valid only when Token = Tok_Comment. Store the string that follows
    --  the "--" of a comment when scanning project files.
    --
    --  Is it really right for this to be a Name rather than a String, what
    --  about the case of Wide_Wide_Characters???

    Inside_Depends : Boolean := False;
    --  True while parsing the argument of a Depends or Refined_Depends pragma
    --  or aspect. Used to allow/require nonstandard style rules for =>+ with
    --  -gnatyt.

    Inside_Interpolated_String_Expression : Boolean := False;
    --  True while parsing an interpolated string expression

    Inside_Interpolated_String_Literal : Boolean := False;
    --  True while parsing an interpolated string literal

    Inside_If_Expression : Nat := 0;
    --  This is a counter that is set non-zero while scanning out an if
    --  expression (incremented on entry, decremented on exit). It is used to
    --  disconnect format checks that normally apply to keywords THEN, ELSE etc.

    Inside_Pragma : Boolean := False;
    --  True within a pragma. Used to avoid complaining about reserved words
    --  within pragmas (see Scan_Reserved_Identifier).

    --------------------------------------------------------
    -- Procedures for Saving and Restoring the Scan State --
    --------------------------------------------------------

    --  The following procedures can be used to save and restore the entire
    --  scan state. They are used in cases where it is necessary to back up
    --  the scan during the parse.

    type Saved_Scan_State is private;
    --  Used for saving and restoring the scan state. Values of this type are
    --  filled in by Save_Scan_State and consumed by Restore_Scan_State.

    procedure Save_Scan_State (Saved_State : out Saved_Scan_State);
    pragma Inline (Save_Scan_State);
    --  Saves the current scan state in Saved_State for possible later
    --  restoration. Note that there is no harm in saving the state and then
    --  never restoring it.

    procedure Restore_Scan_State (Saved_State : Saved_Scan_State);
    pragma Inline (Restore_Scan_State);
    --  Restores a scan state saved in Saved_State by a previous call to
    --  Save_Scan_State. The saved scan state must refer to the current source
    --  file.

 private
    type Saved_Scan_State is record
       Save_Scan_Ptr                 : Source_Ptr;
       Save_Token                    : Token_Type;
       Save_Token_Ptr                : Source_Ptr;
       Save_Current_Line_Start       : Source_Ptr;
       Save_Start_Column             : Column_Number;
       Save_Checksum                 : Word;
       Save_First_Non_Blank_Location : Source_Ptr;
       Save_Token_Node               : Node_Id;
       Save_Token_Name               : Name_Id;
       Save_Prev_Token               : Token_Type;
       Save_Prev_Token_Ptr           : Source_Ptr;
    end record;
    --  Full view of the saved scan state. Each component Save_xxx has the
    --  same type as the scan state variable xxx declared in the visible part;
    --  presumably it holds a copy of that variable (confirm in the body).
    --
    --  NOTE(review): there is no component for several scan variables, e.g.
    --  Type_Token_Location, Limited_Checksum, the literal value variables and
    --  the Inside_xxx flags, so these cannot be part of the saved state.

 end Scans;
	------------------------------------------------------------------------------
	-- --
	-- GNAT COMPILER COMPONENTS --
	-- --
	-- S C A N S --
	-- --
	-- S p e c --
	-- --
	-- Copyright (C) 1992-2023, Free Software Foundation, Inc. --
	-- --
	-- GNAT is free software; you can redistribute it and/or modify it under --
	-- terms of the GNU General Public License as published by the Free Soft- --
	-- ware Foundation; either version 3, or (at your option) any later ver- --
	-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
	-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
	-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
	-- for more details. You should have received a copy of the GNU General --
	-- Public License distributed with GNAT; see file COPYING3. If not, go to --
	-- http://www.gnu.org/licenses for a complete copy of the license. --
	-- --
	-- GNAT was originally developed by the GNAT team at New York University. --
	-- Extensive contributions were provided by Ada Core Technologies Inc. --
	-- --
	------------------------------------------------------------------------------

	with Namet; use Namet;
	with Types; use Types;
	with Uintp; use Uintp;
	with Urealp; use Urealp;

	package Scans is

	-- The scanner maintains a current state in the global variables defined
	-- in this package. The call to the Scan routine advances this state to
	-- the next token. The state is initialized by the call to one of the
	-- initialization routines in Sinput.

	-- The following type is used to identify token types returned by Scan.
	-- The class column in this table indicates the token classes which
	-- apply to the token, as defined by subsequent subtype declarations.

	type Token_Type is (

	-- Token name Token type Class(es)

	Tok_Integer_Literal, -- numeric lit Literal, Lit_Or_Name

	Tok_Real_Literal, -- numeric lit Literal, Lit_Or_Name

	Tok_String_Literal, -- string lit Literal, Lit_Or_Name

	Tok_Char_Literal, -- char lit Name, Literal, Lit_Or_Name

	Tok_Operator_Symbol, -- op symbol Name, Literal, Lit_Or_Name, Desig

	Tok_Identifier, -- identifier Name, Lit_Or_Name, Desig

	Tok_At_Sign, -- @ Name, Desig (AI12-0125-3 : target name)

	Tok_Double_Asterisk, -- **

	Tok_Ampersand, -- & Binary_Addop
	Tok_Minus, -- - Binary_Addop, Unary_Addop
	Tok_Plus, -- + Binary_Addop, Unary_Addop

	Tok_Asterisk, -- * Mulop
	Tok_Mod, -- MOD Mulop
	Tok_Rem, -- REM Mulop
	Tok_Slash, -- / Mulop

	Tok_New, -- NEW

	Tok_Abs, -- ABS
	Tok_Others, -- OTHERS
	Tok_Null, -- NULL

	-- Note: Tok_Raise is in no categories now, it used to be Cterm, Eterm,
	-- After_SM, but now that Ada 2012 has added raise expressions, the
	-- raise token can appear anywhere. Note in particular that Tok_Raise
	-- being in Eterm stopped the parser from recognizing "return raise
	-- exception-name". This degrades error recovery slightly, and perhaps
	-- we could do better, but not worth the effort.

	-- Ada 2022 introduces square brackets as delimiters for array and
	-- container aggregates.

	-- The left delimiter token of interpolated strings, and tokens { and }
	-- of interpolated expressions are currently placed in no category since
	-- they don't fit well in the existing categories.

	Tok_Left_Interpolated_String, -- f"
	Tok_Left_Curly_Bracket, -- {
	Tok_Raise, -- RAISE
	Tok_Right_Curly_Bracket, -- }

	Tok_Dot, -- . Namext
	Tok_Apostrophe, -- ' Namext

	Tok_Left_Bracket, -- [ Namext
	Tok_Left_Paren, -- ( Namext, Consk

	Tok_Delta, -- DELTA Atkwd, Sterm, Consk
	Tok_Digits, -- DIGITS Atkwd, Sterm, Consk
	Tok_Range, -- RANGE Atkwd, Sterm, Consk

	Tok_Right_Paren, -- ) Sterm
	Tok_Right_Bracket, -- ] Sterm
	Tok_Comma, -- , Sterm

	Tok_And, -- AND Logop, Sterm
	Tok_Or, -- OR Logop, Sterm
	Tok_Xor, -- XOR Logop, Sterm

	Tok_Less, -- < Relop, Sterm
	Tok_Equal, -- = Relop, Sterm
	Tok_Greater, -- > Relop, Sterm
	Tok_Not_Equal, -- /= Relop, Sterm
	Tok_Greater_Equal, -- >= Relop, Sterm
	Tok_Less_Equal, -- <= Relop, Sterm

	Tok_In, -- IN Relop, Sterm
	Tok_Not, -- NOT Relop, Sterm

	Tok_Box, -- <> Relop, Sterm
	Tok_Colon_Equal, -- := Eterm, Sterm
	Tok_Colon, -- : Eterm, Sterm
	Tok_Greater_Greater, -- >> Eterm, Sterm

	Tok_Abstract, -- ABSTRACT Eterm, Sterm
	Tok_Access, -- ACCESS Eterm, Sterm
	Tok_Aliased, -- ALIASED Eterm, Sterm
	Tok_All, -- ALL Eterm, Sterm
	Tok_Array, -- ARRAY Eterm, Sterm
	Tok_At, -- AT Eterm, Sterm
	Tok_Body, -- BODY Eterm, Sterm
	Tok_Constant, -- CONSTANT Eterm, Sterm
	Tok_Do, -- DO Eterm, Sterm
	Tok_Is, -- IS Eterm, Sterm
	Tok_Interface, -- INTERFACE Eterm, Sterm
	Tok_Limited, -- LIMITED Eterm, Sterm
	Tok_Of, -- OF Eterm, Sterm
	Tok_Out, -- OUT Eterm, Sterm
	Tok_Record, -- RECORD Eterm, Sterm
	Tok_Renames, -- RENAMES Eterm, Sterm
	Tok_Reverse, -- REVERSE Eterm, Sterm
	Tok_Some, -- SOME Eterm, Sterm
	Tok_Tagged, -- TAGGED Eterm, Sterm
	Tok_Then, -- THEN Eterm, Sterm

	Tok_Less_Less, -- << Eterm, Sterm, After_SM

	Tok_Abort, -- ABORT Eterm, Sterm, After_SM
	Tok_Accept, -- ACCEPT Eterm, Sterm, After_SM
	Tok_Case, -- CASE Eterm, Sterm, After_SM
	Tok_Delay, -- DELAY Eterm, Sterm, After_SM
	Tok_Else, -- ELSE Eterm, Sterm, After_SM
	Tok_Elsif, -- ELSIF Eterm, Sterm, After_SM
	Tok_End, -- END Eterm, Sterm, After_SM
	Tok_Exception, -- EXCEPTION Eterm, Sterm, After_SM
	Tok_Exit, -- EXIT Eterm, Sterm, After_SM
	Tok_Goto, -- GOTO Eterm, Sterm, After_SM
	Tok_If, -- IF Eterm, Sterm, After_SM
	Tok_Pragma, -- PRAGMA Eterm, Sterm, After_SM
	Tok_Requeue, -- REQUEUE Eterm, Sterm, After_SM
	Tok_Return, -- RETURN Eterm, Sterm, After_SM
	Tok_Select, -- SELECT Eterm, Sterm, After_SM
	Tok_Terminate, -- TERMINATE Eterm, Sterm, After_SM
	Tok_Until, -- UNTIL Eterm, Sterm, After_SM
	Tok_When, -- WHEN Eterm, Sterm, After_SM

	Tok_Begin, -- BEGIN Eterm, Sterm, After_SM, Labeled_Stmt
	Tok_Declare, -- DECLARE Eterm, Sterm, After_SM, Labeled_Stmt
	Tok_For, -- FOR Eterm, Sterm, After_SM, Labeled_Stmt
	Tok_Loop, -- LOOP Eterm, Sterm, After_SM, Labeled_Stmt
	Tok_While, -- WHILE Eterm, Sterm, After_SM, Labeled_Stmt

	Tok_Entry, -- ENTRY Eterm, Sterm, Declk, Deckn, After_SM
	Tok_Protected, -- PROTECTED Eterm, Sterm, Declk, Deckn, After_SM
	Tok_Task, -- TASK Eterm, Sterm, Declk, Deckn, After_SM
	Tok_Type, -- TYPE Eterm, Sterm, Declk, Deckn, After_SM
	Tok_Subtype, -- SUBTYPE Eterm, Sterm, Declk, Deckn, After_SM
	Tok_Overriding, -- OVERRIDING Eterm, Sterm, Declk, Deckn, After_SM
	Tok_Synchronized, -- SYNCHRONIZED Eterm, Sterm, Declk, Deckn, After_SM
	Tok_Use, -- USE Eterm, Sterm, Declk, Deckn, After_SM

	Tok_Function, -- FUNCTION Eterm, Sterm, Cunit, Declk, After_SM
	Tok_Generic, -- GENERIC Eterm, Sterm, Cunit, Declk, After_SM
	Tok_Package, -- PACKAGE Eterm, Sterm, Cunit, Declk, After_SM
	Tok_Procedure, -- PROCEDURE Eterm, Sterm, Cunit, Declk, After_SM

	Tok_Private, -- PRIVATE Eterm, Sterm, Cunit, After_SM
	Tok_With, -- WITH Eterm, Sterm, Cunit, After_SM
	Tok_Separate, -- SEPARATE Eterm, Sterm, Cunit, After_SM

	Tok_EOF, -- End of file Eterm, Sterm, Cterm, After_SM

	Tok_Semicolon, -- ; Eterm, Sterm, Cterm

	Tok_Arrow, -- => Sterm, Cterm, Chtok

	Tok_Vertical_Bar, -- | Cterm, Sterm, Chtok

	Tok_Dot_Dot, -- .. Sterm, Chtok

	Tok_Project,
	Tok_Extends,
	Tok_External,
	Tok_External_As_List,
	-- These four entries represent keywords for the project file language
	-- and can be returned only in the case of scanning project files.

	Tok_Comment,
	-- This entry is used when scanning project files (where it represents
	-- an entire comment), and in preprocessing with the -C switch set
	-- (where it represents just the "--" of a comment). For the project
	-- file case, the text of the comment is stored in Comment_Id.

	Tok_End_Of_Line,
	-- Represents an end of line. Not used during normal compilation scans
	-- where end of line is ignored. Active for preprocessor scanning.

	Tok_Special,
	-- Special character used by the preprocessor. The character itself is
	-- stored in Special_Character below.

	No_Token);
	-- No_Token is used for initializing Token values to indicate that
	-- no value has been set yet.

	function Keyword_Name (Token : Token_Type) return Name_Id;
	-- Given a token that is a reserved word, return the corresponding Name_Id
	-- in lower case. E.g. Keyword_Name (Tok_Begin) = Name_Find ("begin").
	-- It is an error to pass any other kind of token (for instance a
	-- punctuation token such as Tok_Semicolon, or No_Token).

	-- Note: in the RM, operator symbol is a special case of string literal.
	-- We distinguish at the lexical level in this compiler, since there are
	-- many syntactic situations in which only an operator symbol is allowed.

	-- The following subtype declarations group the token types into classes.
	-- These are used for class tests in the parser.

	subtype Token_Class_Numeric_Literal is
	Token_Type range Tok_Integer_Literal .. Tok_Real_Literal;
	-- Numeric literal (integer or real)

	subtype Token_Class_Literal is
	Token_Type range Tok_Integer_Literal .. Tok_Operator_Symbol;
	-- Literal. The range runs through Tok_Operator_Symbol, since in the RM
	-- an operator symbol is a special case of string literal.

	subtype Token_Class_Lit_Or_Name is
	Token_Type range Tok_Integer_Literal .. Tok_Identifier;
	-- Literal or name: every token from Tok_Integer_Literal through
	-- Tok_Identifier.

	subtype Token_Class_Binary_Addop is
	Token_Type range Tok_Ampersand .. Tok_Plus;
	-- Binary adding operator (& + -)

	subtype Token_Class_Unary_Addop is
	Token_Type range Tok_Minus .. Tok_Plus;
	-- Unary adding operator (+ -)

	subtype Token_Class_Mulop is
	Token_Type range Tok_Asterisk .. Tok_Slash;
	-- Multiplying operator

	subtype Token_Class_Logop is
	Token_Type range Tok_And .. Tok_Xor;
	-- Logical operator (and, or, xor)

	subtype Token_Class_Relop is
	Token_Type range Tok_Less .. Tok_Box;
	-- Relational operator (= /= < <= > >=, together with NOT and IN), plus
	-- <> (Tok_Box), which is included to catch misuse of the Pascal style
	-- not equal operator.

	subtype Token_Class_Name is
	Token_Type range Tok_Char_Literal .. Tok_At_Sign;
	-- First token of name (4.1),
	-- (identifier, char literal, operator symbol)
	-- Includes '@' after Ada2012 corrigendum.

	subtype Token_Class_Desig is
	Token_Type range Tok_Operator_Symbol .. Tok_At_Sign;
	-- Token which can be a Designator (identifier, operator symbol). Note
	-- that the range as written also takes in Tok_At_Sign.

	subtype Token_Class_Namext is
	Token_Type range Tok_Dot .. Tok_Left_Paren;
	-- Name extension tokens. These are tokens which can appear immediately
	-- after a name to extend it recursively (period, quote, left paren)

	subtype Token_Class_Consk is
	Token_Type range Tok_Left_Paren .. Tok_Range;
	-- Tokens which can start a constraint
	-- (left paren, delta, digits, range)

	subtype Token_Class_Eterm is
	Token_Type range Tok_Colon_Equal .. Tok_Semicolon;
	-- Expression terminators. These tokens can never appear within a simple
	-- expression. This is used for error recovery purposes (if we encounter
	-- an error in an expression, we simply scan to the next Eterm token).

	subtype Token_Class_Sterm is
	Token_Type range Tok_Delta .. Tok_Dot_Dot;
	-- Simple_Expression terminators. A Simple_Expression must be followed
	-- by a token in this class, or an error message is issued complaining
	-- about a missing binary operator.

	subtype Token_Class_Atkwd is
	Token_Type range Tok_Delta .. Tok_Range;
	-- Attribute keywords. This class includes keywords which can be used
	-- as an Attribute_Designator, namely DELTA, DIGITS and RANGE

	subtype Token_Class_Cterm is
	Token_Type range Tok_EOF .. Tok_Vertical_Bar;
	-- Choice terminators (end of file, ; => |). These tokens terminate a
	-- choice. This is used for error recovery purposes (if we encounter an
	-- error in a Choice, we simply scan to the next Cterm token).

	subtype Token_Class_Chtok is
	Token_Type range Tok_Arrow .. Tok_Dot_Dot;
	-- Choice tokens (=> | ..). These tokens signal a choice when used in an
	-- Aggregate

	subtype Token_Class_Cunit is
	Token_Type range Tok_Function .. Tok_Separate;
	-- Tokens which can begin a compilation unit (FUNCTION, GENERIC, PACKAGE,
	-- PROCEDURE, PRIVATE, WITH, SEPARATE)

	subtype Token_Class_Declk is
	Token_Type range Tok_Entry .. Tok_Procedure;
	-- Keywords which start a declaration

	subtype Token_Class_Deckn is
	Token_Type range Tok_Entry .. Tok_Use;
	-- Keywords which start a declaration but can't start a compilation unit
	-- (i.e. the members of Token_Class_Declk that are not also members of
	-- Token_Class_Cunit).

	subtype Token_Class_After_SM is
	Token_Type range Tok_Less_Less .. Tok_EOF;
	-- Tokens which always, or almost always, appear after a semicolon. Used
	-- in the Resync_Past_Semicolon routine to avoid gobbling up stuff when
	-- a semicolon is missing. Of significance only for error recovery.

	subtype Token_Class_Labeled_Stmt is
	Token_Type range Tok_Begin .. Tok_While;
	-- Tokens which start labeled statements (BEGIN, DECLARE, FOR, LOOP,
	-- WHILE)

	type Token_Flag_Array is array (Token_Type) of Boolean;
	-- One Boolean flag for each Token_Type value

	Is_Reserved_Keyword : constant Token_Flag_Array :=
	Token_Flag_Array'
	(Tok_Mod .. Tok_Rem => True,
	Tok_New .. Tok_Null => True,
	Tok_Delta .. Tok_Range => True,
	Tok_And .. Tok_Xor => True,
	Tok_In .. Tok_Not => True,
	Tok_Abstract .. Tok_Then => True,
	Tok_Abort .. Tok_Separate => True,
	others => False);
	-- Flag array used to test for reserved word. Every token not covered by
	-- one of the ranges above is False; this includes the tokens that follow
	-- Tok_Separate in Token_Type, such as the project file keywords
	-- (Tok_Project .. Tok_External_As_List) and No_Token.

	procedure Initialize_Ada_Keywords;
	-- Set up Token_Type values in Names table entries for Ada reserved
	-- words. This ignores Ada_Version, so the setup done here does not
	-- depend on the language version in force; Ada_Version is taken into
	-- account in Snames.Is_Keyword_Name.

	--------------------------
	-- Scan State Variables --
	--------------------------

	-- Note: these variables can only be referenced during the parsing of a
	-- file. Reference to any of them from Sem or the expander is wrong.

	-- These variables are initialized by Scn.Initialize_Scanner, and should
	-- not be referenced before such a call, except for saving and restoring
	-- them.

	Scan_Ptr : Source_Ptr := No_Location;
	-- Current scan pointer location. After a call to Scan, this points
	-- just past the end of the token just scanned.

	Token : Token_Type := No_Token;
	-- Type of current token. No_Token until a value has been set.

	Token_Ptr : Source_Ptr := No_Location;
	-- Pointer to first character of current token

	Current_Line_Start : Source_Ptr := No_Location;
	-- Pointer to first character of line containing current token

	Start_Column : Column_Number := No_Column_Number;
	-- Starting column number (zero origin) of the first non-blank character
	-- on the line containing the current token. This is used for error
	-- recovery circuits which depend on looking at the column line up.
	-- The location of that character is in First_Non_Blank_Location.

	Type_Token_Location : Source_Ptr := No_Location;
	-- Within a type declaration, gives the location of the TYPE keyword that
	-- opened the type declaration. Used in checking the end column of a record
	-- declaration, which can line up either with the TYPE keyword, or with the
	-- start of the line containing the RECORD keyword. Note that this
	-- variable has no corresponding component in Saved_Scan_State.

	Checksum : Word := 0;
	-- Used to accumulate a CRC representing the tokens in the source
	-- file being compiled. This CRC includes only program tokens, and
	-- excludes comments.

	Limited_Checksum : Word := 0;
	-- Used to accumulate a CRC representing significant tokens in the
	-- limited view of a package, i.e. visible type names and related
	-- tagged indicators. Unlike Checksum, this variable has no
	-- corresponding component in Saved_Scan_State.

	First_Non_Blank_Location : Source_Ptr := No_Location;
	-- Location of first non-blank character on the line containing the
	-- current token (i.e. the location of the character whose column number
	-- is stored in Start_Column).

	Token_Node : Node_Id := Empty;
	-- Node table Id for the current token. This is set only if the current
	-- token is one for which the scanner constructs a node (i.e. it is an
	-- identifier, operator symbol, or literal). For other token types,
	-- Token_Node is undefined.

	Token_Name : Name_Id := No_Name;
	-- For identifiers, this is set to the Name_Id of the identifier scanned.
	-- For all other tokens, Token_Name is set to Error_Name. Note that it
	-- would be possible for the caller to extract this information from
	-- Token_Node. We set Token_Name separately for two reasons. First it
	-- allows a quicker test for a specific identifier. Second, it allows
	-- a version of the parser to be built that does not build tree nodes,
	-- usable as a syntax checker.

	Prev_Token : Token_Type := No_Token;
	-- Type of previous token. No_Token until a value has been set.

	Prev_Token_Ptr : Source_Ptr;
	-- Pointer to first character of previous token. Unlike Token_Ptr, this
	-- declaration gives no default initial value.

	Version_To_Be_Found : Boolean;
	-- This flag is True if the scanner is still looking for an RCS version
	-- number in a comment. Normally it is initialized to False so that this
	-- circuit is not activated. If the -dv switch is set, then this flag is
	-- initialized to True, and then reset when the version number is found.
	-- We do things this way to minimize the impact on comment scanning.
	-- The initialization described here is not done by this declaration,
	-- which gives no default value.

	Character_Code : Char_Code;
	-- Valid only when Token is Tok_Char_Literal. Contains the value of the
	-- scanned literal.

	Real_Literal_Value : Ureal;
	-- Valid only when Token is Tok_Real_Literal. Contains the value of the
	-- scanned literal.

	Int_Literal_Value : Uint;
	-- Valid only when Token = Tok_Integer_Literal, and we are not in
	-- syntax-only mode. Contains the value of the scanned literal.

	Based_Literal_Uses_Colon : Boolean;
	-- Valid only when Token = Tok_Integer_Literal or Tok_Real_Literal. Set
	-- True only for the case of a based literal using ':' instead of '#'.

	String_Literal_Id : String_Id;
	-- Valid only when Token = Tok_String_Literal or Tok_Operator_Symbol.
	-- Contains the Id for currently scanned string value.

	Wide_Character_Found : Boolean := False;
	-- Valid only when Token = Tok_String_Literal. Set True if wide character
	-- found (i.e. a character that does not fit in Character, but fits in
	-- Wide_Wide_Character).
	-- NOTE(review): given the description of Wide_Wide_Character_Found below,
	-- "fits in Wide_Character" may have been intended here; confirm against
	-- the scanner body.

	Wide_Wide_Character_Found : Boolean := False;
	-- Valid only when Token = Tok_String_Literal. Set True if wide wide
	-- character found (i.e. a character that does not fit in Character or
	-- Wide_Character).

	subtype Special_Preprocessor_Character is Character with
	Predicate => Special_Preprocessor_Character in '#' | '$';
	-- The characters that may be stored in Special_Character: '#' and '$'

	Special_Character : Special_Preprocessor_Character;
	-- The special character used by the preprocessor that the scanner has
	-- returned as Tok_Special (see the description of Tok_Special in
	-- Token_Type). No default initial value is given here.

	Comment_Id : Name_Id := No_Name;
	-- Valid only when Token = Tok_Comment. Store the string that follows
	-- the "--" of a comment when scanning project files.
	--
	-- Is it really right for this to be a Name rather than a String, what
	-- about the case of Wide_Wide_Characters???

	Inside_Depends : Boolean := False;
	-- True while parsing the argument of a Depends or Refined_Depends pragma
	-- or aspect. Used to allow/require nonstandard style rules for =>+ with
	-- -gnatyt.

	Inside_Interpolated_String_Expression : Boolean := False;
	-- True while parsing an interpolated string expression

	Inside_Interpolated_String_Literal : Boolean := False;
	-- True while parsing an interpolated string literal

	Inside_If_Expression : Nat := 0;
	-- This is a counter that is set non-zero while scanning out an if
	-- expression (incremented on entry, decremented on exit). It is used to
	-- disconnect format checks that normally apply to keywords THEN, ELSE etc.
	-- Being a counter rather than a Boolean, it remains non-zero until each
	-- increment has been matched by a decrement.

	Inside_Pragma : Boolean := False;
	-- True within a pragma. Used to avoid complaining about reserved words
	-- within pragmas (see Scan_Reserved_Identifier).

	--------------------------------------------------------
	-- Procedures for Saving and Restoring the Scan State --
	--------------------------------------------------------

	-- The following procedures can be used to save and restore the entire
	-- scan state. They are used in cases where it is necessary to back up
	-- the scan during the parse.

	type Saved_Scan_State is private;
	-- Used for saving and restoring the scan state. The full declaration is
	-- in the private part of this package.

	procedure Save_Scan_State (Saved_State : out Saved_Scan_State);
	pragma Inline (Save_Scan_State);
	-- Saves the current scan state for possible later restoration. Note that
	-- there is no harm in saving the state and then never restoring it.
	-- Saved_State receives the saved state.

	procedure Restore_Scan_State (Saved_State : Saved_Scan_State);
	pragma Inline (Restore_Scan_State);
	-- Restores a scan state saved by a call to Save_Scan_State.
	-- The saved scan state must refer to the current source file.
	-- Saved_State is the value previously obtained from Save_Scan_State.

	private
	type Saved_Scan_State is record
	Save_Scan_Ptr : Source_Ptr;
	Save_Token : Token_Type;
	Save_Token_Ptr : Source_Ptr;
	Save_Current_Line_Start : Source_Ptr;
	Save_Start_Column : Column_Number;
	Save_Checksum : Word;
	Save_First_Non_Blank_Location : Source_Ptr;
	Save_Token_Node : Node_Id;
	Save_Token_Name : Name_Id;
	Save_Prev_Token : Token_Type;
	Save_Prev_Token_Ptr : Source_Ptr;
	end record;
	-- Full declaration of the saved scan state. Each component has the same
	-- type as the scan state variable whose name follows the Save_ prefix;
	-- presumably Save_Scan_State and Restore_Scan_State copy between the
	-- two (confirm against the package body). Scan state variables with no
	-- component here (for example Type_Token_Location, Limited_Checksum,
	-- the literal value variables and the Inside_xxx flags) cannot be
	-- captured in a value of this type.

	end Scans;