gcc/ada/scn-slit.adb - gcc - Git at Google

 ------------------------------------------------------------------------------
 --                                                                          --
 --                         GNAT COMPILER COMPONENTS                         --
 --                                                                          --
 --                             S C N . S L I T                              --
 --                                                                          --
 --                                 B o d y                                  --
 --                                                                          --
 --                            $Revision$
 --                                                                          --
 --          Copyright (C) 1992-2001 Free Software Foundation, Inc.          --
 --                                                                          --
 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
 -- terms of the  GNU General Public License as published  by the Free Soft- --
 -- ware  Foundation;  either version 2,  or (at your option) any later ver- --
 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
 -- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
 -- for  more details.  You should have  received  a copy of the GNU General --
 -- Public License  distributed with GNAT;  see file COPYING.  If not, write --
 -- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
 -- MA 02111-1307, USA.                                                      --
 --                                                                          --
 -- GNAT was originally developed  by the GNAT team at  New York University. --
 -- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
 --                                                                          --
 ------------------------------------------------------------------------------

 with Stringt; use Stringt;

 separate (Scn)
 procedure Slit is

    Delimiter : Character;
    --  Delimiter (first character of string)

    C : Character;
    --  Current source program character

    Code : Char_Code;
    --  Current character code value

    Err : Boolean;
    --  Error flag for Scan_Wide call

    String_Literal_Id : String_Id;
    --  Id for currently scanned string value

    Wide_Character_Found : Boolean := False;
    --  Set True if wide character found

    procedure Error_Bad_String_Char;
    --  Signal bad character in string/character literal. On entry Scan_Ptr
    --  points to the improper character encountered during the scan. Scan_Ptr
    --  is not modified, so it still points to the bad character on return.

    procedure Error_Unterminated_String;
    --  Procedure called if a line terminator character is encountered during
    --  scanning a string, meaning that the string is not properly terminated.

    procedure Set_String;
    --  Procedure used to distinguish between string and operator symbol.
    --  On entry the string has been scanned out, and its characters start
    --  at Token_Ptr and end one character before Scan_Ptr. On exit Token
    --  is set to Tok_String_Literal or Tok_Operator_Symbol as appropriate,
    --  and Token_Node is appropriately initialized. In addition, in the
    --  operator symbol case, Token_Name is appropriately set.

    ---------------------------
    -- Error_Bad_String_Char --
    ---------------------------

    procedure Error_Bad_String_Char is
       C : constant Character := Source (Scan_Ptr);

    begin
       if C = HT then
          Error_Msg_S ("horizontal tab not allowed in string");

       elsif C = VT or else C = FF then
          Error_Msg_S ("format effector not allowed in string");

       elsif C in Upper_Half_Character then
          Error_Msg_S ("(Ada 83) upper half character not allowed");

       else
          Error_Msg_S ("control character not allowed in string");
       end if;
    end Error_Bad_String_Char;

    -------------------------------
    -- Error_Unterminated_String --
    -------------------------------

    procedure Error_Unterminated_String is
    begin
       --  An interesting little refinement. Consider the following examples:

       --     A := "this is an unterminated string;
       --     A := "this is an unterminated string &
       --     P(A, "this is a parameter that didn't get terminated);

       --  We fiddle a little to do slightly better placement in these cases
       --  also if there is white space at the end of the line we place the
       --  flag at the start of this white space, not at the end. Note that
       --  we only have to test for blanks, since tabs aren't allowed in
       --  strings in the first place and would have caused an error message.

       --  Two more cases that we treat specially are:

       --     A := "this string uses the wrong terminator'
       --     A := "this string uses the wrong terminator' &

       --  In these cases we give a different error message as well

       --  We actually reposition the scan pointer to the point where we
       --  place the flag in these cases, since it seems a better bet on
       --  the original intention.

       while Source (Scan_Ptr - 1) = ' '
         or else Source (Scan_Ptr - 1) = '&'
       loop
          Scan_Ptr := Scan_Ptr - 1;
          Unstore_String_Char;
       end loop;

       --  Check for case of incorrect string terminator, but single quote is
       --  not considered incorrect if the opening terminator misused a single
       --  quote (error message already given).

       if Delimiter /= '''
         and then Source (Scan_Ptr - 1) = '''
       then
          Unstore_String_Char;
          Error_Msg ("incorrect string terminator character", Scan_Ptr - 1);
          return;
       end if;

       if Source (Scan_Ptr - 1) = ';' then
          Scan_Ptr := Scan_Ptr - 1;
          Unstore_String_Char;

          if Source (Scan_Ptr - 1) = ')' then
             Scan_Ptr := Scan_Ptr - 1;
             Unstore_String_Char;
          end if;
       end if;

       Error_Msg_S ("missing string quote");
    end Error_Unterminated_String;

    ----------------
    -- Set_String --
    ----------------

    procedure Set_String is
       Slen : Int := Int (Scan_Ptr - Token_Ptr - 2);
       C1   : Character;
       C2   : Character;
       C3   : Character;

    begin
       --  Token_Name is currently set to Error_Name. The following section of
       --  code resets Token_Name to the proper Name_Op_xx value if the string
       --  is a valid operator symbol, otherwise it is left set to Error_Name.

       if Slen = 1 then
          C1 := Source (Token_Ptr + 1);

          case C1 is
             when '=' =>
                Token_Name := Name_Op_Eq;

             when '>' =>
                Token_Name := Name_Op_Gt;

             when '<' =>
                Token_Name := Name_Op_Lt;

             when '+' =>
                Token_Name := Name_Op_Add;

             when '-' =>
                Token_Name := Name_Op_Subtract;

             when '&' =>
                Token_Name := Name_Op_Concat;

             when '*' =>
                Token_Name := Name_Op_Multiply;

             when '/' =>
                Token_Name := Name_Op_Divide;

             when others =>
                null;
          end case;

       elsif Slen = 2 then
          C1 := Source (Token_Ptr + 1);
          C2 := Source (Token_Ptr + 2);

          if C1 = '*' and then C2 = '*' then
             Token_Name := Name_Op_Expon;

          elsif C2 = '=' then

             if C1 = '/' then
                Token_Name := Name_Op_Ne;
             elsif C1 = '<' then
                Token_Name := Name_Op_Le;
             elsif C1 = '>' then
                Token_Name := Name_Op_Ge;
             end if;

          elsif (C1 = 'O' or else C1 = 'o') and then    -- OR
                (C2 = 'R' or else C2 = 'r')
          then
             Token_Name := Name_Op_Or;
          end if;

       elsif Slen = 3 then
          C1 := Source (Token_Ptr + 1);
          C2 := Source (Token_Ptr + 2);
          C3 := Source (Token_Ptr + 3);

          if (C1 = 'A' or else C1 = 'a') and then       -- AND
             (C2 = 'N' or else C2 = 'n') and then
             (C3 = 'D' or else C3 = 'd')
          then
             Token_Name := Name_Op_And;

          elsif (C1 = 'A' or else C1 = 'a') and then    -- ABS
                (C2 = 'B' or else C2 = 'b') and then
                (C3 = 'S' or else C3 = 's')
          then
             Token_Name := Name_Op_Abs;

          elsif (C1 = 'M' or else C1 = 'm') and then    -- MOD
                (C2 = 'O' or else C2 = 'o') and then
                (C3 = 'D' or else C3 = 'd')
          then
             Token_Name := Name_Op_Mod;

          elsif (C1 = 'N' or else C1 = 'n') and then    -- NOT
                (C2 = 'O' or else C2 = 'o') and then
                (C3 = 'T' or else C3 = 't')
          then
             Token_Name := Name_Op_Not;

          elsif (C1 = 'R' or else C1 = 'r') and then    -- REM
                (C2 = 'E' or else C2 = 'e') and then
                (C3 = 'M' or else C3 = 'm')
          then
             Token_Name := Name_Op_Rem;

          elsif (C1 = 'X' or else C1 = 'x') and then    -- XOR
                (C2 = 'O' or else C2 = 'o') and then
                (C3 = 'R' or else C3 = 'r')
          then
             Token_Name := Name_Op_Xor;
          end if;

       end if;

       --  If it is an operator symbol, then Token_Name is set. If it is some
       --  other string value, then Token_Name still contains Error_Name.

       if Token_Name = Error_Name then
          Token := Tok_String_Literal;
          Token_Node := New_Node (N_String_Literal, Token_Ptr);
          Set_Has_Wide_Character (Token_Node, Wide_Character_Found);

       else
          Token := Tok_Operator_Symbol;
          Token_Node := New_Node (N_Operator_Symbol, Token_Ptr);
          Set_Chars (Token_Node, Token_Name);
       end if;

       Set_Strval (Token_Node, String_Literal_Id);

    end Set_String;

 ----------
 -- Slit --
 ----------

 begin
    --  On entry, Scan_Ptr points to the opening character of the string which
    --  is either a percent, double quote, or apostrophe (single quote). The
    --  latter case is an error detected by the character literal circuit.

    Delimiter := Source (Scan_Ptr);
    Accumulate_Checksum (Delimiter);
    Start_String;
    Scan_Ptr := Scan_Ptr + 1;

    --  Loop to scan out characters of string literal

    loop
       C := Source (Scan_Ptr);

       if C = Delimiter then
          Accumulate_Checksum (C);
          Scan_Ptr := Scan_Ptr + 1;
          exit when Source (Scan_Ptr) /= Delimiter;
          Code := Get_Char_Code (C);
          Accumulate_Checksum (C);
          Scan_Ptr := Scan_Ptr + 1;

       else
          if C = '"' and then Delimiter = '%' then
             Error_Msg_S ("quote not allowed in percent delimited string");
             Code := Get_Char_Code (C);
             Scan_Ptr := Scan_Ptr + 1;

          elsif (C = ESC
                  and then
                 Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
            or else
                (C in Upper_Half_Character
                  and then
                 Upper_Half_Encoding)
            or else
                (C = '['
                  and then
                 Source (Scan_Ptr + 1) = '"'
                  and then
                 Identifier_Char (Source (Scan_Ptr + 2)))
          then
             Scan_Wide (Source, Scan_Ptr, Code, Err);
             Accumulate_Checksum (Code);

             if Err then
                Error_Illegal_Wide_Character;
                Code := Get_Char_Code (' ');
             end if;

          else
             Accumulate_Checksum (C);

             if C not in Graphic_Character then
                if C in Line_Terminator then
                   Error_Unterminated_String;
                   exit;

                elsif C in Upper_Half_Character then
                   if Ada_83 then
                      Error_Bad_String_Char;
                   end if;

                else
                   Error_Bad_String_Char;
                end if;
             end if;

             Code := Get_Char_Code (C);
             Scan_Ptr := Scan_Ptr + 1;
          end if;
       end if;

       Store_String_Char (Code);

       if not In_Character_Range (Code) then
          Wide_Character_Found := True;
       end if;
    end loop;

    String_Literal_Id := End_String;
    Set_String;
    return;

 end Slit;
	------------------------------------------------------------------------------
	-- --
	-- GNAT COMPILER COMPONENTS --
	-- --
	-- S C N . S L I T --
	-- --
	-- B o d y --
	-- --
	-- $Revision$
	-- --
	-- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
	-- --
	-- GNAT is free software; you can redistribute it and/or modify it under --
	-- terms of the GNU General Public License as published by the Free Soft- --
	-- ware Foundation; either version 2, or (at your option) any later ver- --
	-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
	-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
	-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
	-- for more details. You should have received a copy of the GNU General --
	-- Public License distributed with GNAT; see file COPYING. If not, write --
	-- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
	-- MA 02111-1307, USA. --
	-- --
	-- GNAT was originally developed by the GNAT team at New York University. --
	-- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
	-- --
	------------------------------------------------------------------------------

	with Stringt; use Stringt;

	separate (Scn)
	procedure Slit is

	Delimiter : Character;
	-- Delimiter (first character of string)

	C : Character;
	-- Current source program character

	Code : Char_Code;
	-- Current character code value

	Err : Boolean;
	-- Error flag for Scan_Wide call

	String_Literal_Id : String_Id;
	-- Id for currently scanned string value

	Wide_Character_Found : Boolean := False;
	-- Set True if wide character found

	procedure Error_Bad_String_Char;
	-- Signal bad character in string/character literal. On entry Scan_Ptr
	-- points to the improper character encountered during the scan. Scan_Ptr
	-- is not modified, so it still points to the bad character on return.

	procedure Error_Unterminated_String;
	-- Procedure called if a line terminator character is encountered during
	-- scanning a string, meaning that the string is not properly terminated.

	procedure Set_String;
	-- Procedure used to distinguish between string and operator symbol.
	-- On entry the string has been scanned out, and its characters start
	-- at Token_Ptr and end one character before Scan_Ptr. On exit Token
	-- is set to Tok_String_Literal or Tok_Operator_Symbol as appropriate,
	-- and Token_Node is appropriately initialized. In addition, in the
	-- operator symbol case, Token_Name is appropriately set.

	---------------------------
	-- Error_Bad_String_Char --
	---------------------------

	procedure Error_Bad_String_Char is
	C : constant Character := Source (Scan_Ptr);

	begin
	if C = HT then
	Error_Msg_S ("horizontal tab not allowed in string");

	elsif C = VT or else C = FF then
	Error_Msg_S ("format effector not allowed in string");

	elsif C in Upper_Half_Character then
	Error_Msg_S ("(Ada 83) upper half character not allowed");

	else
	Error_Msg_S ("control character not allowed in string");
	end if;
	end Error_Bad_String_Char;

	-------------------------------
	-- Error_Unterminated_String --
	-------------------------------

	procedure Error_Unterminated_String is
	begin
	-- An interesting little refinement. Consider the following examples:

	-- A := "this is an unterminated string;
	-- A := "this is an unterminated string &
	-- P(A, "this is a parameter that didn't get terminated);

	-- We fiddle a little to do slightly better placement in these cases
	-- also if there is white space at the end of the line we place the
	-- flag at the start of this white space, not at the end. Note that
	-- we only have to test for blanks, since tabs aren't allowed in
	-- strings in the first place and would have caused an error message.

	-- Two more cases that we treat specially are:

	-- A := "this string uses the wrong terminator'
	-- A := "this string uses the wrong terminator' &

	-- In these cases we give a different error message as well

	-- We actually reposition the scan pointer to the point where we
	-- place the flag in these cases, since it seems a better bet on
	-- the original intention.

	while Source (Scan_Ptr - 1) = ' '
	or else Source (Scan_Ptr - 1) = '&'
	loop
	Scan_Ptr := Scan_Ptr - 1;
	Unstore_String_Char;
	end loop;

	-- Check for case of incorrect string terminator, but single quote is
	-- not considered incorrect if the opening terminator misused a single
	-- quote (error message already given).

	if Delimiter /= '''
	and then Source (Scan_Ptr - 1) = '''
	then
	Unstore_String_Char;
	Error_Msg ("incorrect string terminator character", Scan_Ptr - 1);
	return;
	end if;

	if Source (Scan_Ptr - 1) = ';' then
	Scan_Ptr := Scan_Ptr - 1;
	Unstore_String_Char;

	if Source (Scan_Ptr - 1) = ')' then
	Scan_Ptr := Scan_Ptr - 1;
	Unstore_String_Char;
	end if;
	end if;

	Error_Msg_S ("missing string quote");
	end Error_Unterminated_String;

	----------------
	-- Set_String --
	----------------

	procedure Set_String is
	Slen : Int := Int (Scan_Ptr - Token_Ptr - 2);
	C1 : Character;
	C2 : Character;
	C3 : Character;

	begin
	-- Token_Name is currently set to Error_Name. The following section of
	-- code resets Token_Name to the proper Name_Op_xx value if the string
	-- is a valid operator symbol, otherwise it is left set to Error_Name.

	if Slen = 1 then
	C1 := Source (Token_Ptr + 1);

	case C1 is
	when '=' =>
	Token_Name := Name_Op_Eq;

	when '>' =>
	Token_Name := Name_Op_Gt;

	when '<' =>
	Token_Name := Name_Op_Lt;

	when '+' =>
	Token_Name := Name_Op_Add;

	when '-' =>
	Token_Name := Name_Op_Subtract;

	when '&' =>
	Token_Name := Name_Op_Concat;

	when '*' =>
	Token_Name := Name_Op_Multiply;

	when '/' =>
	Token_Name := Name_Op_Divide;

	when others =>
	null;
	end case;

	elsif Slen = 2 then
	C1 := Source (Token_Ptr + 1);
	C2 := Source (Token_Ptr + 2);

	if C1 = '' and then C2 = '' then
	Token_Name := Name_Op_Expon;

	elsif C2 = '=' then

	if C1 = '/' then
	Token_Name := Name_Op_Ne;
	elsif C1 = '<' then
	Token_Name := Name_Op_Le;
	elsif C1 = '>' then
	Token_Name := Name_Op_Ge;
	end if;

	elsif (C1 = 'O' or else C1 = 'o') and then -- OR
	(C2 = 'R' or else C2 = 'r')
	then
	Token_Name := Name_Op_Or;
	end if;

	elsif Slen = 3 then
	C1 := Source (Token_Ptr + 1);
	C2 := Source (Token_Ptr + 2);
	C3 := Source (Token_Ptr + 3);

	if (C1 = 'A' or else C1 = 'a') and then -- AND
	(C2 = 'N' or else C2 = 'n') and then
	(C3 = 'D' or else C3 = 'd')
	then
	Token_Name := Name_Op_And;

	elsif (C1 = 'A' or else C1 = 'a') and then -- ABS
	(C2 = 'B' or else C2 = 'b') and then
	(C3 = 'S' or else C3 = 's')
	then
	Token_Name := Name_Op_Abs;

	elsif (C1 = 'M' or else C1 = 'm') and then -- MOD
	(C2 = 'O' or else C2 = 'o') and then
	(C3 = 'D' or else C3 = 'd')
	then
	Token_Name := Name_Op_Mod;

	elsif (C1 = 'N' or else C1 = 'n') and then -- NOT
	(C2 = 'O' or else C2 = 'o') and then
	(C3 = 'T' or else C3 = 't')
	then
	Token_Name := Name_Op_Not;

	elsif (C1 = 'R' or else C1 = 'r') and then -- REM
	(C2 = 'E' or else C2 = 'e') and then
	(C3 = 'M' or else C3 = 'm')
	then
	Token_Name := Name_Op_Rem;

	elsif (C1 = 'X' or else C1 = 'x') and then -- XOR
	(C2 = 'O' or else C2 = 'o') and then
	(C3 = 'R' or else C3 = 'r')
	then
	Token_Name := Name_Op_Xor;
	end if;

	end if;

	-- If it is an operator symbol, then Token_Name is set. If it is some
	-- other string value, then Token_Name still contains Error_Name.

	if Token_Name = Error_Name then
	Token := Tok_String_Literal;
	Token_Node := New_Node (N_String_Literal, Token_Ptr);
	Set_Has_Wide_Character (Token_Node, Wide_Character_Found);

	else
	Token := Tok_Operator_Symbol;
	Token_Node := New_Node (N_Operator_Symbol, Token_Ptr);
	Set_Chars (Token_Node, Token_Name);
	end if;

	Set_Strval (Token_Node, String_Literal_Id);

	end Set_String;

	----------
	-- Slit --
	----------

	begin
	-- On entry, Scan_Ptr points to the opening character of the string which
	-- is either a percent, double quote, or apostrophe (single quote). The
	-- latter case is an error detected by the character literal circuit.

	Delimiter := Source (Scan_Ptr);
	Accumulate_Checksum (Delimiter);
	Start_String;
	Scan_Ptr := Scan_Ptr + 1;

	-- Loop to scan out characters of string literal

	loop
	C := Source (Scan_Ptr);

	if C = Delimiter then
	Accumulate_Checksum (C);
	Scan_Ptr := Scan_Ptr + 1;
	exit when Source (Scan_Ptr) /= Delimiter;
	Code := Get_Char_Code (C);
	Accumulate_Checksum (C);
	Scan_Ptr := Scan_Ptr + 1;

	else
	if C = '"' and then Delimiter = '%' then
	Error_Msg_S ("quote not allowed in percent delimited string");
	Code := Get_Char_Code (C);
	Scan_Ptr := Scan_Ptr + 1;

	elsif (C = ESC
	and then
	Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
	or else
	(C in Upper_Half_Character
	and then
	Upper_Half_Encoding)
	or else
	(C = '['
	and then
	Source (Scan_Ptr + 1) = '"'
	and then
	Identifier_Char (Source (Scan_Ptr + 2)))
	then
	Scan_Wide (Source, Scan_Ptr, Code, Err);
	Accumulate_Checksum (Code);

	if Err then
	Error_Illegal_Wide_Character;
	Code := Get_Char_Code (' ');
	end if;

	else
	Accumulate_Checksum (C);

	if C not in Graphic_Character then
	if C in Line_Terminator then
	Error_Unterminated_String;
	exit;

	elsif C in Upper_Half_Character then
	if Ada_83 then
	Error_Bad_String_Char;
	end if;

	else
	Error_Bad_String_Char;
	end if;
	end if;

	Code := Get_Char_Code (C);
	Scan_Ptr := Scan_Ptr + 1;
	end if;
	end if;

	Store_String_Char (Code);

	if not In_Character_Range (Code) then
	Wide_Character_Found := True;
	end if;
	end loop;

	String_Literal_Id := End_String;
	Set_String;
	return;

	end Slit;