blob: b60ffc22f117a088ed14a4f2b29cd533e5b9a5fe [file] [log] [blame]
------------------------------------------------------------------------------
-- --
-- GNAT COMPILER COMPONENTS --
-- --
-- S C N . S L I T --
-- --
-- B o d y --
-- --
-- $Revision$
-- --
-- Copyright (C) 1992-2001 Free Software Foundation, Inc. --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
-- ware Foundation; either version 2, or (at your option) any later ver- --
-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License --
-- for more details. You should have received a copy of the GNU General --
-- Public License distributed with GNAT; see file COPYING. If not, write --
-- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, --
-- MA 02111-1307, USA. --
-- --
-- GNAT was originally developed by the GNAT team at New York University. --
-- It is now maintained by Ada Core Technologies Inc (http://www.gnat.com). --
-- --
------------------------------------------------------------------------------
with Stringt; use Stringt;
separate (Scn)
procedure Slit is
Delimiter : Character;
-- Delimiter (first character of string)
C : Character;
-- Current source program character
Code : Char_Code;
-- Current character code value
Err : Boolean;
-- Error flag for Scan_Wide call
String_Literal_Id : String_Id;
-- Id for currently scanned string value
Wide_Character_Found : Boolean := False;
-- Set True if wide character found
procedure Error_Bad_String_Char;
-- Signal bad character in string/character literal. On entry Scan_Ptr
-- points to the improper character encountered during the scan. Scan_Ptr
-- is not modified, so it still points to the bad character on return.
procedure Error_Unterminated_String;
-- Procedure called if a line terminator character is encountered during
-- scanning a string, meaning that the string is not properly terminated.
procedure Set_String;
-- Procedure used to distinguish between string and operator symbol.
-- On entry the string has been scanned out, and its characters start
-- at Token_Ptr and end one character before Scan_Ptr. On exit Token
-- is set to Tok_String_Literal or Tok_Operator_Symbol as appropriate,
-- and Token_Node is appropriately initialized. In addition, in the
-- operator symbol case, Token_Name is appropriately set.
---------------------------
-- Error_Bad_String_Char --
---------------------------
procedure Error_Bad_String_Char is
C : constant Character := Source (Scan_Ptr);
begin
if C = HT then
Error_Msg_S ("horizontal tab not allowed in string");
elsif C = VT or else C = FF then
Error_Msg_S ("format effector not allowed in string");
elsif C in Upper_Half_Character then
Error_Msg_S ("(Ada 83) upper half character not allowed");
else
Error_Msg_S ("control character not allowed in string");
end if;
end Error_Bad_String_Char;
-------------------------------
-- Error_Unterminated_String --
-------------------------------
procedure Error_Unterminated_String is
begin
-- An interesting little refinement. Consider the following examples:
-- A := "this is an unterminated string;
-- A := "this is an unterminated string &
-- P(A, "this is a parameter that didn't get terminated);
-- We fiddle a little to do slightly better placement in these cases
-- also if there is white space at the end of the line we place the
-- flag at the start of this white space, not at the end. Note that
-- we only have to test for blanks, since tabs aren't allowed in
-- strings in the first place and would have caused an error message.
-- Two more cases that we treat specially are:
-- A := "this string uses the wrong terminator'
-- A := "this string uses the wrong terminator' &
-- In these cases we give a different error message as well
-- We actually reposition the scan pointer to the point where we
-- place the flag in these cases, since it seems a better bet on
-- the original intention.
while Source (Scan_Ptr - 1) = ' '
or else Source (Scan_Ptr - 1) = '&'
loop
Scan_Ptr := Scan_Ptr - 1;
Unstore_String_Char;
end loop;
-- Check for case of incorrect string terminator, but single quote is
-- not considered incorrect if the opening terminator misused a single
-- quote (error message already given).
if Delimiter /= '''
and then Source (Scan_Ptr - 1) = '''
then
Unstore_String_Char;
Error_Msg ("incorrect string terminator character", Scan_Ptr - 1);
return;
end if;
if Source (Scan_Ptr - 1) = ';' then
Scan_Ptr := Scan_Ptr - 1;
Unstore_String_Char;
if Source (Scan_Ptr - 1) = ')' then
Scan_Ptr := Scan_Ptr - 1;
Unstore_String_Char;
end if;
end if;
Error_Msg_S ("missing string quote");
end Error_Unterminated_String;
----------------
-- Set_String --
----------------
procedure Set_String is
Slen : Int := Int (Scan_Ptr - Token_Ptr - 2);
C1 : Character;
C2 : Character;
C3 : Character;
begin
-- Token_Name is currently set to Error_Name. The following section of
-- code resets Token_Name to the proper Name_Op_xx value if the string
-- is a valid operator symbol, otherwise it is left set to Error_Name.
if Slen = 1 then
C1 := Source (Token_Ptr + 1);
case C1 is
when '=' =>
Token_Name := Name_Op_Eq;
when '>' =>
Token_Name := Name_Op_Gt;
when '<' =>
Token_Name := Name_Op_Lt;
when '+' =>
Token_Name := Name_Op_Add;
when '-' =>
Token_Name := Name_Op_Subtract;
when '&' =>
Token_Name := Name_Op_Concat;
when '*' =>
Token_Name := Name_Op_Multiply;
when '/' =>
Token_Name := Name_Op_Divide;
when others =>
null;
end case;
elsif Slen = 2 then
C1 := Source (Token_Ptr + 1);
C2 := Source (Token_Ptr + 2);
if C1 = '*' and then C2 = '*' then
Token_Name := Name_Op_Expon;
elsif C2 = '=' then
if C1 = '/' then
Token_Name := Name_Op_Ne;
elsif C1 = '<' then
Token_Name := Name_Op_Le;
elsif C1 = '>' then
Token_Name := Name_Op_Ge;
end if;
elsif (C1 = 'O' or else C1 = 'o') and then -- OR
(C2 = 'R' or else C2 = 'r')
then
Token_Name := Name_Op_Or;
end if;
elsif Slen = 3 then
C1 := Source (Token_Ptr + 1);
C2 := Source (Token_Ptr + 2);
C3 := Source (Token_Ptr + 3);
if (C1 = 'A' or else C1 = 'a') and then -- AND
(C2 = 'N' or else C2 = 'n') and then
(C3 = 'D' or else C3 = 'd')
then
Token_Name := Name_Op_And;
elsif (C1 = 'A' or else C1 = 'a') and then -- ABS
(C2 = 'B' or else C2 = 'b') and then
(C3 = 'S' or else C3 = 's')
then
Token_Name := Name_Op_Abs;
elsif (C1 = 'M' or else C1 = 'm') and then -- MOD
(C2 = 'O' or else C2 = 'o') and then
(C3 = 'D' or else C3 = 'd')
then
Token_Name := Name_Op_Mod;
elsif (C1 = 'N' or else C1 = 'n') and then -- NOT
(C2 = 'O' or else C2 = 'o') and then
(C3 = 'T' or else C3 = 't')
then
Token_Name := Name_Op_Not;
elsif (C1 = 'R' or else C1 = 'r') and then -- REM
(C2 = 'E' or else C2 = 'e') and then
(C3 = 'M' or else C3 = 'm')
then
Token_Name := Name_Op_Rem;
elsif (C1 = 'X' or else C1 = 'x') and then -- XOR
(C2 = 'O' or else C2 = 'o') and then
(C3 = 'R' or else C3 = 'r')
then
Token_Name := Name_Op_Xor;
end if;
end if;
-- If it is an operator symbol, then Token_Name is set. If it is some
-- other string value, then Token_Name still contains Error_Name.
if Token_Name = Error_Name then
Token := Tok_String_Literal;
Token_Node := New_Node (N_String_Literal, Token_Ptr);
Set_Has_Wide_Character (Token_Node, Wide_Character_Found);
else
Token := Tok_Operator_Symbol;
Token_Node := New_Node (N_Operator_Symbol, Token_Ptr);
Set_Chars (Token_Node, Token_Name);
end if;
Set_Strval (Token_Node, String_Literal_Id);
end Set_String;
----------
-- Slit --
----------
begin
-- On entry, Scan_Ptr points to the opening character of the string which
-- is either a percent, double quote, or apostrophe (single quote). The
-- latter case is an error detected by the character literal circuit.
Delimiter := Source (Scan_Ptr);
Accumulate_Checksum (Delimiter);
Start_String;
Scan_Ptr := Scan_Ptr + 1;
-- Loop to scan out characters of string literal
loop
C := Source (Scan_Ptr);
if C = Delimiter then
Accumulate_Checksum (C);
Scan_Ptr := Scan_Ptr + 1;
exit when Source (Scan_Ptr) /= Delimiter;
Code := Get_Char_Code (C);
Accumulate_Checksum (C);
Scan_Ptr := Scan_Ptr + 1;
else
if C = '"' and then Delimiter = '%' then
Error_Msg_S ("quote not allowed in percent delimited string");
Code := Get_Char_Code (C);
Scan_Ptr := Scan_Ptr + 1;
elsif (C = ESC
and then
Wide_Character_Encoding_Method in WC_ESC_Encoding_Method)
or else
(C in Upper_Half_Character
and then
Upper_Half_Encoding)
or else
(C = '['
and then
Source (Scan_Ptr + 1) = '"'
and then
Identifier_Char (Source (Scan_Ptr + 2)))
then
Scan_Wide (Source, Scan_Ptr, Code, Err);
Accumulate_Checksum (Code);
if Err then
Error_Illegal_Wide_Character;
Code := Get_Char_Code (' ');
end if;
else
Accumulate_Checksum (C);
if C not in Graphic_Character then
if C in Line_Terminator then
Error_Unterminated_String;
exit;
elsif C in Upper_Half_Character then
if Ada_83 then
Error_Bad_String_Char;
end if;
else
Error_Bad_String_Char;
end if;
end if;
Code := Get_Char_Code (C);
Scan_Ptr := Scan_Ptr + 1;
end if;
end if;
Store_String_Char (Code);
if not In_Character_Range (Code) then
Wide_Character_Found := True;
end if;
end loop;
String_Literal_Id := End_String;
Set_String;
return;
end Slit;