| ------------------------------------------------------------------------------ |
| -- -- |
| -- GNAT RUN-TIME COMPONENTS -- |
| -- -- |
| -- A D A . C H A R A C T E R S . H A N D L I N G -- |
| -- -- |
| -- B o d y -- |
| -- -- |
| -- Copyright (C) 1992-2013, Free Software Foundation, Inc. -- |
| -- -- |
| -- GNAT is free software; you can redistribute it and/or modify it under -- |
| -- terms of the GNU General Public License as published by the Free Soft- -- |
| -- ware Foundation; either version 3, or (at your option) any later ver- -- |
| -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- |
| -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- |
| -- or FITNESS FOR A PARTICULAR PURPOSE. -- |
| -- -- |
| -- As a special exception under Section 7 of GPL version 3, you are granted -- |
| -- additional permissions described in the GCC Runtime Library Exception, -- |
| -- version 3.1, as published by the Free Software Foundation. -- |
| -- -- |
| -- You should have received a copy of the GNU General Public License and -- |
| -- a copy of the GCC Runtime Library Exception along with this program; -- |
| -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- |
| -- <http://www.gnu.org/licenses/>. -- |
| -- -- |
| -- GNAT was originally developed by the GNAT team at New York University. -- |
| -- Extensive contributions were provided by Ada Core Technologies Inc. -- |
| -- -- |
| ------------------------------------------------------------------------------ |
| |
| with Ada.Characters.Latin_1; use Ada.Characters.Latin_1; |
| with Ada.Strings.Maps; use Ada.Strings.Maps; |
| with Ada.Strings.Maps.Constants; use Ada.Strings.Maps.Constants; |
| |
| package body Ada.Characters.Handling is |
| |
| ------------------------------------ |
| -- Character Classification Table -- |
| ------------------------------------ |
| |
| type Character_Flags is mod 256; |
| for Character_Flags'Size use 8; |
| |
| Control : constant Character_Flags := 1; |
| Lower : constant Character_Flags := 2; |
| Upper : constant Character_Flags := 4; |
| Basic : constant Character_Flags := 8; |
| Hex_Digit : constant Character_Flags := 16; |
| Digit : constant Character_Flags := 32; |
| Special : constant Character_Flags := 64; |
| Line_Term : constant Character_Flags := 128; |
| |
| Letter : constant Character_Flags := Lower or Upper; |
| Alphanum : constant Character_Flags := Letter or Digit; |
| Graphic : constant Character_Flags := Alphanum or Special; |
| |
| Char_Map : constant array (Character) of Character_Flags := |
| ( |
| NUL => Control, |
| SOH => Control, |
| STX => Control, |
| ETX => Control, |
| EOT => Control, |
| ENQ => Control, |
| ACK => Control, |
| BEL => Control, |
| BS => Control, |
| HT => Control, |
| LF => Control + Line_Term, |
| VT => Control + Line_Term, |
| FF => Control + Line_Term, |
| CR => Control + Line_Term, |
| SO => Control, |
| SI => Control, |
| |
| DLE => Control, |
| DC1 => Control, |
| DC2 => Control, |
| DC3 => Control, |
| DC4 => Control, |
| NAK => Control, |
| SYN => Control, |
| ETB => Control, |
| CAN => Control, |
| EM => Control, |
| SUB => Control, |
| ESC => Control, |
| FS => Control, |
| GS => Control, |
| RS => Control, |
| US => Control, |
| |
| Space => Special, |
| Exclamation => Special, |
| Quotation => Special, |
| Number_Sign => Special, |
| Dollar_Sign => Special, |
| Percent_Sign => Special, |
| Ampersand => Special, |
| Apostrophe => Special, |
| Left_Parenthesis => Special, |
| Right_Parenthesis => Special, |
| Asterisk => Special, |
| Plus_Sign => Special, |
| Comma => Special, |
| Hyphen => Special, |
| Full_Stop => Special, |
| Solidus => Special, |
| |
| '0' .. '9' => Digit + Hex_Digit, |
| |
| Colon => Special, |
| Semicolon => Special, |
| Less_Than_Sign => Special, |
| Equals_Sign => Special, |
| Greater_Than_Sign => Special, |
| Question => Special, |
| Commercial_At => Special, |
| |
| 'A' .. 'F' => Upper + Basic + Hex_Digit, |
| 'G' .. 'Z' => Upper + Basic, |
| |
| Left_Square_Bracket => Special, |
| Reverse_Solidus => Special, |
| Right_Square_Bracket => Special, |
| Circumflex => Special, |
| Low_Line => Special, |
| Grave => Special, |
| |
| 'a' .. 'f' => Lower + Basic + Hex_Digit, |
| 'g' .. 'z' => Lower + Basic, |
| |
| Left_Curly_Bracket => Special, |
| Vertical_Line => Special, |
| Right_Curly_Bracket => Special, |
| Tilde => Special, |
| |
| DEL => Control, |
| Reserved_128 => Control, |
| Reserved_129 => Control, |
| BPH => Control, |
| NBH => Control, |
| Reserved_132 => Control, |
| NEL => Control + Line_Term, |
| SSA => Control, |
| ESA => Control, |
| HTS => Control, |
| HTJ => Control, |
| VTS => Control, |
| PLD => Control, |
| PLU => Control, |
| RI => Control, |
| SS2 => Control, |
| SS3 => Control, |
| |
| DCS => Control, |
| PU1 => Control, |
| PU2 => Control, |
| STS => Control, |
| CCH => Control, |
| MW => Control, |
| SPA => Control, |
| EPA => Control, |
| |
| SOS => Control, |
| Reserved_153 => Control, |
| SCI => Control, |
| CSI => Control, |
| ST => Control, |
| OSC => Control, |
| PM => Control, |
| APC => Control, |
| |
| No_Break_Space => Special, |
| Inverted_Exclamation => Special, |
| Cent_Sign => Special, |
| Pound_Sign => Special, |
| Currency_Sign => Special, |
| Yen_Sign => Special, |
| Broken_Bar => Special, |
| Section_Sign => Special, |
| Diaeresis => Special, |
| Copyright_Sign => Special, |
| Feminine_Ordinal_Indicator => Special, |
| Left_Angle_Quotation => Special, |
| Not_Sign => Special, |
| Soft_Hyphen => Special, |
| Registered_Trade_Mark_Sign => Special, |
| Macron => Special, |
| Degree_Sign => Special, |
| Plus_Minus_Sign => Special, |
| Superscript_Two => Special, |
| Superscript_Three => Special, |
| Acute => Special, |
| Micro_Sign => Special, |
| Pilcrow_Sign => Special, |
| Middle_Dot => Special, |
| Cedilla => Special, |
| Superscript_One => Special, |
| Masculine_Ordinal_Indicator => Special, |
| Right_Angle_Quotation => Special, |
| Fraction_One_Quarter => Special, |
| Fraction_One_Half => Special, |
| Fraction_Three_Quarters => Special, |
| Inverted_Question => Special, |
| |
| UC_A_Grave => Upper, |
| UC_A_Acute => Upper, |
| UC_A_Circumflex => Upper, |
| UC_A_Tilde => Upper, |
| UC_A_Diaeresis => Upper, |
| UC_A_Ring => Upper, |
| UC_AE_Diphthong => Upper + Basic, |
| UC_C_Cedilla => Upper, |
| UC_E_Grave => Upper, |
| UC_E_Acute => Upper, |
| UC_E_Circumflex => Upper, |
| UC_E_Diaeresis => Upper, |
| UC_I_Grave => Upper, |
| UC_I_Acute => Upper, |
| UC_I_Circumflex => Upper, |
| UC_I_Diaeresis => Upper, |
| UC_Icelandic_Eth => Upper + Basic, |
| UC_N_Tilde => Upper, |
| UC_O_Grave => Upper, |
| UC_O_Acute => Upper, |
| UC_O_Circumflex => Upper, |
| UC_O_Tilde => Upper, |
| UC_O_Diaeresis => Upper, |
| |
| Multiplication_Sign => Special, |
| |
| UC_O_Oblique_Stroke => Upper, |
| UC_U_Grave => Upper, |
| UC_U_Acute => Upper, |
| UC_U_Circumflex => Upper, |
| UC_U_Diaeresis => Upper, |
| UC_Y_Acute => Upper, |
| UC_Icelandic_Thorn => Upper + Basic, |
| |
| LC_German_Sharp_S => Lower + Basic, |
| LC_A_Grave => Lower, |
| LC_A_Acute => Lower, |
| LC_A_Circumflex => Lower, |
| LC_A_Tilde => Lower, |
| LC_A_Diaeresis => Lower, |
| LC_A_Ring => Lower, |
| LC_AE_Diphthong => Lower + Basic, |
| LC_C_Cedilla => Lower, |
| LC_E_Grave => Lower, |
| LC_E_Acute => Lower, |
| LC_E_Circumflex => Lower, |
| LC_E_Diaeresis => Lower, |
| LC_I_Grave => Lower, |
| LC_I_Acute => Lower, |
| LC_I_Circumflex => Lower, |
| LC_I_Diaeresis => Lower, |
| LC_Icelandic_Eth => Lower + Basic, |
| LC_N_Tilde => Lower, |
| LC_O_Grave => Lower, |
| LC_O_Acute => Lower, |
| LC_O_Circumflex => Lower, |
| LC_O_Tilde => Lower, |
| LC_O_Diaeresis => Lower, |
| |
| Division_Sign => Special, |
| |
| LC_O_Oblique_Stroke => Lower, |
| LC_U_Grave => Lower, |
| LC_U_Acute => Lower, |
| LC_U_Circumflex => Lower, |
| LC_U_Diaeresis => Lower, |
| LC_Y_Acute => Lower, |
| LC_Icelandic_Thorn => Lower + Basic, |
| LC_Y_Diaeresis => Lower |
| ); |
| |
| --------------------- |
| -- Is_Alphanumeric -- |
| --------------------- |
| |
| function Is_Alphanumeric (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Alphanum) /= 0; |
| end Is_Alphanumeric; |
| |
| -------------- |
| -- Is_Basic -- |
| -------------- |
| |
| function Is_Basic (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Basic) /= 0; |
| end Is_Basic; |
| |
| ------------------ |
| -- Is_Character -- |
| ------------------ |
| |
| function Is_Character (Item : Wide_Character) return Boolean is |
| begin |
| return Wide_Character'Pos (Item) < 256; |
| end Is_Character; |
| |
| ---------------- |
| -- Is_Control -- |
| ---------------- |
| |
| function Is_Control (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Control) /= 0; |
| end Is_Control; |
| |
| -------------- |
| -- Is_Digit -- |
| -------------- |
| |
| function Is_Digit (Item : Character) return Boolean is |
| begin |
| return Item in '0' .. '9'; |
| end Is_Digit; |
| |
| ---------------- |
| -- Is_Graphic -- |
| ---------------- |
| |
| function Is_Graphic (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Graphic) /= 0; |
| end Is_Graphic; |
| |
| -------------------------- |
| -- Is_Hexadecimal_Digit -- |
| -------------------------- |
| |
| function Is_Hexadecimal_Digit (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Hex_Digit) /= 0; |
| end Is_Hexadecimal_Digit; |
| |
| ---------------- |
| -- Is_ISO_646 -- |
| ---------------- |
| |
| function Is_ISO_646 (Item : Character) return Boolean is |
| begin |
| return Item in ISO_646; |
| end Is_ISO_646; |
| |
| -- Note: much more efficient coding of the following function is possible |
| -- by testing several 16#80# bits in a complete word in a single operation |
| |
| function Is_ISO_646 (Item : String) return Boolean is |
| begin |
| for J in Item'Range loop |
| if Item (J) not in ISO_646 then |
| return False; |
| end if; |
| end loop; |
| |
| return True; |
| end Is_ISO_646; |
| |
| --------------- |
| -- Is_Letter -- |
| --------------- |
| |
| function Is_Letter (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Letter) /= 0; |
| end Is_Letter; |
| |
| ------------------------ |
| -- Is_Line_Terminator -- |
| ------------------------ |
| |
| function Is_Line_Terminator (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Line_Term) /= 0; |
| end Is_Line_Terminator; |
| |
| -------------- |
| -- Is_Lower -- |
| -------------- |
| |
| function Is_Lower (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Lower) /= 0; |
| end Is_Lower; |
| |
| ------------- |
| -- Is_Mark -- |
| ------------- |
| |
| function Is_Mark (Item : Character) return Boolean is |
| pragma Unreferenced (Item); |
| begin |
| return False; |
| end Is_Mark; |
| |
| --------------------- |
| -- Is_Other_Format -- |
| --------------------- |
| |
| function Is_Other_Format (Item : Character) return Boolean is |
| begin |
| return Item = Soft_Hyphen; |
| end Is_Other_Format; |
| |
| ------------------------------ |
| -- Is_Punctuation_Connector -- |
| ------------------------------ |
| |
| function Is_Punctuation_Connector (Item : Character) return Boolean is |
| begin |
| return Item = '_'; |
| end Is_Punctuation_Connector; |
| |
| -------------- |
| -- Is_Space -- |
| -------------- |
| |
| function Is_Space (Item : Character) return Boolean is |
| begin |
| return Item = ' ' or else Item = No_Break_Space; |
| end Is_Space; |
| |
| ---------------- |
| -- Is_Special -- |
| ---------------- |
| |
| function Is_Special (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Special) /= 0; |
| end Is_Special; |
| |
| --------------- |
| -- Is_String -- |
| --------------- |
| |
| function Is_String (Item : Wide_String) return Boolean is |
| begin |
| for J in Item'Range loop |
| if Wide_Character'Pos (Item (J)) >= 256 then |
| return False; |
| end if; |
| end loop; |
| |
| return True; |
| end Is_String; |
| |
| -------------- |
| -- Is_Upper -- |
| -------------- |
| |
| function Is_Upper (Item : Character) return Boolean is |
| begin |
| return (Char_Map (Item) and Upper) /= 0; |
| end Is_Upper; |
| |
| -------------- |
| -- To_Basic -- |
| -------------- |
| |
| function To_Basic (Item : Character) return Character is |
| begin |
| return Value (Basic_Map, Item); |
| end To_Basic; |
| |
| function To_Basic (Item : String) return String is |
| begin |
| return Result : String (1 .. Item'Length) do |
| for J in Item'Range loop |
| Result (J - (Item'First - 1)) := Value (Basic_Map, Item (J)); |
| end loop; |
| end return; |
| end To_Basic; |
| |
| ------------------ |
| -- To_Character -- |
| ------------------ |
| |
| function To_Character |
| (Item : Wide_Character; |
| Substitute : Character := ' ') return Character |
| is |
| begin |
| if Is_Character (Item) then |
| return Character'Val (Wide_Character'Pos (Item)); |
| else |
| return Substitute; |
| end if; |
| end To_Character; |
| |
| ---------------- |
| -- To_ISO_646 -- |
| ---------------- |
| |
| function To_ISO_646 |
| (Item : Character; |
| Substitute : ISO_646 := ' ') return ISO_646 |
| is |
| begin |
| return (if Item in ISO_646 then Item else Substitute); |
| end To_ISO_646; |
| |
| function To_ISO_646 |
| (Item : String; |
| Substitute : ISO_646 := ' ') return String |
| is |
| Result : String (1 .. Item'Length); |
| |
| begin |
| for J in Item'Range loop |
| Result (J - (Item'First - 1)) := |
| (if Item (J) in ISO_646 then Item (J) else Substitute); |
| end loop; |
| |
| return Result; |
| end To_ISO_646; |
| |
| -------------- |
| -- To_Lower -- |
| -------------- |
| |
| function To_Lower (Item : Character) return Character is |
| begin |
| return Value (Lower_Case_Map, Item); |
| end To_Lower; |
| |
| function To_Lower (Item : String) return String is |
| begin |
| return Result : String (1 .. Item'Length) do |
| for J in Item'Range loop |
| Result (J - (Item'First - 1)) := Value (Lower_Case_Map, Item (J)); |
| end loop; |
| end return; |
| end To_Lower; |
| |
| --------------- |
| -- To_String -- |
| --------------- |
| |
| function To_String |
| (Item : Wide_String; |
| Substitute : Character := ' ') return String |
| is |
| Result : String (1 .. Item'Length); |
| |
| begin |
| for J in Item'Range loop |
| Result (J - (Item'First - 1)) := To_Character (Item (J), Substitute); |
| end loop; |
| |
| return Result; |
| end To_String; |
| |
| -------------- |
| -- To_Upper -- |
| -------------- |
| |
| function To_Upper |
| (Item : Character) return Character |
| is |
| begin |
| return Value (Upper_Case_Map, Item); |
| end To_Upper; |
| |
| function To_Upper |
| (Item : String) return String |
| is |
| begin |
| return Result : String (1 .. Item'Length) do |
| for J in Item'Range loop |
| Result (J - (Item'First - 1)) := Value (Upper_Case_Map, Item (J)); |
| end loop; |
| end return; |
| end To_Upper; |
| |
| ----------------------- |
| -- To_Wide_Character -- |
| ----------------------- |
| |
| function To_Wide_Character |
| (Item : Character) return Wide_Character |
| is |
| begin |
| return Wide_Character'Val (Character'Pos (Item)); |
| end To_Wide_Character; |
| |
| -------------------- |
| -- To_Wide_String -- |
| -------------------- |
| |
| function To_Wide_String |
| (Item : String) return Wide_String |
| is |
| Result : Wide_String (1 .. Item'Length); |
| |
| begin |
| for J in Item'Range loop |
| Result (J - (Item'First - 1)) := To_Wide_Character (Item (J)); |
| end loop; |
| |
| return Result; |
| end To_Wide_String; |
| |
| end Ada.Characters.Handling; |