gcc/ada/libgnat/a-zchuni.ads - gcc - Git at Google

 ------------------------------------------------------------------------------
 --                                                                          --
 --                         GNAT RUN-TIME COMPONENTS                         --
 --                                                                          --
 --      A D A . W I D E _ W I D E _ C H A R A C T E R S . U N I C O D E       --
 --                                                                          --
 --                                 S p e c                                  --
 --                                                                          --
 --          Copyright (C) 2005-2022, Free Software Foundation, Inc.         --
 --                                                                          --
 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
 -- terms of the  GNU General Public License as published  by the Free Soft- --
 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
 --                                                                          --
 -- As a special exception under Section 7 of GPL version 3, you are granted --
 -- additional permissions described in the GCC Runtime Library Exception,   --
 -- version 3.1, as published by the Free Software Foundation.               --
 --                                                                          --
 -- You should have received a copy of the GNU General Public License and    --
 -- a copy of the GCC Runtime Library Exception along with this program;     --
 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
 -- <http://www.gnu.org/licenses/>.                                          --
 --                                                                          --
 -- GNAT was originally developed  by the GNAT team at  New York University. --
 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
 --                                                                          --
 ------------------------------------------------------------------------------

 --  Unicode categorization routines for Wide_Wide_Character

 with System.UTF_32;

 package Ada.Wide_Wide_Characters.Unicode is
    pragma Pure;

    --  The following type defines the categories from the Unicode definitions.
    --  The one addition we make is Fe, which represents the characters FFFE
    --  and FFFF in any of the planes.

    type Category is new System.UTF_32.Category;
    --  Cc   Other, Control
    --  Cf   Other, Format
    --  Cn   Other, Not Assigned
    --  Co   Other, Private Use
    --  Cs   Other, Surrogate
    --  Ll   Letter, Lowercase
    --  Lm   Letter, Modifier
    --  Lo   Letter, Other
    --  Lt   Letter, Titlecase
    --  Lu   Letter, Uppercase
    --  Mc   Mark, Spacing Combining
    --  Me   Mark, Enclosing
    --  Mn   Mark, Nonspacing
    --  Nd   Number, Decimal Digit
    --  Nl   Number, Letter
    --  No   Number, Other
    --  Pc   Punctuation, Connector
    --  Pd   Punctuation, Dash
    --  Pe   Punctuation, Close
    --  Pf   Punctuation, Final quote
    --  Pi   Punctuation, Initial quote
    --  Po   Punctuation, Other
    --  Ps   Punctuation, Open
    --  Sc   Symbol, Currency
    --  Sk   Symbol, Modifier
    --  Sm   Symbol, Math
    --  So   Symbol, Other
    --  Zl   Separator, Line
    --  Zp   Separator, Paragraph
    --  Zs   Separator, Space
    --  Fe   relative position FFFE/FFFF in plane

    function Get_Category (U : Wide_Wide_Character) return Category;
    pragma Inline (Get_Category);
    --  Given a Wide_Wide_Character, returns corresponding Category, or Cn if
    --  the code does not have an assigned Unicode category.

    --  The following functions perform category tests corresponding to lexical
    --  classes defined in the Ada standard. There are two interfaces for each
    --  function. The second takes a Category (e.g. returned by Get_Category).
    --  The first takes a Wide_Wide_Character. The form taking the
    --  Wide_Wide_Character is typically more efficient than calling
    --  Get_Category, but if several different tests are to be performed on the
    --  same code, it is more efficient to use Get_Category to get the category,
    --  then test the resulting category.

    function Is_Letter (U : Wide_Wide_Character) return Boolean;
    function Is_Letter (C : Category)            return Boolean;
    pragma Inline (Is_Letter);
    --  Returns true iff U is a letter that can be used to start an identifier,
    --  or if C is one of the corresponding categories, which are the following:
    --    Letter, Uppercase (Lu)
    --    Letter, Lowercase (Ll)
    --    Letter, Titlecase (Lt)
    --    Letter, Modifier  (Lm)
    --    Letter, Other     (Lo)
    --    Number, Letter    (Nl)

    function Is_Digit (U : Wide_Wide_Character) return Boolean;
    function Is_Digit (C : Category)            return Boolean;
    pragma Inline (Is_Digit);
    --  Returns true iff U is a digit that can be used to extend an identifier,
    --  or if C is one of the corresponding categories, which are the following:
    --    Number, Decimal Digit (Nd)

    function Is_Line_Terminator (U : Wide_Wide_Character) return Boolean;
    pragma Inline (Is_Line_Terminator);
    --  Returns true iff U is an allowed line terminator for source programs,
    --  if U is in the category Zp (Separator, Paragraph), or Zl (Separator,
    --  Line), or if U is a conventional line terminator (CR, LF, VT, FF).
    --  There is no category version for this function, since the set of
    --  characters does not correspond to a set of Unicode categories.

    function Is_Mark (U : Wide_Wide_Character) return Boolean;
    function Is_Mark (C : Category)            return Boolean;
    pragma Inline (Is_Mark);
    --  Returns true iff U is a mark character which can be used to extend an
    --  identifier, or if C is one of the corresponding categories, which are
    --  the following:
    --    Mark, Nonspacing (Mn)
    --    Mark, Spacing Combining (Mc)

    function Is_Other (U : Wide_Wide_Character) return Boolean;
    function Is_Other (C : Category)            return Boolean;
    pragma Inline (Is_Other);
    --  Returns true iff U is an other format character, which means that it
    --  can be used to extend an identifier, but is ignored for the purposes of
    --  matching of identifiers, or if C is one of the corresponding categories,
    --  which are the following:
    --    Other, Format (Cf)

    function Is_Punctuation (U : Wide_Wide_Character) return Boolean;
    function Is_Punctuation (C : Category)            return Boolean;
    pragma Inline (Is_Punctuation);
    --  Returns true iff U is a punctuation character that can be used to
    --  separate pieces of an identifier, or if C is one of the corresponding
    --  categories, which are the following:
    --    Punctuation, Connector (Pc)

    function Is_Space (U : Wide_Wide_Character) return Boolean;
    function Is_Space (C : Category)            return Boolean;
    pragma Inline (Is_Space);
    --  Returns true iff U is considered a space to be ignored, or if C is one
    --  of the corresponding categories, which are the following:
    --    Separator, Space (Zs)

    function Is_NFKC (U : Wide_Wide_Character) return Boolean;
    pragma Inline (Is_NFKC);
    --  Returns True if the Wide_Wide_Character designated by U could be present
    --  in a string normalized to Normalization Form KC (as defined by Clause
    --  21 of ISO/IEC 10646:2017), otherwise returns False.

    function Is_Non_Graphic (U : Wide_Wide_Character) return Boolean;
    function Is_Non_Graphic (C : Category)            return Boolean;
    pragma Inline (Is_Non_Graphic);
    --  Returns true iff U is considered to be a non-graphic character, or if C
    --  is one of the corresponding categories, which are the following:
    --    Other, Control (Cc)
    --    Other, Private Use (Co)
    --    Other, Surrogate (Cs)
    --    Separator, Line (Zl)
    --    Separator, Paragraph (Zp)
    --    FFFE or FFFF positions in any plane (Fe)
    --
    --  Note that the Ada category format effector is subsumed by the above
    --  list of Unicode categories.
    --
    --  Note that Other, Not Assigned (Cn) is quite deliberately not included
    --  in the list of categories above. This means that should any of these
    --  code positions be defined in future with graphic characters they will
    --  be allowed without a need to change implementations or the standard.
    --
    --  Note that Other, Format (Cf) is also quite deliberately not included
    --  in the list of categories above. This means that these characters can
    --  be included in character and string literals.

    function Is_Basic (U : Wide_Wide_Character) return Boolean;
    pragma Inline (Is_Basic);
    --  Returns True if the Wide_Wide_Character designated by U has no
    --  Decomposition Mapping in the code charts of ISO/IEC 10646:2017,
    --  otherwise returns False.

    function To_Basic (U : Wide_Wide_Character) return Wide_Wide_Character;
    pragma Inline (To_Basic);
    --  Returns the Wide_Wide_Character whose code point is given by the first
    --  value of its Decomposition Mapping in the code charts of
    --  ISO/IEC 10646:2017 if any, returns U otherwise.

    --  The function To_Upper_Case below is used to fold to upper case, as
    --  required by the Ada 2005 standard rules for identifier case folding.
    --  Two identifiers are equivalent if they are identical after folding all
    --  letters to upper case using that routine. A fold to lower routine
    --  (To_Lower_Case) is also provided.

    function To_Lower_Case
      (U : Wide_Wide_Character) return Wide_Wide_Character;
    pragma Inline (To_Lower_Case);
    --  If U represents an upper case letter, returns the corresponding lower
    --  case letter, otherwise U is returned unchanged. The folding is locale
    --  independent as defined by documents referenced in the note in section
    --  1 of ISO/IEC 10646:2003.

    function To_Upper_Case
      (U : Wide_Wide_Character) return Wide_Wide_Character;
    pragma Inline (To_Upper_Case);
    --  If U represents a lower case letter, returns the corresponding upper
    --  case letter, otherwise U is returned unchanged. The folding is locale
    --  independent as defined by documents referenced in the note in section
    --  1 of ISO/IEC 10646:2003.

 end Ada.Wide_Wide_Characters.Unicode;
	------------------------------------------------------------------------------
	-- --
	-- GNAT RUN-TIME COMPONENTS --
	-- --
	-- A D A . W I D E _ W I D E _ C H A R A C T E R S . U N I C O D E --
	-- --
	-- S p e c --
	-- --
	-- Copyright (C) 2005-2022, Free Software Foundation, Inc. --
	-- --
	-- GNAT is free software; you can redistribute it and/or modify it under --
	-- terms of the GNU General Public License as published by the Free Soft- --
	-- ware Foundation; either version 3, or (at your option) any later ver- --
	-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
	-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
	-- or FITNESS FOR A PARTICULAR PURPOSE. --
	-- --
	-- As a special exception under Section 7 of GPL version 3, you are granted --
	-- additional permissions described in the GCC Runtime Library Exception, --
	-- version 3.1, as published by the Free Software Foundation. --
	-- --
	-- You should have received a copy of the GNU General Public License and --
	-- a copy of the GCC Runtime Library Exception along with this program; --
	-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
	-- <http://www.gnu.org/licenses/>. --
	-- --
	-- GNAT was originally developed by the GNAT team at New York University. --
	-- Extensive contributions were provided by Ada Core Technologies Inc. --
	-- --
	------------------------------------------------------------------------------

	-- Unicode categorization routines for Wide_Wide_Character

	with System.UTF_32;

	package Ada.Wide_Wide_Characters.Unicode is
	pragma Pure;

	-- The following type defines the categories from the Unicode definitions.
	-- The one addition we make is Fe, which represents the characters FFFE
	-- and FFFF in any of the planes.

	type Category is new System.UTF_32.Category;
	-- Cc Other, Control
	-- Cf Other, Format
	-- Cn Other, Not Assigned
	-- Co Other, Private Use
	-- Cs Other, Surrogate
	-- Ll Letter, Lowercase
	-- Lm Letter, Modifier
	-- Lo Letter, Other
	-- Lt Letter, Titlecase
	-- Lu Letter, Uppercase
	-- Mc Mark, Spacing Combining
	-- Me Mark, Enclosing
	-- Mn Mark, Nonspacing
	-- Nd Number, Decimal Digit
	-- Nl Number, Letter
	-- No Number, Other
	-- Pc Punctuation, Connector
	-- Pd Punctuation, Dash
	-- Pe Punctuation, Close
	-- Pf Punctuation, Final quote
	-- Pi Punctuation, Initial quote
	-- Po Punctuation, Other
	-- Ps Punctuation, Open
	-- Sc Symbol, Currency
	-- Sk Symbol, Modifier
	-- Sm Symbol, Math
	-- So Symbol, Other
	-- Zl Separator, Line
	-- Zp Separator, Paragraph
	-- Zs Separator, Space
	-- Fe relative position FFFE/FFFF in plane

	function Get_Category (U : Wide_Wide_Character) return Category;
	pragma Inline (Get_Category);
	-- Given a Wide_Wide_Character, returns corresponding Category, or Cn if
	-- the code does not have an assigned Unicode category.

	-- The following functions perform category tests corresponding to lexical
	-- classes defined in the Ada standard. There are two interfaces for each
	-- function. The second takes a Category (e.g. returned by Get_Category).
	-- The first takes a Wide_Wide_Character. The form taking the
	-- Wide_Wide_Character is typically more efficient than calling
	-- Get_Category, but if several different tests are to be performed on the
	-- same code, it is more efficient to use Get_Category to get the category,
	-- then test the resulting category.

	function Is_Letter (U : Wide_Wide_Character) return Boolean;
	function Is_Letter (C : Category) return Boolean;
	pragma Inline (Is_Letter);
	-- Returns true iff U is a letter that can be used to start an identifier,
	-- or if C is one of the corresponding categories, which are the following:
	-- Letter, Uppercase (Lu)
	-- Letter, Lowercase (Ll)
	-- Letter, Titlecase (Lt)
	-- Letter, Modifier (Lm)
	-- Letter, Other (Lo)
	-- Number, Letter (Nl)

	function Is_Digit (U : Wide_Wide_Character) return Boolean;
	function Is_Digit (C : Category) return Boolean;
	pragma Inline (Is_Digit);
	-- Returns true iff U is a digit that can be used to extend an identifier,
	-- or if C is one of the corresponding categories, which are the following:
	-- Number, Decimal Digit (Nd)

	function Is_Line_Terminator (U : Wide_Wide_Character) return Boolean;
	pragma Inline (Is_Line_Terminator);
	-- Returns true iff U is an allowed line terminator for source programs,
	-- if U is in the category Zp (Separator, Paragraph), or Zl (Separator,
	-- Line), or if U is a conventional line terminator (CR, LF, VT, FF).
	-- There is no category version for this function, since the set of
	-- characters does not correspond to a set of Unicode categories.

	function Is_Mark (U : Wide_Wide_Character) return Boolean;
	function Is_Mark (C : Category) return Boolean;
	pragma Inline (Is_Mark);
	-- Returns true iff U is a mark character which can be used to extend an
	-- identifier, or if C is one of the corresponding categories, which are
	-- the following:
	-- Mark, Nonspacing (Mn)
	-- Mark, Spacing Combining (Mc)

	function Is_Other (U : Wide_Wide_Character) return Boolean;
	function Is_Other (C : Category) return Boolean;
	pragma Inline (Is_Other);
	-- Returns true iff U is an other format character, which means that it
	-- can be used to extend an identifier, but is ignored for the purposes of
	-- matching of identifiers, or if C is one of the corresponding categories,
	-- which are the following:
	-- Other, Format (Cf)

	function Is_Punctuation (U : Wide_Wide_Character) return Boolean;
	function Is_Punctuation (C : Category) return Boolean;
	pragma Inline (Is_Punctuation);
	-- Returns true iff U is a punctuation character that can be used to
	-- separate pieces of an identifier, or if C is one of the corresponding
	-- categories, which are the following:
	-- Punctuation, Connector (Pc)

	function Is_Space (U : Wide_Wide_Character) return Boolean;
	function Is_Space (C : Category) return Boolean;
	pragma Inline (Is_Space);
	-- Returns true iff U is considered a space to be ignored, or if C is one
	-- of the corresponding categories, which are the following:
	-- Separator, Space (Zs)

	function Is_NFKC (U : Wide_Wide_Character) return Boolean;
	pragma Inline (Is_NFKC);
	-- Returns True if the Wide_Wide_Character designated by U could be present
	-- in a string normalized to Normalization Form KC (as defined by Clause
	-- 21 of ISO/IEC 10646:2017), otherwise returns False.

	function Is_Non_Graphic (U : Wide_Wide_Character) return Boolean;
	function Is_Non_Graphic (C : Category) return Boolean;
	pragma Inline (Is_Non_Graphic);
	-- Returns true iff U is considered to be a non-graphic character, or if C
	-- is one of the corresponding categories, which are the following:
	-- Other, Control (Cc)
	-- Other, Private Use (Co)
	-- Other, Surrogate (Cs)
	-- Separator, Line (Zl)
	-- Separator, Paragraph (Zp)
	-- FFFE or FFFF positions in any plane (Fe)
	--
	-- Note that the Ada category format effector is subsumed by the above
	-- list of Unicode categories.
	--
	-- Note that Other, Not Assigned (Cn) is quite deliberately not included
	-- in the list of categories above. This means that should any of these
	-- code positions be defined in future with graphic characters they will
	-- be allowed without a need to change implementations or the standard.
	--
	-- Note that Other, Format (Cf) is also quite deliberately not included
	-- in the list of categories above. This means that these characters can
	-- be included in character and string literals.

	function Is_Basic (U : Wide_Wide_Character) return Boolean;
	pragma Inline (Is_Basic);
	-- Returns True if the Wide_Wide_Character designated by U has no
	-- Decomposition Mapping in the code charts of ISO/IEC 10646:2017,
	-- otherwise returns False.

	function To_Basic (U : Wide_Wide_Character) return Wide_Wide_Character;
	pragma Inline (To_Basic);
	-- Returns the Wide_Wide_Character whose code point is given by the first
	-- value of its Decomposition Mapping in the code charts of
	-- ISO/IEC 10646:2017 if any, returns U otherwise.

	-- The function To_Upper_Case below is used to fold to upper case, as
	-- required by the Ada 2005 standard rules for identifier case folding.
	-- Two identifiers are equivalent if they are identical after folding all
	-- letters to upper case using that routine. A fold to lower routine
	-- (To_Lower_Case) is also provided.

	function To_Lower_Case
	(U : Wide_Wide_Character) return Wide_Wide_Character;
	pragma Inline (To_Lower_Case);
	-- If U represents an upper case letter, returns the corresponding lower
	-- case letter, otherwise U is returned unchanged. The folding is locale
	-- independent as defined by documents referenced in the note in section
	-- 1 of ISO/IEC 10646:2003.

	function To_Upper_Case
	(U : Wide_Wide_Character) return Wide_Wide_Character;
	pragma Inline (To_Upper_Case);
	-- If U represents a lower case letter, returns the corresponding upper
	-- case letter, otherwise U is returned unchanged. The folding is locale
	-- independent as defined by documents referenced in the note in section
	-- 1 of ISO/IEC 10646:2003.

	end Ada.Wide_Wide_Characters.Unicode;