gcc/ada/a-suenco.adb - gcc - Git at Google

 ------------------------------------------------------------------------------
 --                                                                          --
 --                         GNAT RUN-TIME COMPONENTS                         --
 --                                                                          --
 --                   ADA.STRINGS.UTF_ENCODING.CONVERSIONS                   --
 --                                                                          --
 --                                 B o d y                                  --
 --                                                                          --
 --          Copyright (C) 2010-2014, Free Software Foundation, Inc.         --
 --                                                                          --
 -- GNAT is free software;  you can  redistribute it  and/or modify it under --
 -- terms of the  GNU General Public License as published  by the Free Soft- --
 -- ware  Foundation;  either version 3,  or (at your option) any later ver- --
 -- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
 -- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
 -- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
 --                                                                          --
 -- As a special exception under Section 7 of GPL version 3, you are granted --
 -- additional permissions described in the GCC Runtime Library Exception,   --
 -- version 3.1, as published by the Free Software Foundation.               --
 --                                                                          --
 -- You should have received a copy of the GNU General Public License and    --
 -- a copy of the GCC Runtime Library Exception along with this program;     --
 -- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
 -- <http://www.gnu.org/licenses/>.                                          --
 --                                                                          --
 -- GNAT was originally developed  by the GNAT team at  New York University. --
 -- Extensive contributions were provided by Ada Core Technologies Inc.      --
 --                                                                          --
 ------------------------------------------------------------------------------

 package body Ada.Strings.UTF_Encoding.Conversions is
    use Interfaces;

    --  Convert from UTF-8/UTF-16BE/LE to UTF-8/UTF-16BE/LE

    function Convert
      (Item          : UTF_String;
       Input_Scheme  : Encoding_Scheme;
       Output_Scheme : Encoding_Scheme;
       Output_BOM    : Boolean := False) return UTF_String
    is
    begin
       --  Nothing to do if identical schemes, but for UTF_8 we need to
       --  handle overlong encodings, so need to do the full conversion.

       if Input_Scheme = Output_Scheme
         and then Input_Scheme /= UTF_8
       then
          return Item;

       --  For remaining cases, one or other of the operands is UTF-16BE/LE
       --  encoded, or we have the UTF-8 to UTF-8 case where we must handle
       --  overlong encodings. In all cases,  go through UTF-16 intermediate.

       else
          return Convert (UTF_16_Wide_String'(Convert (Item, Input_Scheme)),
                          Output_Scheme, Output_BOM);
       end if;
    end Convert;

    --  Convert from UTF-8/UTF-16BE/LE to UTF-16

    function Convert
      (Item          : UTF_String;
       Input_Scheme  : Encoding_Scheme;
       Output_BOM    : Boolean := False) return UTF_16_Wide_String
    is
    begin
       if Input_Scheme = UTF_8 then
          return Convert (Item, Output_BOM);
       else
          return To_UTF_16 (Item, Input_Scheme, Output_BOM);
       end if;
    end Convert;

    --  Convert from UTF-8 to UTF-16

    function Convert
      (Item       : UTF_8_String;
       Output_BOM : Boolean := False) return UTF_16_Wide_String
    is
       Result : UTF_16_Wide_String (1 .. Item'Length + 1);
       --  Maximum length of result, including possible BOM

       Len : Natural := 0;
       --  Number of characters stored so far in Result

       Iptr : Natural;
       --  Next character to process in Item

       C : Unsigned_8;
       --  Input UTF-8 code

       R : Unsigned_16;
       --  Output UTF-16 code

       procedure Get_Continuation;
       --  Reads a continuation byte of the form 10xxxxxx, shifts R left by 6
       --  bits, and or's in the xxxxxx to the low order 6 bits. On return Ptr
       --  is incremented. Raises exception if continuation byte does not exist
       --  or is invalid.

       ----------------------
       -- Get_Continuation --
       ----------------------

       procedure Get_Continuation is
       begin
          if Iptr > Item'Last then
             Raise_Encoding_Error (Iptr - 1);

          else
             C := To_Unsigned_8 (Item (Iptr));
             Iptr := Iptr + 1;

             if C < 2#10_000000# or else C > 2#10_111111# then
                Raise_Encoding_Error (Iptr - 1);

             else
                R :=
                  Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#);
             end if;
          end if;
       end Get_Continuation;

    --  Start of processing for Convert

    begin
       --  Output BOM if required

       if Output_BOM then
          Len := Len + 1;
          Result (Len) := BOM_16 (1);
       end if;

       --  Skip OK BOM

       Iptr := Item'First;

       if Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then
          Iptr := Iptr + 3;

       --  Error if bad BOM

       elsif Item'Length >= 2
         and then (Item (Iptr .. Iptr + 1) = BOM_16BE
                     or else
                   Item (Iptr .. Iptr + 1) = BOM_16LE)
       then
          Raise_Encoding_Error (Iptr);

       --  No BOM present

       else
          Iptr := Item'First;
       end if;

       while Iptr <= Item'Last loop
          C := To_Unsigned_8 (Item (Iptr));
          Iptr := Iptr + 1;

          --  Codes in the range 16#00# .. 16#7F#
          --    UTF-8:  0xxxxxxx
          --    UTF-16: 00000000_0xxxxxxx

          if C <= 16#7F# then
             Len := Len + 1;
             Result (Len) := Wide_Character'Val (C);

          --  No initial code can be of the form 10xxxxxx. Such codes are used
          --  only for continuations.

          elsif C <= 2#10_111111# then
             Raise_Encoding_Error (Iptr - 1);

          --  Codes in the range 16#80# .. 16#7FF#
          --    UTF-8:  110yyyxx 10xxxxxx
          --    UTF-16: 00000yyy_xxxxxxxx

          elsif C <= 2#110_11111# then
             R := Unsigned_16 (C and 2#000_11111#);
             Get_Continuation;
             Len := Len + 1;
             Result (Len) := Wide_Character'Val (R);

          --  Codes in the range 16#800# .. 16#D7FF or 16#DF01# .. 16#FFFF#
          --    UTF-8:  1110yyyy 10yyyyxx 10xxxxxx
          --    UTF-16: yyyyyyyy_xxxxxxxx

          elsif C <= 2#1110_1111# then
             R := Unsigned_16 (C and 2#0000_1111#);
             Get_Continuation;
             Get_Continuation;
             Len := Len + 1;
             Result (Len) := Wide_Character'Val (R);

             --  Make sure that we don't have a result in the forbidden range
             --  reserved for UTF-16 surrogate characters.

             if R in 16#D800# .. 16#DF00# then
                Raise_Encoding_Error (Iptr - 3);
             end if;

          --  Codes in the range 16#10000# .. 16#10FFFF#
          --    UTF-8:  11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
          --    UTF-16: 110110zz_zzyyyyyy 110111yy_xxxxxxxx
          --    Note: zzzz in the output is input zzzzz - 1

          elsif C <= 2#11110_111# then
             R := Unsigned_16 (C and 2#00000_111#);
             Get_Continuation;

             --  R now has zzzzzyyyy

             --  At this stage, we check for the case where we have an overlong
             --  encoding, and the encoded value in fact lies in the single word
             --  range (16#800# .. 16#D7FF or 16#DF01# .. 16#FFFF#). This means
             --  that the result fits in a single result word.

             if R <= 2#1111# then
                Get_Continuation;
                Get_Continuation;

                --  Make sure we are not in the forbidden surrogate range

                if R in 16#D800# .. 16#DF00# then
                   Raise_Encoding_Error (Iptr - 3);
                end if;

                --  Otherwise output a single UTF-16 value

                Len := Len + 1;
                Result (Len) := Wide_Character'Val (R);

             --  Here for normal case (code value > 16#FFFF and zzzzz non-zero)

             else
                --  Subtract 1 from input zzzzz value to get output zzzz value

                R := R - 2#0000_1_0000#;

                --  R now has zzzzyyyy (zzzz minus one for the output)

                Get_Continuation;

                --  R now has zzzzyy_yyyyyyxx

                Len := Len + 1;
                Result (Len) :=
                  Wide_Character'Val
                    (2#110110_00_0000_0000# or Shift_Right (R, 4));

                R := R and 2#1111#;
                Get_Continuation;
                Len := Len + 1;
                Result (Len) :=
                  Wide_Character'Val (2#110111_00_0000_0000# or R);
             end if;

          --  Any other code is an error

          else
             Raise_Encoding_Error (Iptr - 1);
          end if;
       end loop;

       return Result (1 .. Len);
    end Convert;

    --  Convert from UTF-16 to UTF-8/UTF-16-BE/LE

    function Convert
      (Item          : UTF_16_Wide_String;
       Output_Scheme : Encoding_Scheme;
       Output_BOM    : Boolean := False) return UTF_String
    is
    begin
       if Output_Scheme = UTF_8 then
          return Convert (Item, Output_BOM);
       else
          return From_UTF_16 (Item, Output_Scheme, Output_BOM);
       end if;
    end Convert;

    --  Convert from UTF-16 to UTF-8

    function Convert
      (Item          : UTF_16_Wide_String;
       Output_BOM    : Boolean := False) return UTF_8_String
    is
       Result : UTF_8_String (1 .. 3 * Item'Length + 3);
       --  Worst case is 3 output codes for each input code + BOM space

       Len : Natural;
       --  Number of result codes stored

       Iptr : Natural;
       --  Pointer to next input character

       C1, C2 : Unsigned_16;

       zzzzz    : Unsigned_16;
       yyyyyyyy : Unsigned_16;
       xxxxxxxx : Unsigned_16;
       --  Components of double length case

    begin
       Iptr := Item'First;

       --  Skip BOM at start of input

       if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then
          Iptr := Iptr + 1;
       end if;

       --  Generate output BOM if required

       if Output_BOM then
          Result (1 .. 3) := BOM_8;
          Len := 3;
       else
          Len := 0;
       end if;

       --  Loop through input

       while Iptr <= Item'Last loop
          C1 := To_Unsigned_16 (Item (Iptr));
          Iptr := Iptr + 1;

          --  Codes in the range 16#0000# - 16#007F#
          --    UTF-16: 000000000xxxxxxx
          --    UTF-8:  0xxxxxxx

          if C1 <= 16#007F# then
             Result (Len + 1) := Character'Val (C1);
             Len := Len + 1;

          --  Codes in the range 16#80# - 16#7FF#
          --    UTF-16: 00000yyyxxxxxxxx
          --    UTF-8:  110yyyxx 10xxxxxx

          elsif C1 <= 16#07FF# then
             Result (Len + 1) :=
               Character'Val
                 (2#110_00000# or Shift_Right (C1, 6));
             Result (Len + 2) :=
               Character'Val
                 (2#10_000000# or (C1 and 2#00_111111#));
             Len := Len + 2;

          --  Codes in the range 16#800# - 16#D7FF# or 16#E000# - 16#FFFF#
          --    UTF-16: yyyyyyyyxxxxxxxx
          --    UTF-8:  1110yyyy 10yyyyxx 10xxxxxx

          elsif C1 <= 16#D7FF# or else C1 >= 16#E000# then
             Result (Len + 1) :=
               Character'Val
                 (2#1110_0000# or Shift_Right (C1, 12));
             Result (Len + 2) :=
               Character'Val
                 (2#10_000000# or (Shift_Right (C1, 6) and 2#00_111111#));
             Result (Len + 3) :=
               Character'Val
                 (2#10_000000# or (C1 and 2#00_111111#));
             Len := Len + 3;

          --  Codes in the range 16#10000# - 16#10FFFF#
          --    UTF-16: 110110zzzzyyyyyy 110111yyxxxxxxxx
          --    UTF-8:  11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
          --    Note: zzzzz in the output is input zzzz + 1

          elsif C1 <= 2#110110_11_11111111# then
             if Iptr > Item'Last then
                Raise_Encoding_Error (Iptr - 1);
             else
                C2 := To_Unsigned_16 (Item (Iptr));
                Iptr := Iptr + 1;
             end if;

             if (C2 and 2#111111_00_00000000#) /= 2#110111_00_00000000# then
                Raise_Encoding_Error (Iptr - 1);
             end if;

             zzzzz    := (Shift_Right (C1, 6) and 2#1111#) + 1;
             yyyyyyyy := ((Shift_Left (C1, 2) and 2#111111_00#)
                             or
                          (Shift_Right (C2, 8) and 2#000000_11#));
             xxxxxxxx := C2 and 2#11111111#;

             Result (Len + 1) :=
               Character'Val
                 (2#11110_000# or (Shift_Right (zzzzz, 2)));
             Result (Len + 2) :=
               Character'Val
                 (2#10_000000# or Shift_Left (zzzzz and 2#11#, 4)
                               or Shift_Right (yyyyyyyy, 4));
             Result (Len + 3) :=
               Character'Val
                 (2#10_000000# or Shift_Left (yyyyyyyy and 2#1111#, 4)
                               or Shift_Right (xxxxxxxx, 6));
             Result (Len + 4) :=
               Character'Val
                 (2#10_000000# or (xxxxxxxx and 2#00_111111#));
             Len := Len + 4;

          --  Error if input in 16#DC00# - 16#DFFF# (2nd surrogate with no 1st)

          else
             Raise_Encoding_Error (Iptr - 2);
          end if;
       end loop;

       return Result (1 .. Len);
    end Convert;

 end Ada.Strings.UTF_Encoding.Conversions;
	------------------------------------------------------------------------------
	-- --
	-- GNAT RUN-TIME COMPONENTS --
	-- --
	-- ADA.STRINGS.UTF_ENCODING.CONVERSIONS --
	-- --
	-- B o d y --
	-- --
	-- Copyright (C) 2010-2014, Free Software Foundation, Inc. --
	-- --
	-- GNAT is free software; you can redistribute it and/or modify it under --
	-- terms of the GNU General Public License as published by the Free Soft- --
	-- ware Foundation; either version 3, or (at your option) any later ver- --
	-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
	-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
	-- or FITNESS FOR A PARTICULAR PURPOSE. --
	-- --
	-- As a special exception under Section 7 of GPL version 3, you are granted --
	-- additional permissions described in the GCC Runtime Library Exception, --
	-- version 3.1, as published by the Free Software Foundation. --
	-- --
	-- You should have received a copy of the GNU General Public License and --
	-- a copy of the GCC Runtime Library Exception along with this program; --
	-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
	-- <http://www.gnu.org/licenses/>. --
	-- --
	-- GNAT was originally developed by the GNAT team at New York University. --
	-- Extensive contributions were provided by Ada Core Technologies Inc. --
	-- --
	------------------------------------------------------------------------------

	package body Ada.Strings.UTF_Encoding.Conversions is
	use Interfaces;

	-- Convert from UTF-8/UTF-16BE/LE to UTF-8/UTF-16BE/LE

	function Convert
	(Item : UTF_String;
	Input_Scheme : Encoding_Scheme;
	Output_Scheme : Encoding_Scheme;
	Output_BOM : Boolean := False) return UTF_String
	is
	begin
	-- Nothing to do if identical schemes, but for UTF_8 we need to
	-- handle overlong encodings, so need to do the full conversion.

	if Input_Scheme = Output_Scheme
	and then Input_Scheme /= UTF_8
	then
	return Item;

	-- For remaining cases, one or other of the operands is UTF-16BE/LE
	-- encoded, or we have the UTF-8 to UTF-8 case where we must handle
	-- overlong encodings. In all cases, go through UTF-16 intermediate.

	else
	return Convert (UTF_16_Wide_String'(Convert (Item, Input_Scheme)),
	Output_Scheme, Output_BOM);
	end if;
	end Convert;

	-- Convert from UTF-8/UTF-16BE/LE to UTF-16

	function Convert
	(Item : UTF_String;
	Input_Scheme : Encoding_Scheme;
	Output_BOM : Boolean := False) return UTF_16_Wide_String
	is
	begin
	if Input_Scheme = UTF_8 then
	return Convert (Item, Output_BOM);
	else
	return To_UTF_16 (Item, Input_Scheme, Output_BOM);
	end if;
	end Convert;

	-- Convert from UTF-8 to UTF-16

	function Convert
	(Item : UTF_8_String;
	Output_BOM : Boolean := False) return UTF_16_Wide_String
	is
	Result : UTF_16_Wide_String (1 .. Item'Length + 1);
	-- Maximum length of result, including possible BOM

	Len : Natural := 0;
	-- Number of characters stored so far in Result

	Iptr : Natural;
	-- Next character to process in Item

	C : Unsigned_8;
	-- Input UTF-8 code

	R : Unsigned_16;
	-- Output UTF-16 code

	procedure Get_Continuation;
	-- Reads a continuation byte of the form 10xxxxxx, shifts R left by 6
	-- bits, and or's in the xxxxxx to the low order 6 bits. On return Ptr
	-- is incremented. Raises exception if continuation byte does not exist
	-- or is invalid.

	----------------------
	-- Get_Continuation --
	----------------------

	procedure Get_Continuation is
	begin
	if Iptr > Item'Last then
	Raise_Encoding_Error (Iptr - 1);

	else
	C := To_Unsigned_8 (Item (Iptr));
	Iptr := Iptr + 1;

	if C < 2#10_000000# or else C > 2#10_111111# then
	Raise_Encoding_Error (Iptr - 1);

	else
	R :=
	Shift_Left (R, 6) or Unsigned_16 (C and 2#00_111111#);
	end if;
	end if;
	end Get_Continuation;

	-- Start of processing for Convert

	begin
	-- Output BOM if required

	if Output_BOM then
	Len := Len + 1;
	Result (Len) := BOM_16 (1);
	end if;

	-- Skip OK BOM

	Iptr := Item'First;

	if Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then
	Iptr := Iptr + 3;

	-- Error if bad BOM

	elsif Item'Length >= 2
	and then (Item (Iptr .. Iptr + 1) = BOM_16BE
	or else
	Item (Iptr .. Iptr + 1) = BOM_16LE)
	then
	Raise_Encoding_Error (Iptr);

	-- No BOM present

	else
	Iptr := Item'First;
	end if;

	while Iptr <= Item'Last loop
	C := To_Unsigned_8 (Item (Iptr));
	Iptr := Iptr + 1;

	-- Codes in the range 16#00# .. 16#7F#
	-- UTF-8: 0xxxxxxx
	-- UTF-16: 00000000_0xxxxxxx

	if C <= 16#7F# then
	Len := Len + 1;
	Result (Len) := Wide_Character'Val (C);

	-- No initial code can be of the form 10xxxxxx. Such codes are used
	-- only for continuations.

	elsif C <= 2#10_111111# then
	Raise_Encoding_Error (Iptr - 1);

	-- Codes in the range 16#80# .. 16#7FF#
	-- UTF-8: 110yyyxx 10xxxxxx
	-- UTF-16: 00000yyy_xxxxxxxx

	elsif C <= 2#110_11111# then
	R := Unsigned_16 (C and 2#000_11111#);
	Get_Continuation;
	Len := Len + 1;
	Result (Len) := Wide_Character'Val (R);

	-- Codes in the range 16#800# .. 16#D7FF or 16#DF01# .. 16#FFFF#
	-- UTF-8: 1110yyyy 10yyyyxx 10xxxxxx
	-- UTF-16: yyyyyyyy_xxxxxxxx

	elsif C <= 2#1110_1111# then
	R := Unsigned_16 (C and 2#0000_1111#);
	Get_Continuation;
	Get_Continuation;
	Len := Len + 1;
	Result (Len) := Wide_Character'Val (R);

	-- Make sure that we don't have a result in the forbidden range
	-- reserved for UTF-16 surrogate characters.

	if R in 16#D800# .. 16#DF00# then
	Raise_Encoding_Error (Iptr - 3);
	end if;

	-- Codes in the range 16#10000# .. 16#10FFFF#
	-- UTF-8: 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
	-- UTF-16: 110110zz_zzyyyyyy 110111yy_xxxxxxxx
	-- Note: zzzz in the output is input zzzzz - 1

	elsif C <= 2#11110_111# then
	R := Unsigned_16 (C and 2#00000_111#);
	Get_Continuation;

	-- R now has zzzzzyyyy

	-- At this stage, we check for the case where we have an overlong
	-- encoding, and the encoded value in fact lies in the single word
	-- range (16#800# .. 16#D7FF or 16#DF01# .. 16#FFFF#). This means
	-- that the result fits in a single result word.

	if R <= 2#1111# then
	Get_Continuation;
	Get_Continuation;

	-- Make sure we are not in the forbidden surrogate range

	if R in 16#D800# .. 16#DF00# then
	Raise_Encoding_Error (Iptr - 3);
	end if;

	-- Otherwise output a single UTF-16 value

	Len := Len + 1;
	Result (Len) := Wide_Character'Val (R);

	-- Here for normal case (code value > 16#FFFF and zzzzz non-zero)

	else
	-- Subtract 1 from input zzzzz value to get output zzzz value

	R := R - 2#0000_1_0000#;

	-- R now has zzzzyyyy (zzzz minus one for the output)

	Get_Continuation;

	-- R now has zzzzyy_yyyyyyxx

	Len := Len + 1;
	Result (Len) :=
	Wide_Character'Val
	(2#110110_00_0000_0000# or Shift_Right (R, 4));

	R := R and 2#1111#;
	Get_Continuation;
	Len := Len + 1;
	Result (Len) :=
	Wide_Character'Val (2#110111_00_0000_0000# or R);
	end if;

	-- Any other code is an error

	else
	Raise_Encoding_Error (Iptr - 1);
	end if;
	end loop;

	return Result (1 .. Len);
	end Convert;

	-- Convert from UTF-16 to UTF-8/UTF-16-BE/LE

	function Convert
	(Item : UTF_16_Wide_String;
	Output_Scheme : Encoding_Scheme;
	Output_BOM : Boolean := False) return UTF_String
	is
	begin
	if Output_Scheme = UTF_8 then
	return Convert (Item, Output_BOM);
	else
	return From_UTF_16 (Item, Output_Scheme, Output_BOM);
	end if;
	end Convert;

	-- Convert from UTF-16 to UTF-8

	function Convert
	(Item : UTF_16_Wide_String;
	Output_BOM : Boolean := False) return UTF_8_String
	is
	Result : UTF_8_String (1 .. 3 * Item'Length + 3);
	-- Worst case is 3 output codes for each input code + BOM space

	Len : Natural;
	-- Number of result codes stored

	Iptr : Natural;
	-- Pointer to next input character

	C1, C2 : Unsigned_16;

	zzzzz : Unsigned_16;
	yyyyyyyy : Unsigned_16;
	xxxxxxxx : Unsigned_16;
	-- Components of double length case

	begin
	Iptr := Item'First;

	-- Skip BOM at start of input

	if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then
	Iptr := Iptr + 1;
	end if;

	-- Generate output BOM if required

	if Output_BOM then
	Result (1 .. 3) := BOM_8;
	Len := 3;
	else
	Len := 0;
	end if;

	-- Loop through input

	while Iptr <= Item'Last loop
	C1 := To_Unsigned_16 (Item (Iptr));
	Iptr := Iptr + 1;

	-- Codes in the range 16#0000# - 16#007F#
	-- UTF-16: 000000000xxxxxxx
	-- UTF-8: 0xxxxxxx

	if C1 <= 16#007F# then
	Result (Len + 1) := Character'Val (C1);
	Len := Len + 1;

	-- Codes in the range 16#80# - 16#7FF#
	-- UTF-16: 00000yyyxxxxxxxx
	-- UTF-8: 110yyyxx 10xxxxxx

	elsif C1 <= 16#07FF# then
	Result (Len + 1) :=
	Character'Val
	(2#110_00000# or Shift_Right (C1, 6));
	Result (Len + 2) :=
	Character'Val
	(2#10_000000# or (C1 and 2#00_111111#));
	Len := Len + 2;

	-- Codes in the range 16#800# - 16#D7FF# or 16#E000# - 16#FFFF#
	-- UTF-16: yyyyyyyyxxxxxxxx
	-- UTF-8: 1110yyyy 10yyyyxx 10xxxxxx

	elsif C1 <= 16#D7FF# or else C1 >= 16#E000# then
	Result (Len + 1) :=
	Character'Val
	(2#1110_0000# or Shift_Right (C1, 12));
	Result (Len + 2) :=
	Character'Val
	(2#10_000000# or (Shift_Right (C1, 6) and 2#00_111111#));
	Result (Len + 3) :=
	Character'Val
	(2#10_000000# or (C1 and 2#00_111111#));
	Len := Len + 3;

	-- Codes in the range 16#10000# - 16#10FFFF#
	-- UTF-16: 110110zzzzyyyyyy 110111yyxxxxxxxx
	-- UTF-8: 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
	-- Note: zzzzz in the output is input zzzz + 1

	elsif C1 <= 2#110110_11_11111111# then
	if Iptr > Item'Last then
	Raise_Encoding_Error (Iptr - 1);
	else
	C2 := To_Unsigned_16 (Item (Iptr));
	Iptr := Iptr + 1;
	end if;

	if (C2 and 2#111111_00_00000000#) /= 2#110111_00_00000000# then
	Raise_Encoding_Error (Iptr - 1);
	end if;

	zzzzz := (Shift_Right (C1, 6) and 2#1111#) + 1;
	yyyyyyyy := ((Shift_Left (C1, 2) and 2#111111_00#)
	or
	(Shift_Right (C2, 8) and 2#000000_11#));
	xxxxxxxx := C2 and 2#11111111#;

	Result (Len + 1) :=
	Character'Val
	(2#11110_000# or (Shift_Right (zzzzz, 2)));
	Result (Len + 2) :=
	Character'Val
	(2#10_000000# or Shift_Left (zzzzz and 2#11#, 4)
	or Shift_Right (yyyyyyyy, 4));
	Result (Len + 3) :=
	Character'Val
	(2#10_000000# or Shift_Left (yyyyyyyy and 2#1111#, 4)
	or Shift_Right (xxxxxxxx, 6));
	Result (Len + 4) :=
	Character'Val
	(2#10_000000# or (xxxxxxxx and 2#00_111111#));
	Len := Len + 4;

	-- Error if input in 16#DC00# - 16#DFFF# (2nd surrogate with no 1st)

	else
	Raise_Encoding_Error (Iptr - 2);
	end if;
	end loop;

	return Result (1 .. Len);
	end Convert;

	end Ada.Strings.UTF_Encoding.Conversions;