| ------------------------------------------------------------------------------ |
| -- -- |
| -- GNAT RUN-TIME COMPONENTS -- |
| -- -- |
| -- A D A . S T R I N G S . U T F _ E N C O D I N G -- |
| -- -- |
| -- B o d y -- |
| -- -- |
| -- Copyright (C) 2010-2022, Free Software Foundation, Inc. -- |
| -- -- |
| -- GNAT is free software; you can redistribute it and/or modify it under -- |
| -- terms of the GNU General Public License as published by the Free Soft- -- |
| -- ware Foundation; either version 3, or (at your option) any later ver- -- |
| -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- |
| -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- |
| -- or FITNESS FOR A PARTICULAR PURPOSE. -- |
| -- -- |
| -- As a special exception under Section 7 of GPL version 3, you are granted -- |
| -- additional permissions described in the GCC Runtime Library Exception, -- |
| -- version 3.1, as published by the Free Software Foundation. -- |
| -- -- |
| -- You should have received a copy of the GNU General Public License and -- |
| -- a copy of the GCC Runtime Library Exception along with this program; -- |
| -- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -- |
| -- <http://www.gnu.org/licenses/>. -- |
| -- -- |
| -- GNAT was originally developed by the GNAT team at New York University. -- |
| -- Extensive contributions were provided by Ada Core Technologies Inc. -- |
| -- -- |
| ------------------------------------------------------------------------------ |
| |
| package body Ada.Strings.UTF_Encoding is |
| use Interfaces; |
| |
| -------------- |
| -- Encoding -- |
| -------------- |
| |
| function Encoding |
| (Item : UTF_String; |
| Default : Encoding_Scheme := UTF_8) return Encoding_Scheme |
| is |
| begin |
| if Item'Length >= 2 then |
| if Item (Item'First .. Item'First + 1) = BOM_16BE then |
| return UTF_16BE; |
| |
| elsif Item (Item'First .. Item'First + 1) = BOM_16LE then |
| return UTF_16LE; |
| |
| elsif Item'Length >= 3 |
| and then Item (Item'First .. Item'First + 2) = BOM_8 |
| then |
| return UTF_8; |
| end if; |
| end if; |
| |
| return Default; |
| end Encoding; |
| |
| ----------------- |
| -- From_UTF_16 -- |
| ----------------- |
| |
| function From_UTF_16 |
| (Item : UTF_16_Wide_String; |
| Output_Scheme : UTF_XE_Encoding; |
| Output_BOM : Boolean := False) return UTF_String |
| is |
| BSpace : constant Natural := 2 * Boolean'Pos (Output_BOM); |
| Result : UTF_String (1 .. 2 * Item'Length + BSpace); |
| Len : Natural; |
| C : Unsigned_16; |
| Iptr : Natural; |
| |
| begin |
| if Output_BOM then |
| Result (1 .. 2) := |
| (if Output_Scheme = UTF_16BE then BOM_16BE else BOM_16LE); |
| Len := 2; |
| else |
| Len := 0; |
| end if; |
| |
| -- Skip input BOM |
| |
| Iptr := Item'First; |
| |
| if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then |
| Iptr := Iptr + 1; |
| end if; |
| |
| -- UTF-16BE case |
| |
| if Output_Scheme = UTF_16BE then |
| while Iptr <= Item'Last loop |
| C := To_Unsigned_16 (Item (Iptr)); |
| Result (Len + 1) := Character'Val (Shift_Right (C, 8)); |
| Result (Len + 2) := Character'Val (C and 16#00_FF#); |
| Len := Len + 2; |
| Iptr := Iptr + 1; |
| end loop; |
| |
| -- UTF-16LE case |
| |
| else |
| while Iptr <= Item'Last loop |
| C := To_Unsigned_16 (Item (Iptr)); |
| Result (Len + 1) := Character'Val (C and 16#00_FF#); |
| Result (Len + 2) := Character'Val (Shift_Right (C, 8)); |
| Len := Len + 2; |
| Iptr := Iptr + 1; |
| end loop; |
| end if; |
| |
| return Result (1 .. Len); |
| end From_UTF_16; |
| |
| -------------------------- |
| -- Raise_Encoding_Error -- |
| -------------------------- |
| |
| procedure Raise_Encoding_Error (Index : Natural) is |
| Val : constant String := Index'Img; |
| begin |
| raise Encoding_Error with |
| "bad input at Item (" & Val (Val'First + 1 .. Val'Last) & ')'; |
| end Raise_Encoding_Error; |
| |
| --------------- |
| -- To_UTF_16 -- |
| --------------- |
| |
| function To_UTF_16 |
| (Item : UTF_String; |
| Input_Scheme : UTF_XE_Encoding; |
| Output_BOM : Boolean := False) return UTF_16_Wide_String |
| is |
| Result : UTF_16_Wide_String (1 .. Item'Length / 2 + 1); |
| Len : Natural; |
| Iptr : Natural; |
| |
| begin |
| if Item'Length mod 2 /= 0 then |
| raise Encoding_Error with "UTF-16BE/LE string has odd length"; |
| end if; |
| |
| -- Deal with input BOM, skip if OK, error if bad BOM |
| |
| Iptr := Item'First; |
| |
| if Item'Length >= 2 then |
| if Item (Iptr .. Iptr + 1) = BOM_16BE then |
| if Input_Scheme = UTF_16BE then |
| Iptr := Iptr + 2; |
| else |
| Raise_Encoding_Error (Iptr); |
| end if; |
| |
| elsif Item (Iptr .. Iptr + 1) = BOM_16LE then |
| if Input_Scheme = UTF_16LE then |
| Iptr := Iptr + 2; |
| else |
| Raise_Encoding_Error (Iptr); |
| end if; |
| |
| elsif Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then |
| Raise_Encoding_Error (Iptr); |
| end if; |
| end if; |
| |
| -- Output BOM if specified |
| |
| if Output_BOM then |
| Result (1) := BOM_16 (1); |
| Len := 1; |
| else |
| Len := 0; |
| end if; |
| |
| -- UTF-16BE case |
| |
| if Input_Scheme = UTF_16BE then |
| while Iptr < Item'Last loop |
| Len := Len + 1; |
| Result (Len) := |
| Wide_Character'Val |
| (Character'Pos (Item (Iptr)) * 256 + |
| Character'Pos (Item (Iptr + 1))); |
| Iptr := Iptr + 2; |
| end loop; |
| |
| -- UTF-16LE case |
| |
| else |
| while Iptr < Item'Last loop |
| Len := Len + 1; |
| Result (Len) := |
| Wide_Character'Val |
| (Character'Pos (Item (Iptr)) + |
| Character'Pos (Item (Iptr + 1)) * 256); |
| Iptr := Iptr + 2; |
| end loop; |
| end if; |
| |
| return Result (1 .. Len); |
| end To_UTF_16; |
| |
| end Ada.Strings.UTF_Encoding; |