blob: 10be744123e46b600a0de1359b2ff0fe085989d8 [file] [log] [blame]
------------------------------------------------------------------------------
-- --
-- GNAT RUN-TIME COMPONENTS --
-- --
-- A D A . S T R I N G S . U T F _ E N C O D I N G --
-- --
-- B o d y --
-- --
-- Copyright (C) 2010-2023, Free Software Foundation, Inc. --
-- --
-- GNAT is free software; you can redistribute it and/or modify it under --
-- terms of the GNU General Public License as published by the Free Soft- --
-- ware Foundation; either version 3, or (at your option) any later ver- --
-- sion. GNAT is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE. --
-- --
-- As a special exception under Section 7 of GPL version 3, you are granted --
-- additional permissions described in the GCC Runtime Library Exception, --
-- version 3.1, as published by the Free Software Foundation. --
-- --
-- You should have received a copy of the GNU General Public License and --
-- a copy of the GCC Runtime Library Exception along with this program; --
-- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see --
-- <http://www.gnu.org/licenses/>. --
-- --
-- GNAT was originally developed by the GNAT team at New York University. --
-- Extensive contributions were provided by Ada Core Technologies Inc. --
-- --
------------------------------------------------------------------------------
package body Ada.Strings.UTF_Encoding is
use Interfaces;
--------------
-- Encoding --
--------------
function Encoding
(Item : UTF_String;
Default : Encoding_Scheme := UTF_8) return Encoding_Scheme
is
begin
if Item'Length >= 2 then
if Item (Item'First .. Item'First + 1) = BOM_16BE then
return UTF_16BE;
elsif Item (Item'First .. Item'First + 1) = BOM_16LE then
return UTF_16LE;
elsif Item'Length >= 3
and then Item (Item'First .. Item'First + 2) = BOM_8
then
return UTF_8;
end if;
end if;
return Default;
end Encoding;
-----------------
-- From_UTF_16 --
-----------------
function From_UTF_16
(Item : UTF_16_Wide_String;
Output_Scheme : UTF_XE_Encoding;
Output_BOM : Boolean := False) return UTF_String
is
BSpace : constant Natural := 2 * Boolean'Pos (Output_BOM);
Result : UTF_String (1 .. 2 * Item'Length + BSpace);
Len : Natural;
C : Unsigned_16;
Iptr : Natural;
begin
if Output_BOM then
Result (1 .. 2) :=
(if Output_Scheme = UTF_16BE then BOM_16BE else BOM_16LE);
Len := 2;
else
Len := 0;
end if;
-- Skip input BOM
Iptr := Item'First;
if Iptr <= Item'Last and then Item (Iptr) = BOM_16 (1) then
Iptr := Iptr + 1;
end if;
-- UTF-16BE case
if Output_Scheme = UTF_16BE then
while Iptr <= Item'Last loop
C := To_Unsigned_16 (Item (Iptr));
Result (Len + 1) := Character'Val (Shift_Right (C, 8));
Result (Len + 2) := Character'Val (C and 16#00_FF#);
Len := Len + 2;
Iptr := Iptr + 1;
end loop;
-- UTF-16LE case
else
while Iptr <= Item'Last loop
C := To_Unsigned_16 (Item (Iptr));
Result (Len + 1) := Character'Val (C and 16#00_FF#);
Result (Len + 2) := Character'Val (Shift_Right (C, 8));
Len := Len + 2;
Iptr := Iptr + 1;
end loop;
end if;
return Result (1 .. Len);
end From_UTF_16;
--------------------------
-- Raise_Encoding_Error --
--------------------------
procedure Raise_Encoding_Error (Index : Natural) is
Val : constant String := Index'Img;
begin
raise Encoding_Error with
"bad input at Item (" & Val (Val'First + 1 .. Val'Last) & ')';
end Raise_Encoding_Error;
---------------
-- To_UTF_16 --
---------------
function To_UTF_16
(Item : UTF_String;
Input_Scheme : UTF_XE_Encoding;
Output_BOM : Boolean := False) return UTF_16_Wide_String
is
Result : UTF_16_Wide_String (1 .. Item'Length / 2 + 1);
Len : Natural;
Iptr : Natural;
begin
if Item'Length mod 2 /= 0 then
raise Encoding_Error with "UTF-16BE/LE string has odd length";
end if;
-- Deal with input BOM, skip if OK, error if bad BOM
Iptr := Item'First;
if Item'Length >= 2 then
if Item (Iptr .. Iptr + 1) = BOM_16BE then
if Input_Scheme = UTF_16BE then
Iptr := Iptr + 2;
else
Raise_Encoding_Error (Iptr);
end if;
elsif Item (Iptr .. Iptr + 1) = BOM_16LE then
if Input_Scheme = UTF_16LE then
Iptr := Iptr + 2;
else
Raise_Encoding_Error (Iptr);
end if;
elsif Item'Length >= 3 and then Item (Iptr .. Iptr + 2) = BOM_8 then
Raise_Encoding_Error (Iptr);
end if;
end if;
-- Output BOM if specified
if Output_BOM then
Result (1) := BOM_16 (1);
Len := 1;
else
Len := 0;
end if;
-- UTF-16BE case
if Input_Scheme = UTF_16BE then
while Iptr < Item'Last loop
Len := Len + 1;
Result (Len) :=
Wide_Character'Val
(Character'Pos (Item (Iptr)) * 256 +
Character'Pos (Item (Iptr + 1)));
Iptr := Iptr + 2;
end loop;
-- UTF-16LE case
else
while Iptr < Item'Last loop
Len := Len + 1;
Result (Len) :=
Wide_Character'Val
(Character'Pos (Item (Iptr)) +
Character'Pos (Item (Iptr + 1)) * 256);
Iptr := Iptr + 2;
end loop;
end if;
return Result (1 .. Len);
end To_UTF_16;
end Ada.Strings.UTF_Encoding;