| ------------------------------------------------------------------------------ |
| -- -- |
| -- GNAT RUNTIME COMPONENTS -- |
| -- -- |
| -- S Y S T E M . W C H _ C N V -- |
| -- -- |
| -- B o d y -- |
| -- -- |
| -- Copyright (C) 1992-2001 Free Software Foundation, Inc. -- |
| -- -- |
| -- GNAT is free software; you can redistribute it and/or modify it under -- |
| -- terms of the GNU General Public License as published by the Free Soft- -- |
| -- ware Foundation; either version 2, or (at your option) any later ver- -- |
| -- sion. GNAT is distributed in the hope that it will be useful, but WITH- -- |
| -- OUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -- |
| -- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -- |
| -- for more details. You should have received a copy of the GNU General -- |
| -- Public License distributed with GNAT; see file COPYING. If not, write -- |
| -- to the Free Software Foundation, 59 Temple Place - Suite 330, Boston, -- |
| -- MA 02111-1307, USA. -- |
| -- -- |
| -- As a special exception, if other files instantiate generics from this -- |
| -- unit, or you link this unit with other files to produce an executable, -- |
| -- this unit does not by itself cause the resulting executable to be -- |
| -- covered by the GNU General Public License. This exception does not -- |
| -- however invalidate any other reasons why the executable file might be -- |
| -- covered by the GNU Public License. -- |
| -- -- |
| -- GNAT was originally developed by the GNAT team at New York University. -- |
| -- Extensive contributions were provided by Ada Core Technologies Inc. -- |
| -- -- |
| ------------------------------------------------------------------------------ |
| |
| -- This package contains generic subprograms used for converting between |
| -- sequences of Character and Wide_Character. All access to wide character |
| -- sequences is isolated in this unit. |
| |
| with Interfaces; use Interfaces; |
| with System.WCh_Con; use System.WCh_Con; |
| with System.WCh_JIS; use System.WCh_JIS; |
| |
| package body System.WCh_Cnv is |
| |
| -------------------------------- |
| -- Char_Sequence_To_Wide_Char -- |
| -------------------------------- |
| |
| function Char_Sequence_To_Wide_Char |
| (C : Character; |
| EM : WC_Encoding_Method) |
| return Wide_Character |
| is |
| B1 : Integer; |
| C1 : Character; |
| U : Unsigned_16; |
| W : Unsigned_16; |
| |
| procedure Get_Hex (N : Character); |
| -- If N is a hex character, then set B1 to 16 * B1 + character N. |
| -- Raise Constraint_Error if character N is not a hex character. |
| |
| ------------- |
| -- Get_Hex -- |
| ------------- |
| |
| procedure Get_Hex (N : Character) is |
| B2 : constant Integer := Character'Pos (N); |
| |
| begin |
| if B2 in Character'Pos ('0') .. Character'Pos ('9') then |
| B1 := B1 * 16 + B2 - Character'Pos ('0'); |
| |
| elsif B2 in Character'Pos ('A') .. Character'Pos ('F') then |
| B1 := B1 * 16 + B2 - (Character'Pos ('A') - 10); |
| |
| elsif B2 in Character'Pos ('a') .. Character'Pos ('f') then |
| B1 := B1 * 16 + B2 - (Character'Pos ('a') - 10); |
| |
| else |
| raise Constraint_Error; |
| end if; |
| end Get_Hex; |
| |
| -- Start of processing for Char_Sequence_To_Wide_Char |
| |
| begin |
| case EM is |
| |
| when WCEM_Hex => |
| if C /= ASCII.ESC then |
| return Wide_Character'Val (Character'Pos (C)); |
| |
| else |
| B1 := 0; |
| Get_Hex (In_Char); |
| Get_Hex (In_Char); |
| Get_Hex (In_Char); |
| Get_Hex (In_Char); |
| |
| return Wide_Character'Val (B1); |
| end if; |
| |
| when WCEM_Upper => |
| if C > ASCII.DEL then |
| return |
| Wide_Character'Val |
| (Integer (256 * Character'Pos (C)) + |
| Character'Pos (In_Char)); |
| else |
| return Wide_Character'Val (Character'Pos (C)); |
| end if; |
| |
| when WCEM_Shift_JIS => |
| if C > ASCII.DEL then |
| return Shift_JIS_To_JIS (C, In_Char); |
| else |
| return Wide_Character'Val (Character'Pos (C)); |
| end if; |
| |
| when WCEM_EUC => |
| if C > ASCII.DEL then |
| return EUC_To_JIS (C, In_Char); |
| else |
| return Wide_Character'Val (Character'Pos (C)); |
| end if; |
| |
| when WCEM_UTF8 => |
| if C > ASCII.DEL then |
| |
| -- 16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx# |
| -- 16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx# |
| |
| U := Unsigned_16 (Character'Pos (C)); |
| |
| if (U and 2#11100000#) = 2#11000000# then |
| W := Shift_Left (U and 2#00011111#, 6); |
| U := Unsigned_16 (Character'Pos (In_Char)); |
| |
| if (U and 2#11000000#) /= 2#10000000# then |
| raise Constraint_Error; |
| end if; |
| |
| W := W or (U and 2#00111111#); |
| |
| elsif (U and 2#11110000#) = 2#11100000# then |
| W := Shift_Left (U and 2#00001111#, 12); |
| U := Unsigned_16 (Character'Pos (In_Char)); |
| |
| if (U and 2#11000000#) /= 2#10000000# then |
| raise Constraint_Error; |
| end if; |
| |
| W := W or Shift_Left (U and 2#00111111#, 6); |
| U := Unsigned_16 (Character'Pos (In_Char)); |
| |
| if (U and 2#11000000#) /= 2#10000000# then |
| raise Constraint_Error; |
| end if; |
| |
| W := W or (U and 2#00111111#); |
| |
| else |
| raise Constraint_Error; |
| end if; |
| |
| return Wide_Character'Val (W); |
| |
| else |
| return Wide_Character'Val (Character'Pos (C)); |
| end if; |
| |
| when WCEM_Brackets => |
| |
| if C /= '[' then |
| return Wide_Character'Val (Character'Pos (C)); |
| end if; |
| |
| if In_Char /= '"' then |
| raise Constraint_Error; |
| end if; |
| |
| B1 := 0; |
| Get_Hex (In_Char); |
| Get_Hex (In_Char); |
| C1 := In_Char; |
| |
| if C1 /= '"' then |
| Get_Hex (C1); |
| Get_Hex (In_Char); |
| C1 := In_Char; |
| |
| if C1 /= '"' then |
| raise Constraint_Error; |
| end if; |
| end if; |
| |
| if In_Char /= ']' then |
| raise Constraint_Error; |
| end if; |
| |
| return Wide_Character'Val (B1); |
| |
| end case; |
| end Char_Sequence_To_Wide_Char; |
| |
| -------------------------------- |
| -- Wide_Char_To_Char_Sequence -- |
| -------------------------------- |
| |
| procedure Wide_Char_To_Char_Sequence |
| (WC : Wide_Character; |
| EM : WC_Encoding_Method) |
| is |
| Val : constant Natural := Wide_Character'Pos (WC); |
| Hexc : constant array (0 .. 15) of Character := "0123456789ABCDEF"; |
| C1, C2 : Character; |
| U : Unsigned_16; |
| |
| begin |
| case EM is |
| |
| when WCEM_Hex => |
| if Val < 256 then |
| Out_Char (Character'Val (Val)); |
| |
| else |
| Out_Char (ASCII.ESC); |
| Out_Char (Hexc (Val / (16**3))); |
| Out_Char (Hexc ((Val / (16**2)) mod 16)); |
| Out_Char (Hexc ((Val / 16) mod 16)); |
| Out_Char (Hexc (Val mod 16)); |
| end if; |
| |
| when WCEM_Upper => |
| if Val < 128 then |
| Out_Char (Character'Val (Val)); |
| |
| elsif Val < 16#8000# then |
| raise Constraint_Error; |
| |
| else |
| Out_Char (Character'Val (Val / 256)); |
| Out_Char (Character'Val (Val mod 256)); |
| end if; |
| |
| when WCEM_Shift_JIS => |
| if Val < 128 then |
| Out_Char (Character'Val (Val)); |
| else |
| JIS_To_Shift_JIS (WC, C1, C2); |
| Out_Char (C1); |
| Out_Char (C2); |
| end if; |
| |
| when WCEM_EUC => |
| if Val < 128 then |
| Out_Char (Character'Val (Val)); |
| else |
| JIS_To_EUC (WC, C1, C2); |
| Out_Char (C1); |
| Out_Char (C2); |
| end if; |
| |
| when WCEM_UTF8 => |
| U := Unsigned_16 (Val); |
| |
| -- 16#0000#-16#007f#: 2#0xxxxxxx# |
| -- 16#0080#-16#07ff#: 2#110xxxxx# 2#10xxxxxx# |
| -- 16#0800#-16#ffff#: 2#1110xxxx# 2#10xxxxxx# 2#10xxxxxx# |
| |
| if U < 16#80# then |
| Out_Char (Character'Val (U)); |
| |
| elsif U < 16#0800# then |
| Out_Char (Character'Val (2#11000000# or Shift_Right (U, 6))); |
| Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); |
| |
| else |
| Out_Char (Character'Val (2#11100000# or Shift_Right (U, 12))); |
| Out_Char (Character'Val (2#10000000# or (Shift_Right (U, 6) |
| and 2#00111111#))); |
| Out_Char (Character'Val (2#10000000# or (U and 2#00111111#))); |
| end if; |
| |
| when WCEM_Brackets => |
| |
| if Val < 256 then |
| Out_Char (Character'Val (Val)); |
| |
| else |
| Out_Char ('['); |
| Out_Char ('"'); |
| Out_Char (Hexc (Val / (16**3))); |
| Out_Char (Hexc ((Val / (16**2)) mod 16)); |
| Out_Char (Hexc ((Val / 16) mod 16)); |
| Out_Char (Hexc (Val mod 16)); |
| Out_Char ('"'); |
| Out_Char (']'); |
| end if; |
| end case; |
| end Wide_Char_To_Char_Sequence; |
| |
| end System.WCh_Cnv; |