|  | /* Character set conversion support for GDB. | 
|  |  | 
|  | Copyright (C) 2001-2023 Free Software Foundation, Inc. | 
|  |  | 
|  | This file is part of GDB. | 
|  |  | 
|  | This program is free software; you can redistribute it and/or modify | 
|  | it under the terms of the GNU General Public License as published by | 
|  | the Free Software Foundation; either version 3 of the License, or | 
|  | (at your option) any later version. | 
|  |  | 
|  | This program is distributed in the hope that it will be useful, | 
|  | but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | GNU General Public License for more details. | 
|  |  | 
|  | You should have received a copy of the GNU General Public License | 
|  | along with this program.  If not, see <http://www.gnu.org/licenses/>.  */ | 
|  |  | 
|  | #include "defs.h" | 
|  | #include "charset.h" | 
|  | #include "gdbcmd.h" | 
|  | #include "gdbsupport/gdb_obstack.h" | 
|  | #include "gdbsupport/gdb_wait.h" | 
|  | #include "charset-list.h" | 
|  | #include "gdbsupport/environ.h" | 
|  | #include "arch-utils.h" | 
|  | #include "gdbsupport/gdb_vecs.h" | 
|  | #include <ctype.h> | 
|  |  | 
|  | #ifdef USE_WIN32API | 
|  | #include <windows.h> | 
|  | #endif | 
|  |  | 
|  | /* How GDB's character set support works | 
|  |  | 
|  | GDB has three global settings: | 
|  |  | 
|  | - The `current host character set' is the character set GDB should | 
|  | use in talking to the user, and which (hopefully) the user's | 
|  | terminal knows how to display properly.  Most users should not | 
|  | change this. | 
|  |  | 
|  | - The `current target character set' is the character set the | 
|  | program being debugged uses. | 
|  |  | 
|  | - The `current target wide character set' is the wide character set | 
|  | the program being debugged uses, that is, the encoding used for | 
|  | wchar_t. | 
|  |  | 
|  | There are commands to set each of these, and mechanisms for | 
|  | choosing reasonable default values.  GDB has a global list of | 
|  | character sets that it can use as its host or target character | 
|  | sets. | 
|  |  | 
|  | The header file `charset.h' declares various functions that | 
|  | different pieces of GDB need to perform tasks like: | 
|  |  | 
|  | - printing target strings and characters to the user's terminal | 
|  | (mostly target->host conversions), | 
|  |  | 
|  | - building target-appropriate representations of strings and | 
|  | characters the user enters in expressions (mostly host->target | 
|  | conversions), | 
|  |  | 
|  | and so on. | 
|  |  | 
|  | To avoid excessive code duplication and maintenance efforts, | 
|  | GDB simply requires a capable iconv function.  Users on platforms | 
|  | without a suitable iconv can use the GNU iconv library.  */ | 
|  |  | 
|  |  | 
|  | #ifdef PHONY_ICONV | 
|  |  | 
/* Stand-in for a real iconv: do as little as possible, and offer a
   single character set.  The iconv entry points are redirected to
   the phony_* functions defined below.  */

/* The only narrow character set on offer; the target default is the
   same as the host default.  */
#undef GDB_DEFAULT_HOST_CHARSET
#if defined (USE_WIN32API)
# define GDB_DEFAULT_HOST_CHARSET "CP1252"
#else
# define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
#endif
#define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET
#define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"

/* The list of selectable names holds just that one entry.  */
#undef DEFAULT_CHARSET_NAMES
#define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET ,

/* The conversion descriptor becomes a plain integer flag, and the
   iconv API names map onto the phony implementation.  */
#undef iconv_t
#define iconv_t int
#undef iconv_open
#define iconv_open phony_iconv_open
#undef iconv
#define iconv phony_iconv
#undef iconv_close
#define iconv_close phony_iconv_close

/* The phony iconv takes a pointer to const input.  */
#undef ICONV_CONST
#define ICONV_CONST const
|  |  | 
|  | /* We allow conversions from UTF-32, wchar_t, and the host charset. | 
|  | We allow conversions to wchar_t and the host charset. | 
|  | Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE, | 
|  | 0 otherwise.  This is used as a flag in calls to iconv.  */ | 
|  |  | 
|  | static iconv_t | 
|  | phony_iconv_open (const char *to, const char *from) | 
|  | { | 
|  | if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) | 
|  | return -1; | 
|  |  | 
|  | if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32")) | 
|  | return 1; | 
|  |  | 
|  | if (!strcmp (from, "UTF-32LE")) | 
|  | return 2; | 
|  |  | 
|  | if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) | 
|  | return -1; | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static int | 
|  | phony_iconv_close (iconv_t arg) | 
|  | { | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static size_t | 
|  | phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, | 
|  | char **outbuf, size_t *outbytesleft) | 
|  | { | 
|  | if (utf_flag) | 
|  | { | 
|  | enum bfd_endian endian | 
|  | = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE; | 
|  | while (*inbytesleft >= 4) | 
|  | { | 
|  | unsigned long c | 
|  | = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian); | 
|  |  | 
|  | if (c >= 256) | 
|  | { | 
|  | errno = EILSEQ; | 
|  | return -1; | 
|  | } | 
|  | if (*outbytesleft < 1) | 
|  | { | 
|  | errno = E2BIG; | 
|  | return -1; | 
|  | } | 
|  | **outbuf = c & 0xff; | 
|  | ++*outbuf; | 
|  | --*outbytesleft; | 
|  |  | 
|  | *inbuf += 4; | 
|  | *inbytesleft -= 4; | 
|  | } | 
|  | if (*inbytesleft) | 
|  | { | 
|  | /* Partial sequence on input.  */ | 
|  | errno = EINVAL; | 
|  | return -1; | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | /* In all other cases we simply copy input bytes to the | 
|  | output.  */ | 
|  | size_t amt = *inbytesleft; | 
|  |  | 
|  | if (amt > *outbytesleft) | 
|  | amt = *outbytesleft; | 
|  | memcpy (*outbuf, *inbuf, amt); | 
|  | *inbuf += amt; | 
|  | *outbuf += amt; | 
|  | *inbytesleft -= amt; | 
|  | *outbytesleft -= amt; | 
|  | if (*inbytesleft) | 
|  | { | 
|  | errno = E2BIG; | 
|  | return -1; | 
|  | } | 
|  | } | 
|  |  | 
|  | /* The number of non-reversible conversions -- but they were all | 
|  | reversible.  */ | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | #else /* PHONY_ICONV */ | 
|  |  | 
|  | /* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it | 
|  | to ENOENT, while gnulib defines it to a different value.  Always | 
|  | map ENOENT to gnulib's EILSEQ, leaving callers agnostic.  */ | 
|  |  | 
|  | static size_t | 
|  | gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft, | 
|  | char **outbuf, size_t *outbytesleft) | 
|  | { | 
|  | size_t ret; | 
|  |  | 
|  | ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft); | 
|  | if (errno == ENOENT) | 
|  | errno = EILSEQ; | 
|  | return ret; | 
|  | } | 
|  |  | 
|  | #undef iconv | 
|  | #define iconv gdb_iconv | 
|  |  | 
|  | #endif /* PHONY_ICONV */ | 
|  |  | 
|  |  | 
|  | /* The global lists of character sets and translations.  */ | 
|  |  | 
|  |  | 
/* Fallback defaults for the target character sets, used only when
   they have not been defined already.  */

#if !defined (GDB_DEFAULT_TARGET_CHARSET)
# define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
#endif

#if !defined (GDB_DEFAULT_TARGET_WIDE_CHARSET)
# define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32"
#endif
|  |  | 
|  | static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; | 
|  | static const char *host_charset_name = "auto"; | 
|  | static void | 
|  | show_host_charset_name (struct ui_file *file, int from_tty, | 
|  | struct cmd_list_element *c, | 
|  | const char *value) | 
|  | { | 
|  | if (!strcmp (value, "auto")) | 
|  | gdb_printf (file, | 
|  | _("The host character set is \"auto; currently %s\".\n"), | 
|  | auto_host_charset_name); | 
|  | else | 
|  | gdb_printf (file, _("The host character set is \"%s\".\n"), value); | 
|  | } | 
|  |  | 
|  | static const char *target_charset_name = "auto"; | 
|  | static void | 
|  | show_target_charset_name (struct ui_file *file, int from_tty, | 
|  | struct cmd_list_element *c, const char *value) | 
|  | { | 
|  | if (!strcmp (value, "auto")) | 
|  | gdb_printf (file, | 
|  | _("The target character set is \"auto; " | 
|  | "currently %s\".\n"), | 
|  | gdbarch_auto_charset (get_current_arch ())); | 
|  | else | 
|  | gdb_printf (file, _("The target character set is \"%s\".\n"), | 
|  | value); | 
|  | } | 
|  |  | 
|  | static const char *target_wide_charset_name = "auto"; | 
|  | static void | 
|  | show_target_wide_charset_name (struct ui_file *file, | 
|  | int from_tty, | 
|  | struct cmd_list_element *c, | 
|  | const char *value) | 
|  | { | 
|  | if (!strcmp (value, "auto")) | 
|  | gdb_printf (file, | 
|  | _("The target wide character set is \"auto; " | 
|  | "currently %s\".\n"), | 
|  | gdbarch_auto_wide_charset (get_current_arch ())); | 
|  | else | 
|  | gdb_printf (file, _("The target wide character set is \"%s\".\n"), | 
|  | value); | 
|  | } | 
|  |  | 
|  | static const char * const default_charset_names[] = | 
|  | { | 
|  | DEFAULT_CHARSET_NAMES | 
|  | 0 | 
|  | }; | 
|  |  | 
|  | static const char * const *charset_enum; | 
|  |  | 
|  |  | 
|  | /* If the target wide character set has big- or little-endian | 
|  | variants, these are the corresponding names.  */ | 
|  | static const char *target_wide_charset_be_name; | 
|  | static const char *target_wide_charset_le_name; | 
|  |  | 
|  | /* The architecture for which the BE- and LE-names are valid.  */ | 
|  | static struct gdbarch *be_le_arch; | 
|  |  | 
|  | /* A helper function which sets the target wide big- and little-endian | 
|  | character set names, if possible.  */ | 
|  |  | 
|  | static void | 
|  | set_be_le_names (struct gdbarch *gdbarch) | 
|  | { | 
|  | if (be_le_arch == gdbarch) | 
|  | return; | 
|  | be_le_arch = gdbarch; | 
|  |  | 
|  | #ifdef PHONY_ICONV | 
|  | /* Match the wide charset names recognized by phony_iconv_open.  */ | 
|  | target_wide_charset_le_name = "UTF-32LE"; | 
|  | target_wide_charset_be_name = "UTF-32BE"; | 
|  | #else | 
|  | int i, len; | 
|  | const char *target_wide; | 
|  |  | 
|  | target_wide_charset_le_name = NULL; | 
|  | target_wide_charset_be_name = NULL; | 
|  |  | 
|  | target_wide = target_wide_charset_name; | 
|  | if (!strcmp (target_wide, "auto")) | 
|  | target_wide = gdbarch_auto_wide_charset (gdbarch); | 
|  |  | 
|  | len = strlen (target_wide); | 
|  | for (i = 0; charset_enum[i]; ++i) | 
|  | { | 
|  | if (strncmp (target_wide, charset_enum[i], len)) | 
|  | continue; | 
|  | if ((charset_enum[i][len] == 'B' | 
|  | || charset_enum[i][len] == 'L') | 
|  | && charset_enum[i][len + 1] == 'E' | 
|  | && charset_enum[i][len + 2] == '\0') | 
|  | { | 
|  | if (charset_enum[i][len] == 'B') | 
|  | target_wide_charset_be_name = charset_enum[i]; | 
|  | else | 
|  | target_wide_charset_le_name = charset_enum[i]; | 
|  | } | 
|  | } | 
|  | # endif  /* PHONY_ICONV */ | 
|  | } | 
|  |  | 
|  | /* 'Set charset', 'set host-charset', 'set target-charset', 'set | 
|  | target-wide-charset', 'set charset' sfunc's.  */ | 
|  |  | 
|  | static void | 
|  | validate (struct gdbarch *gdbarch) | 
|  | { | 
|  | iconv_t desc; | 
|  | const char *host_cset = host_charset (); | 
|  | const char *target_cset = target_charset (gdbarch); | 
|  | const char *target_wide_cset = target_wide_charset_name; | 
|  |  | 
|  | if (!strcmp (target_wide_cset, "auto")) | 
|  | target_wide_cset = gdbarch_auto_wide_charset (gdbarch); | 
|  |  | 
|  | desc = iconv_open (target_wide_cset, host_cset); | 
|  | if (desc == (iconv_t) -1) | 
|  | error (_("Cannot convert between character sets `%s' and `%s'"), | 
|  | target_wide_cset, host_cset); | 
|  | iconv_close (desc); | 
|  |  | 
|  | desc = iconv_open (target_cset, host_cset); | 
|  | if (desc == (iconv_t) -1) | 
|  | error (_("Cannot convert between character sets `%s' and `%s'"), | 
|  | target_cset, host_cset); | 
|  | iconv_close (desc); | 
|  |  | 
|  | /* Clear the cache.  */ | 
|  | be_le_arch = NULL; | 
|  | } | 
|  |  | 
|  | /* This is the sfunc for the 'set charset' command.  */ | 
|  | static void | 
|  | set_charset_sfunc (const char *charset, int from_tty, | 
|  | struct cmd_list_element *c) | 
|  | { | 
|  | /* CAREFUL: set the target charset here as well.  */ | 
|  | target_charset_name = host_charset_name; | 
|  | validate (get_current_arch ()); | 
|  | } | 
|  |  | 
|  | /* 'set host-charset' command sfunc.  We need a wrapper here because | 
|  | the function needs to have a specific signature.  */ | 
|  | static void | 
|  | set_host_charset_sfunc (const char *charset, int from_tty, | 
|  | struct cmd_list_element *c) | 
|  | { | 
|  | validate (get_current_arch ()); | 
|  | } | 
|  |  | 
|  | /* Wrapper for the 'set target-charset' command.  */ | 
|  | static void | 
|  | set_target_charset_sfunc (const char *charset, int from_tty, | 
|  | struct cmd_list_element *c) | 
|  | { | 
|  | validate (get_current_arch ()); | 
|  | } | 
|  |  | 
|  | /* Wrapper for the 'set target-wide-charset' command.  */ | 
|  | static void | 
|  | set_target_wide_charset_sfunc (const char *charset, int from_tty, | 
|  | struct cmd_list_element *c) | 
|  | { | 
|  | validate (get_current_arch ()); | 
|  | } | 
|  |  | 
|  | /* sfunc for the 'show charset' command.  */ | 
|  | static void | 
|  | show_charset (struct ui_file *file, int from_tty, | 
|  | struct cmd_list_element *c, | 
|  | const char *name) | 
|  | { | 
|  | show_host_charset_name (file, from_tty, c, host_charset_name); | 
|  | show_target_charset_name (file, from_tty, c, target_charset_name); | 
|  | show_target_wide_charset_name (file, from_tty, c, | 
|  | target_wide_charset_name); | 
|  | } | 
|  |  | 
|  |  | 
|  | /* Accessor functions.  */ | 
|  |  | 
|  | const char * | 
|  | host_charset (void) | 
|  | { | 
|  | if (!strcmp (host_charset_name, "auto")) | 
|  | return auto_host_charset_name; | 
|  | return host_charset_name; | 
|  | } | 
|  |  | 
|  | const char * | 
|  | target_charset (struct gdbarch *gdbarch) | 
|  | { | 
|  | if (!strcmp (target_charset_name, "auto")) | 
|  | return gdbarch_auto_charset (gdbarch); | 
|  | return target_charset_name; | 
|  | } | 
|  |  | 
|  | const char * | 
|  | target_wide_charset (struct gdbarch *gdbarch) | 
|  | { | 
|  | enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); | 
|  |  | 
|  | set_be_le_names (gdbarch); | 
|  | if (byte_order == BFD_ENDIAN_BIG) | 
|  | { | 
|  | if (target_wide_charset_be_name) | 
|  | return target_wide_charset_be_name; | 
|  | } | 
|  | else | 
|  | { | 
|  | if (target_wide_charset_le_name) | 
|  | return target_wide_charset_le_name; | 
|  | } | 
|  |  | 
|  | if (!strcmp (target_wide_charset_name, "auto")) | 
|  | return gdbarch_auto_wide_charset (gdbarch); | 
|  |  | 
|  | return target_wide_charset_name; | 
|  | } | 
|  |  | 
|  |  | 
/* Host character set management.  For the time being, we assume that
   the host character set is some superset of ASCII.  */

/* Return the control character corresponding to the host character C.
   '?' yields 0177; any other character yields C masked with 0237.  */

char
host_letter_to_control_character (char c)
{
  return c == '?' ? 0177 : (c & 0237);
}
|  |  | 
|  |  | 
|  | /* Public character management functions.  */ | 
|  |  | 
|  | class iconv_wrapper | 
|  | { | 
|  | public: | 
|  |  | 
|  | iconv_wrapper (const char *to, const char *from) | 
|  | { | 
|  | m_desc = iconv_open (to, from); | 
|  | if (m_desc == (iconv_t) -1) | 
|  | perror_with_name (_("Converting character sets")); | 
|  | } | 
|  |  | 
|  | ~iconv_wrapper () | 
|  | { | 
|  | iconv_close (m_desc); | 
|  | } | 
|  |  | 
|  | size_t convert (ICONV_CONST char **inp, size_t *inleft, char **outp, | 
|  | size_t *outleft) | 
|  | { | 
|  | return iconv (m_desc, inp, inleft, outp, outleft); | 
|  | } | 
|  |  | 
|  | private: | 
|  |  | 
|  | iconv_t m_desc; | 
|  | }; | 
|  |  | 
|  | void | 
|  | convert_between_encodings (const char *from, const char *to, | 
|  | const gdb_byte *bytes, unsigned int num_bytes, | 
|  | int width, struct obstack *output, | 
|  | enum transliterations translit) | 
|  | { | 
|  | size_t inleft; | 
|  | ICONV_CONST char *inp; | 
|  | unsigned int space_request; | 
|  |  | 
|  | /* Often, the host and target charsets will be the same.  */ | 
|  | if (!strcmp (from, to)) | 
|  | { | 
|  | obstack_grow (output, bytes, num_bytes); | 
|  | return; | 
|  | } | 
|  |  | 
|  | iconv_wrapper desc (to, from); | 
|  |  | 
|  | inleft = num_bytes; | 
|  | inp = (ICONV_CONST char *) bytes; | 
|  |  | 
|  | space_request = num_bytes; | 
|  |  | 
|  | while (inleft > 0) | 
|  | { | 
|  | char *outp; | 
|  | size_t outleft, r; | 
|  | int old_size; | 
|  |  | 
|  | old_size = obstack_object_size (output); | 
|  | obstack_blank (output, space_request); | 
|  |  | 
|  | outp = (char *) obstack_base (output) + old_size; | 
|  | outleft = space_request; | 
|  |  | 
|  | r = desc.convert (&inp, &inleft, &outp, &outleft); | 
|  |  | 
|  | /* Now make sure that the object on the obstack only includes | 
|  | bytes we have converted.  */ | 
|  | obstack_blank_fast (output, -(ssize_t) outleft); | 
|  |  | 
|  | if (r == (size_t) -1) | 
|  | { | 
|  | switch (errno) | 
|  | { | 
|  | case EILSEQ: | 
|  | { | 
|  | int i; | 
|  |  | 
|  | /* Invalid input sequence.  */ | 
|  | if (translit == translit_none) | 
|  | error (_("Could not convert character " | 
|  | "to `%s' character set"), to); | 
|  |  | 
|  | /* We emit escape sequence for the bytes, skip them, | 
|  | and try again.  */ | 
|  | for (i = 0; i < width; ++i) | 
|  | { | 
|  | char octal[5]; | 
|  |  | 
|  | xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff); | 
|  | obstack_grow_str (output, octal); | 
|  |  | 
|  | ++inp; | 
|  | --inleft; | 
|  | } | 
|  | } | 
|  | break; | 
|  |  | 
|  | case E2BIG: | 
|  | /* We ran out of space in the output buffer.  Make it | 
|  | bigger next time around.  */ | 
|  | space_request *= 2; | 
|  | break; | 
|  |  | 
|  | case EINVAL: | 
|  | /* Incomplete input sequence.  FIXME: ought to report this | 
|  | to the caller somehow.  */ | 
|  | inleft = 0; | 
|  | break; | 
|  |  | 
|  | default: | 
|  | perror_with_name (_("Internal error while " | 
|  | "converting character sets")); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  |  | 
|  | /* Create a new iterator.  */ | 
|  | wchar_iterator::wchar_iterator (const gdb_byte *input, size_t bytes, | 
|  | const char *charset, size_t width) | 
|  | : m_input (input), | 
|  | m_bytes (bytes), | 
|  | m_width (width), | 
|  | m_out (1) | 
|  | { | 
|  | m_desc = iconv_open (INTERMEDIATE_ENCODING, charset); | 
|  | if (m_desc == (iconv_t) -1) | 
|  | perror_with_name (_("Converting character sets")); | 
|  | } | 
|  |  | 
|  | wchar_iterator::~wchar_iterator () | 
|  | { | 
|  | if (m_desc != (iconv_t) -1) | 
|  | iconv_close (m_desc); | 
|  | } | 
|  |  | 
|  | int | 
|  | wchar_iterator::iterate (enum wchar_iterate_result *out_result, | 
|  | gdb_wchar_t **out_chars, | 
|  | const gdb_byte **ptr, | 
|  | size_t *len) | 
|  | { | 
|  | size_t out_request; | 
|  |  | 
|  | /* Try to convert some characters.  At first we try to convert just | 
|  | a single character.  The reason for this is that iconv does not | 
|  | necessarily update its outgoing arguments when it encounters an | 
|  | invalid input sequence -- but we want to reliably report this to | 
|  | our caller so it can emit an escape sequence.  */ | 
|  | out_request = 1; | 
|  | while (m_bytes > 0) | 
|  | { | 
|  | ICONV_CONST char *inptr = (ICONV_CONST char *) m_input; | 
|  | char *outptr = (char *) m_out.data (); | 
|  | const gdb_byte *orig_inptr = m_input; | 
|  | size_t orig_in = m_bytes; | 
|  | size_t out_avail = out_request * sizeof (gdb_wchar_t); | 
|  | size_t num; | 
|  | size_t r = iconv (m_desc, &inptr, &m_bytes, &outptr, &out_avail); | 
|  |  | 
|  | m_input = (gdb_byte *) inptr; | 
|  |  | 
|  | if (r == (size_t) -1) | 
|  | { | 
|  | switch (errno) | 
|  | { | 
|  | case EILSEQ: | 
|  | /* Invalid input sequence.  We still might have | 
|  | converted a character; if so, return it.  */ | 
|  | if (out_avail < out_request * sizeof (gdb_wchar_t)) | 
|  | break; | 
|  |  | 
|  | /* Otherwise skip the first invalid character, and let | 
|  | the caller know about it.  */ | 
|  | *out_result = wchar_iterate_invalid; | 
|  | *ptr = m_input; | 
|  | *len = m_width; | 
|  | m_input += m_width; | 
|  | m_bytes -= m_width; | 
|  | return 0; | 
|  |  | 
|  | case E2BIG: | 
|  | /* We ran out of space.  We still might have converted a | 
|  | character; if so, return it.  Otherwise, grow the | 
|  | buffer and try again.  */ | 
|  | if (out_avail < out_request * sizeof (gdb_wchar_t)) | 
|  | break; | 
|  |  | 
|  | ++out_request; | 
|  | if (out_request > m_out.size ()) | 
|  | m_out.resize (out_request); | 
|  | continue; | 
|  |  | 
|  | case EINVAL: | 
|  | /* Incomplete input sequence.  Let the caller know, and | 
|  | arrange for future calls to see EOF.  */ | 
|  | *out_result = wchar_iterate_incomplete; | 
|  | *ptr = m_input; | 
|  | *len = m_bytes; | 
|  | m_bytes = 0; | 
|  | return 0; | 
|  |  | 
|  | default: | 
|  | perror_with_name (_("Internal error while " | 
|  | "converting character sets")); | 
|  | } | 
|  | } | 
|  |  | 
|  | /* We converted something.  */ | 
|  | num = out_request - out_avail / sizeof (gdb_wchar_t); | 
|  | *out_result = wchar_iterate_ok; | 
|  | *out_chars = m_out.data (); | 
|  | *ptr = orig_inptr; | 
|  | *len = orig_in - m_bytes; | 
|  | return num; | 
|  | } | 
|  |  | 
|  | /* Really done.  */ | 
|  | *out_result = wchar_iterate_eof; | 
|  | return -1; | 
|  | } | 
|  |  | 
|  | struct charset_vector | 
|  | { | 
|  | ~charset_vector () | 
|  | { | 
|  | /* Note that we do not call charset_vector::clear, which would also xfree | 
|  | the elements.  This destructor is only called after exit, at which point | 
|  | those will be freed anyway on process exit, so not freeing them now is | 
|  | not classified as a memory leak.  OTOH, freeing them now might be | 
|  | classified as a data race, because some worker thread might still be | 
|  | accessing them.  */ | 
|  | charsets.clear (); | 
|  | } | 
|  |  | 
|  | void clear () | 
|  | { | 
|  | for (char *c : charsets) | 
|  | xfree (c); | 
|  |  | 
|  | charsets.clear (); | 
|  | } | 
|  |  | 
|  | std::vector<char *> charsets; | 
|  | }; | 
|  |  | 
|  | static charset_vector charsets; | 
|  |  | 
|  | #ifdef PHONY_ICONV | 
|  |  | 
|  | static void | 
|  | find_charset_names (void) | 
|  | { | 
|  | charsets.charsets.push_back (xstrdup (GDB_DEFAULT_HOST_CHARSET)); | 
|  | charsets.charsets.push_back (NULL); | 
|  | } | 
|  |  | 
|  | #else /* PHONY_ICONV */ | 
|  |  | 
|  | /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but | 
|  | provides different symbols in the static and dynamic libraries. | 
|  | So, configure may see libiconvlist but not iconvlist.  But, calling | 
|  | iconvlist is the right thing to do and will work.  Hence we do a | 
|  | check here but unconditionally call iconvlist below.  */ | 
|  | #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) | 
|  |  | 
|  | /* A helper function that adds some character sets to the vector of | 
|  | all character sets.  This is a callback function for iconvlist.  */ | 
|  |  | 
|  | static int | 
|  | add_one (unsigned int count, const char *const *names, void *data) | 
|  | { | 
|  | unsigned int i; | 
|  |  | 
|  | for (i = 0; i < count; ++i) | 
|  | charsets.charsets.push_back (xstrdup (names[i])); | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | static void | 
|  | find_charset_names (void) | 
|  | { | 
|  | iconvlist (add_one, NULL); | 
|  |  | 
|  | charsets.charsets.push_back (NULL); | 
|  | } | 
|  |  | 
|  | #else | 
|  |  | 
/* Return 1 if LINE (output from iconv) should be ignored, 0 otherwise.
   Older iconv programs (e.g. 2.2.2) include the human readable
   introduction even when stdout is not a tty.  Newer versions omit
   the intro if stdout is not a tty.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction text.  A line is ignored when any
     of them occurs anywhere within it (strstr is used).  */
  static const char * const ignore_lines[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };
  const char * const *entry;

  for (entry = ignore_lines; *entry != NULL; ++entry)
    if (strstr (line, *entry) != NULL)
      return 1;

  return 0;
}
|  |  | 
|  | static void | 
|  | find_charset_names (void) | 
|  | { | 
|  | struct pex_obj *child; | 
|  | const char *args[3]; | 
|  | int err, status; | 
|  | int fail = 1; | 
|  | int flags; | 
|  | gdb_environ iconv_env = gdb_environ::from_host_environ (); | 
|  | char *iconv_program; | 
|  |  | 
|  | /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is | 
|  | not a tty.  We need to recognize it and ignore it.  This text is | 
|  | subject to translation, so force LANGUAGE=C.  */ | 
|  | iconv_env.set ("LANGUAGE", "C"); | 
|  | iconv_env.set ("LC_ALL", "C"); | 
|  |  | 
|  | child = pex_init (PEX_USE_PIPES, "iconv", NULL); | 
|  |  | 
|  | #ifdef ICONV_BIN | 
|  | { | 
|  | std::string iconv_dir = relocate_gdb_directory (ICONV_BIN, | 
|  | ICONV_BIN_RELOCATABLE); | 
|  | iconv_program | 
|  | = concat (iconv_dir.c_str(), SLASH_STRING, "iconv", (char *) NULL); | 
|  | } | 
|  | #else | 
|  | iconv_program = xstrdup ("iconv"); | 
|  | #endif | 
|  | args[0] = iconv_program; | 
|  | args[1] = "-l"; | 
|  | args[2] = NULL; | 
|  | flags = PEX_STDERR_TO_STDOUT; | 
|  | #ifndef ICONV_BIN | 
|  | flags |= PEX_SEARCH; | 
|  | #endif | 
|  | /* Note that we simply ignore errors here.  */ | 
|  | if (!pex_run_in_environment (child, flags, | 
|  | args[0], const_cast<char **> (args), | 
|  | iconv_env.envp (), | 
|  | NULL, NULL, &err)) | 
|  | { | 
|  | FILE *in = pex_read_output (child, 0); | 
|  |  | 
|  | /* POSIX says that iconv -l uses an unspecified format.  We | 
|  | parse the glibc and libiconv formats; feel free to add others | 
|  | as needed.  */ | 
|  |  | 
|  | while (in != NULL && !feof (in)) | 
|  | { | 
|  | /* The size of buf is chosen arbitrarily.  */ | 
|  | char buf[1024]; | 
|  | char *start, *r; | 
|  | int len; | 
|  |  | 
|  | r = fgets (buf, sizeof (buf), in); | 
|  | if (!r) | 
|  | break; | 
|  | len = strlen (r); | 
|  | if (len <= 3) | 
|  | continue; | 
|  | if (ignore_line_p (r)) | 
|  | continue; | 
|  |  | 
|  | /* Strip off the newline.  */ | 
|  | --len; | 
|  | /* Strip off one or two '/'s.  glibc will print lines like | 
|  | "8859_7//", but also "10646-1:1993/UCS4/".  */ | 
|  | if (buf[len - 1] == '/') | 
|  | --len; | 
|  | if (buf[len - 1] == '/') | 
|  | --len; | 
|  | buf[len] = '\0'; | 
|  |  | 
|  | /* libiconv will print multiple entries per line, separated | 
|  | by spaces.  Older iconvs will print multiple entries per | 
|  | line, indented by two spaces, and separated by ", " | 
|  | (i.e. the human readable form).  */ | 
|  | start = buf; | 
|  | while (1) | 
|  | { | 
|  | int keep_going; | 
|  | char *p; | 
|  |  | 
|  | /* Skip leading blanks.  */ | 
|  | for (p = start; *p && *p == ' '; ++p) | 
|  | ; | 
|  | start = p; | 
|  | /* Find the next space, comma, or end-of-line.  */ | 
|  | for ( ; *p && *p != ' ' && *p != ','; ++p) | 
|  | ; | 
|  | /* Ignore an empty result.  */ | 
|  | if (p == start) | 
|  | break; | 
|  | keep_going = *p; | 
|  | *p = '\0'; | 
|  | charsets.charsets.push_back (xstrdup (start)); | 
|  | if (!keep_going) | 
|  | break; | 
|  | /* Skip any extra spaces.  */ | 
|  | for (start = p + 1; *start && *start == ' '; ++start) | 
|  | ; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (pex_get_status (child, 1, &status) | 
|  | && WIFEXITED (status) && !WEXITSTATUS (status)) | 
|  | fail = 0; | 
|  |  | 
|  | } | 
|  |  | 
|  | xfree (iconv_program); | 
|  | pex_free (child); | 
|  |  | 
|  | if (fail) | 
|  | { | 
|  | /* Some error occurred, so drop the vector.  */ | 
|  | charsets.clear (); | 
|  | } | 
|  | else | 
|  | charsets.charsets.push_back (NULL); | 
|  | } | 
|  |  | 
|  | #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ | 
|  | #endif /* PHONY_ICONV */ | 
|  |  | 
|  | /* The "auto" target charset used by default_auto_charset.  */ | 
|  | static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; | 
|  |  | 
|  | const char * | 
|  | default_auto_charset (void) | 
|  | { | 
|  | return auto_target_charset_name; | 
|  | } | 
|  |  | 
|  | const char * | 
|  | default_auto_wide_charset (void) | 
|  | { | 
|  | return GDB_DEFAULT_TARGET_WIDE_CHARSET; | 
|  | } | 
|  |  | 
|  |  | 
|  | #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION | 
|  | /* Macro used for UTF or UCS endianness suffix.  */ | 
|  | #if WORDS_BIGENDIAN | 
|  | #define ENDIAN_SUFFIX "BE" | 
|  | #else | 
|  | #define ENDIAN_SUFFIX "LE" | 
|  | #endif | 
|  |  | 
|  | /* GDB cannot handle strings correctly if this size is different.  */ | 
|  |  | 
|  | gdb_static_assert (sizeof (gdb_wchar_t) == 2 || sizeof (gdb_wchar_t) == 4); | 
|  |  | 
|  | /* intermediate_encoding returns the charset used internally by | 
|  | GDB to convert between target and host encodings. As the test above | 
|  | compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. | 
|  | UTF-16/32 is tested first, UCS-2/4 is tested as a second option, | 
|  | otherwise an error is generated.  */ | 
|  |  | 
|  | const char * | 
|  | intermediate_encoding (void) | 
|  | { | 
|  | iconv_t desc; | 
|  | static const char *stored_result = NULL; | 
|  | gdb::unique_xmalloc_ptr<char> result; | 
|  |  | 
|  | if (stored_result) | 
|  | return stored_result; | 
|  | result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), | 
|  | ENDIAN_SUFFIX); | 
|  | /* Check that the name is supported by iconv_open.  */ | 
|  | desc = iconv_open (result.get (), host_charset ()); | 
|  | if (desc != (iconv_t) -1) | 
|  | { | 
|  | iconv_close (desc); | 
|  | stored_result = result.release (); | 
|  | return stored_result; | 
|  | } | 
|  | /* Second try, with UCS-2 type.  */ | 
|  | result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), | 
|  | ENDIAN_SUFFIX); | 
|  | /* Check that the name is supported by iconv_open.  */ | 
|  | desc = iconv_open (result.get (), host_charset ()); | 
|  | if (desc != (iconv_t) -1) | 
|  | { | 
|  | iconv_close (desc); | 
|  | stored_result = result.release (); | 
|  | return stored_result; | 
|  | } | 
|  | /* No valid charset found, generate error here.  */ | 
|  | error (_("Unable to find a valid charset for string conversions")); | 
|  | } | 
|  |  | 
|  | #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ | 
|  |  | 
|  | void _initialize_charset (); | 
|  | void | 
|  | _initialize_charset () | 
|  | { | 
|  | /* The first element is always "auto".  */ | 
|  | charsets.charsets.push_back (xstrdup ("auto")); | 
|  | find_charset_names (); | 
|  |  | 
|  | if (charsets.charsets.size () > 1) | 
|  | charset_enum = (const char * const *) charsets.charsets.data (); | 
|  | else | 
|  | charset_enum = default_charset_names; | 
|  |  | 
|  | #ifndef PHONY_ICONV | 
|  | #ifdef HAVE_LANGINFO_CODESET | 
|  | /* The result of nl_langinfo may be overwritten later.  This may | 
|  | leak a little memory, if the user later changes the host charset, | 
|  | but that doesn't matter much.  */ | 
|  | auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); | 
|  | /* Solaris will return `646' here -- but the Solaris iconv then does | 
|  | not accept this.  Darwin (and maybe FreeBSD) may return "" here, | 
|  | which GNU libiconv doesn't like (infinite loop).  */ | 
|  | if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) | 
|  | auto_host_charset_name = "ASCII"; | 
|  | auto_target_charset_name = auto_host_charset_name; | 
|  | #elif defined (USE_WIN32API) | 
|  | { | 
|  | /* "CP" + x<=5 digits + paranoia.  */ | 
|  | static char w32_host_default_charset[16]; | 
|  |  | 
|  | snprintf (w32_host_default_charset, sizeof w32_host_default_charset, | 
|  | "CP%d", GetACP()); | 
|  | auto_host_charset_name = w32_host_default_charset; | 
|  | auto_target_charset_name = auto_host_charset_name; | 
|  | } | 
|  | #endif | 
|  | #endif | 
|  |  | 
|  | /* Recall that the first element is always "auto".  */ | 
|  | host_charset_name = charset_enum[0]; | 
|  | gdb_assert (strcmp (host_charset_name, "auto") == 0); | 
|  | add_setshow_enum_cmd ("charset", class_support, | 
|  | charset_enum, &host_charset_name, _("\ | 
|  | Set the host and target character sets."), _("\ | 
|  | Show the host and target character sets."), _("\ | 
|  | The `host character set' is the one used by the system GDB is running on.\n\ | 
|  | The `target character set' is the one used by the program being debugged.\n\ | 
|  | You may only use supersets of ASCII for your host character set; GDB does\n\ | 
|  | not support any others.\n\ | 
|  | To see a list of the character sets GDB supports, type `set charset <TAB>'."), | 
|  | /* Note that the sfunc below needs to set | 
|  | target_charset_name, because the 'set | 
|  | charset' command sets two variables.  */ | 
|  | set_charset_sfunc, | 
|  | show_charset, | 
|  | &setlist, &showlist); | 
|  |  | 
|  | add_setshow_enum_cmd ("host-charset", class_support, | 
|  | charset_enum, &host_charset_name, _("\ | 
|  | Set the host character set."), _("\ | 
|  | Show the host character set."), _("\ | 
|  | The `host character set' is the one used by the system GDB is running on.\n\ | 
|  | You may only use supersets of ASCII for your host character set; GDB does\n\ | 
|  | not support any others.\n\ | 
|  | To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), | 
|  | set_host_charset_sfunc, | 
|  | show_host_charset_name, | 
|  | &setlist, &showlist); | 
|  |  | 
|  | /* Recall that the first element is always "auto".  */ | 
|  | target_charset_name = charset_enum[0]; | 
|  | gdb_assert (strcmp (target_charset_name, "auto") == 0); | 
|  | add_setshow_enum_cmd ("target-charset", class_support, | 
|  | charset_enum, &target_charset_name, _("\ | 
|  | Set the target character set."), _("\ | 
|  | Show the target character set."), _("\ | 
|  | The `target character set' is the one used by the program being debugged.\n\ | 
|  | GDB translates characters and strings between the host and target\n\ | 
|  | character sets as needed.\n\ | 
|  | To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), | 
|  | set_target_charset_sfunc, | 
|  | show_target_charset_name, | 
|  | &setlist, &showlist); | 
|  |  | 
|  | /* Recall that the first element is always "auto".  */ | 
|  | target_wide_charset_name = charset_enum[0]; | 
|  | gdb_assert (strcmp (target_wide_charset_name, "auto") == 0); | 
|  | add_setshow_enum_cmd ("target-wide-charset", class_support, | 
|  | charset_enum, &target_wide_charset_name, | 
|  | _("\ | 
|  | Set the target wide character set."), _("\ | 
|  | Show the target wide character set."), _("\ | 
|  | The `target wide character set' is the one used by the program being debugged.\ | 
|  | \nIn particular it is the encoding used by `wchar_t'.\n\ | 
|  | GDB translates characters and strings between the host and target\n\ | 
|  | character sets as needed.\n\ | 
|  | To see a list of the character sets GDB supports, type\n\ | 
|  | `set target-wide-charset'<TAB>"), | 
|  | set_target_wide_charset_sfunc, | 
|  | show_target_wide_charset_name, | 
|  | &setlist, &showlist); | 
|  | } |