| /* Character set conversion support for GDB. |
| |
| Copyright (C) 2001-2024 Free Software Foundation, Inc. |
| |
| This file is part of GDB. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
| |
| #include "charset.h" |
| #include "cli/cli-cmds.h" |
| #include "gdbsupport/gdb_obstack.h" |
| #include "gdbsupport/gdb_wait.h" |
| #include "charset-list.h" |
| #include "gdbsupport/environ.h" |
| #include "arch-utils.h" |
| #include "gdbsupport/gdb_vecs.h" |
| #include <ctype.h> |
| |
| #ifdef USE_WIN32API |
| #include <windows.h> |
| #endif |
| |
| /* How GDB's character set support works |
| |
| GDB has three global settings: |
| |
| - The `current host character set' is the character set GDB should |
| use in talking to the user, and which (hopefully) the user's |
| terminal knows how to display properly. Most users should not |
| change this. |
| |
| - The `current target character set' is the character set the |
| program being debugged uses. |
| |
| - The `current target wide character set' is the wide character set |
| the program being debugged uses, that is, the encoding used for |
| wchar_t. |
| |
| There are commands to set each of these, and mechanisms for |
| choosing reasonable default values. GDB has a global list of |
| character sets that it can use as its host or target character |
| sets. |
| |
| The header file `charset.h' declares various functions that |
| different pieces of GDB need to perform tasks like: |
| |
| - printing target strings and characters to the user's terminal |
| (mostly target->host conversions), |
| |
| - building target-appropriate representations of strings and |
| characters the user enters in expressions (mostly host->target |
| conversions), |
| |
| and so on. |
| |
| To avoid excessive code duplication and maintenance efforts, |
| GDB simply requires a capable iconv function. Users on platforms |
| without a suitable iconv can use the GNU iconv library. */ |
| |
| |
| #ifdef PHONY_ICONV |
| |
| #include "extract-store-integer.h" |
| |
| /* Provide a phony iconv that does as little as possible. Also, |
| arrange for there to be a single available character set. */ |
| |
| #undef GDB_DEFAULT_HOST_CHARSET |
| #ifdef USE_WIN32API |
| # define GDB_DEFAULT_HOST_CHARSET "CP1252" |
| #else |
| # define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1" |
| #endif |
| #define GDB_DEFAULT_TARGET_CHARSET GDB_DEFAULT_HOST_CHARSET |
| #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" |
| #undef DEFAULT_CHARSET_NAMES |
| #define DEFAULT_CHARSET_NAMES GDB_DEFAULT_HOST_CHARSET , |
| |
| #undef iconv_t |
| #define iconv_t int |
| #undef iconv_open |
| #define iconv_open phony_iconv_open |
| #undef iconv |
| #define iconv phony_iconv |
| #undef iconv_close |
| #define iconv_close phony_iconv_close |
| |
| #undef ICONV_CONST |
| #define ICONV_CONST const |
| |
| /* We allow conversions from UTF-32, wchar_t, and the host charset. |
| We allow conversions to wchar_t and the host charset. |
| Return 1 if we are converting from UTF-32BE, 2 if from UTF32-LE, |
| 0 otherwise. This is used as a flag in calls to iconv. */ |
| |
| static iconv_t |
| phony_iconv_open (const char *to, const char *from) |
| { |
| if (strcmp (to, "wchar_t") && strcmp (to, GDB_DEFAULT_HOST_CHARSET)) |
| return -1; |
| |
| if (!strcmp (from, "UTF-32BE") || !strcmp (from, "UTF-32")) |
| return 1; |
| |
| if (!strcmp (from, "UTF-32LE")) |
| return 2; |
| |
| if (strcmp (from, "wchar_t") && strcmp (from, GDB_DEFAULT_HOST_CHARSET)) |
| return -1; |
| |
| return 0; |
| } |
| |
/* Close a descriptor obtained from phony_iconv_open.  ARG is not
   used, so there is nothing to release; the result is always 0.  */

static int
phony_iconv_close (iconv_t arg)
{
  (void) arg;
  return 0;
}
| |
| static size_t |
| phony_iconv (iconv_t utf_flag, const char **inbuf, size_t *inbytesleft, |
| char **outbuf, size_t *outbytesleft) |
| { |
| if (utf_flag) |
| { |
| enum bfd_endian endian |
| = utf_flag == 1 ? BFD_ENDIAN_BIG : BFD_ENDIAN_LITTLE; |
| while (*inbytesleft >= 4) |
| { |
| unsigned long c |
| = extract_unsigned_integer ((const gdb_byte *)*inbuf, 4, endian); |
| |
| if (c >= 256) |
| { |
| errno = EILSEQ; |
| return -1; |
| } |
| if (*outbytesleft < 1) |
| { |
| errno = E2BIG; |
| return -1; |
| } |
| **outbuf = c & 0xff; |
| ++*outbuf; |
| --*outbytesleft; |
| |
| *inbuf += 4; |
| *inbytesleft -= 4; |
| } |
| if (*inbytesleft) |
| { |
| /* Partial sequence on input. */ |
| errno = EINVAL; |
| return -1; |
| } |
| } |
| else |
| { |
| /* In all other cases we simply copy input bytes to the |
| output. */ |
| size_t amt = *inbytesleft; |
| |
| if (amt > *outbytesleft) |
| amt = *outbytesleft; |
| memcpy (*outbuf, *inbuf, amt); |
| *inbuf += amt; |
| *outbuf += amt; |
| *inbytesleft -= amt; |
| *outbytesleft -= amt; |
| if (*inbytesleft) |
| { |
| errno = E2BIG; |
| return -1; |
| } |
| } |
| |
| /* The number of non-reversible conversions -- but they were all |
| reversible. */ |
| return 0; |
| } |
| |
| #else /* PHONY_ICONV */ |
| |
| /* On systems that don't have EILSEQ, GNU iconv's iconv.h defines it |
| to ENOENT, while gnulib defines it to a different value. Always |
| map ENOENT to gnulib's EILSEQ, leaving callers agnostic. */ |
| |
| static size_t |
| gdb_iconv (iconv_t utf_flag, ICONV_CONST char **inbuf, size_t *inbytesleft, |
| char **outbuf, size_t *outbytesleft) |
| { |
| size_t ret; |
| |
| ret = iconv (utf_flag, inbuf, inbytesleft, outbuf, outbytesleft); |
| if (errno == ENOENT) |
| errno = EILSEQ; |
| return ret; |
| } |
| |
| #undef iconv |
| #define iconv gdb_iconv |
| |
| #endif /* PHONY_ICONV */ |
| |
| |
| /* The global lists of character sets and translations. */ |
| |
| |
| #ifndef GDB_DEFAULT_TARGET_CHARSET |
| #define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1" |
| #endif |
| |
| #ifndef GDB_DEFAULT_TARGET_WIDE_CHARSET |
| #define GDB_DEFAULT_TARGET_WIDE_CHARSET "UTF-32" |
| #endif |
| |
| static const char *auto_host_charset_name = GDB_DEFAULT_HOST_CHARSET; |
| static const char *host_charset_name = "auto"; |
| static void |
| show_host_charset_name (struct ui_file *file, int from_tty, |
| struct cmd_list_element *c, |
| const char *value) |
| { |
| if (!strcmp (value, "auto")) |
| gdb_printf (file, |
| _("The host character set is \"auto; currently %s\".\n"), |
| auto_host_charset_name); |
| else |
| gdb_printf (file, _("The host character set is \"%s\".\n"), value); |
| } |
| |
| static const char *target_charset_name = "auto"; |
| static void |
| show_target_charset_name (struct ui_file *file, int from_tty, |
| struct cmd_list_element *c, const char *value) |
| { |
| if (!strcmp (value, "auto")) |
| gdb_printf (file, |
| _("The target character set is \"auto; " |
| "currently %s\".\n"), |
| gdbarch_auto_charset (get_current_arch ())); |
| else |
| gdb_printf (file, _("The target character set is \"%s\".\n"), |
| value); |
| } |
| |
| static const char *target_wide_charset_name = "auto"; |
| static void |
| show_target_wide_charset_name (struct ui_file *file, |
| int from_tty, |
| struct cmd_list_element *c, |
| const char *value) |
| { |
| if (!strcmp (value, "auto")) |
| gdb_printf (file, |
| _("The target wide character set is \"auto; " |
| "currently %s\".\n"), |
| gdbarch_auto_wide_charset (get_current_arch ())); |
| else |
| gdb_printf (file, _("The target wide character set is \"%s\".\n"), |
| value); |
| } |
| |
| static const char * const default_charset_names[] = |
| { |
| DEFAULT_CHARSET_NAMES |
| 0 |
| }; |
| |
| static const char * const *charset_enum; |
| |
| |
| /* If the target wide character set has big- or little-endian |
| variants, these are the corresponding names. */ |
| static const char *target_wide_charset_be_name; |
| static const char *target_wide_charset_le_name; |
| |
| /* The architecture for which the BE- and LE-names are valid. */ |
| static struct gdbarch *be_le_arch; |
| |
| /* A helper function which sets the target wide big- and little-endian |
| character set names, if possible. */ |
| |
| static void |
| set_be_le_names (struct gdbarch *gdbarch) |
| { |
| if (be_le_arch == gdbarch) |
| return; |
| be_le_arch = gdbarch; |
| |
| #ifdef PHONY_ICONV |
| /* Match the wide charset names recognized by phony_iconv_open. */ |
| target_wide_charset_le_name = "UTF-32LE"; |
| target_wide_charset_be_name = "UTF-32BE"; |
| #else |
| int i, len; |
| const char *target_wide; |
| |
| target_wide_charset_le_name = NULL; |
| target_wide_charset_be_name = NULL; |
| |
| target_wide = target_wide_charset_name; |
| if (!strcmp (target_wide, "auto")) |
| target_wide = gdbarch_auto_wide_charset (gdbarch); |
| |
| len = strlen (target_wide); |
| for (i = 0; charset_enum[i]; ++i) |
| { |
| if (strncmp (target_wide, charset_enum[i], len)) |
| continue; |
| if ((charset_enum[i][len] == 'B' |
| || charset_enum[i][len] == 'L') |
| && charset_enum[i][len + 1] == 'E' |
| && charset_enum[i][len + 2] == '\0') |
| { |
| if (charset_enum[i][len] == 'B') |
| target_wide_charset_be_name = charset_enum[i]; |
| else |
| target_wide_charset_le_name = charset_enum[i]; |
| } |
| } |
| # endif /* PHONY_ICONV */ |
| } |
| |
| /* 'Set charset', 'set host-charset', 'set target-charset', 'set |
| target-wide-charset', 'set charset' sfunc's. */ |
| |
| static void |
| validate (struct gdbarch *gdbarch) |
| { |
| iconv_t desc; |
| const char *host_cset = host_charset (); |
| const char *target_cset = target_charset (gdbarch); |
| const char *target_wide_cset = target_wide_charset_name; |
| |
| if (!strcmp (target_wide_cset, "auto")) |
| target_wide_cset = gdbarch_auto_wide_charset (gdbarch); |
| |
| desc = iconv_open (target_wide_cset, host_cset); |
| if (desc == (iconv_t) -1) |
| error (_("Cannot convert between character sets `%s' and `%s'"), |
| target_wide_cset, host_cset); |
| iconv_close (desc); |
| |
| desc = iconv_open (target_cset, host_cset); |
| if (desc == (iconv_t) -1) |
| error (_("Cannot convert between character sets `%s' and `%s'"), |
| target_cset, host_cset); |
| iconv_close (desc); |
| |
| /* Clear the cache. */ |
| be_le_arch = NULL; |
| } |
| |
| /* This is the sfunc for the 'set charset' command. */ |
| static void |
| set_charset_sfunc (const char *charset, int from_tty, |
| struct cmd_list_element *c) |
| { |
| /* CAREFUL: set the target charset here as well. */ |
| target_charset_name = host_charset_name; |
| validate (get_current_arch ()); |
| } |
| |
| /* 'set host-charset' command sfunc. We need a wrapper here because |
| the function needs to have a specific signature. */ |
| static void |
| set_host_charset_sfunc (const char *charset, int from_tty, |
| struct cmd_list_element *c) |
| { |
| validate (get_current_arch ()); |
| } |
| |
| /* Wrapper for the 'set target-charset' command. */ |
| static void |
| set_target_charset_sfunc (const char *charset, int from_tty, |
| struct cmd_list_element *c) |
| { |
| validate (get_current_arch ()); |
| } |
| |
| /* Wrapper for the 'set target-wide-charset' command. */ |
| static void |
| set_target_wide_charset_sfunc (const char *charset, int from_tty, |
| struct cmd_list_element *c) |
| { |
| validate (get_current_arch ()); |
| } |
| |
| /* sfunc for the 'show charset' command. */ |
| static void |
| show_charset (struct ui_file *file, int from_tty, |
| struct cmd_list_element *c, |
| const char *name) |
| { |
| show_host_charset_name (file, from_tty, c, host_charset_name); |
| show_target_charset_name (file, from_tty, c, target_charset_name); |
| show_target_wide_charset_name (file, from_tty, c, |
| target_wide_charset_name); |
| } |
| |
| |
| /* Accessor functions. */ |
| |
| const char * |
| host_charset (void) |
| { |
| if (!strcmp (host_charset_name, "auto")) |
| return auto_host_charset_name; |
| return host_charset_name; |
| } |
| |
| const char * |
| target_charset (struct gdbarch *gdbarch) |
| { |
| if (!strcmp (target_charset_name, "auto")) |
| return gdbarch_auto_charset (gdbarch); |
| return target_charset_name; |
| } |
| |
| const char * |
| target_wide_charset (struct gdbarch *gdbarch) |
| { |
| enum bfd_endian byte_order = gdbarch_byte_order (gdbarch); |
| |
| set_be_le_names (gdbarch); |
| if (byte_order == BFD_ENDIAN_BIG) |
| { |
| if (target_wide_charset_be_name) |
| return target_wide_charset_be_name; |
| } |
| else |
| { |
| if (target_wide_charset_le_name) |
| return target_wide_charset_le_name; |
| } |
| |
| if (!strcmp (target_wide_charset_name, "auto")) |
| return gdbarch_auto_wide_charset (gdbarch); |
| |
| return target_wide_charset_name; |
| } |
| |
| |
| /* Host character set management. For the time being, we assume that |
| the host character set is some superset of ASCII. */ |
| |
/* Return the control character corresponding to the host character
   C.  '?' maps to 0177; any other character is masked with 0237,
   which clears bits 0100 and 0040.  */

char
host_letter_to_control_character (char c)
{
  return (c == '?') ? 0177 : (c & 0237);
}
| |
| |
| /* Public character management functions. */ |
| |
| class iconv_wrapper |
| { |
| public: |
| |
| iconv_wrapper (const char *to, const char *from) |
| { |
| m_desc = iconv_open (to, from); |
| if (m_desc == (iconv_t) -1) |
| perror_with_name (_("Converting character sets")); |
| } |
| |
| ~iconv_wrapper () |
| { |
| iconv_close (m_desc); |
| } |
| |
| size_t convert (ICONV_CONST char **inp, size_t *inleft, char **outp, |
| size_t *outleft) |
| { |
| return iconv (m_desc, inp, inleft, outp, outleft); |
| } |
| |
| private: |
| |
| iconv_t m_desc; |
| }; |
| |
| void |
| convert_between_encodings (const char *from, const char *to, |
| const gdb_byte *bytes, unsigned int num_bytes, |
| int width, struct obstack *output, |
| enum transliterations translit) |
| { |
| size_t inleft; |
| ICONV_CONST char *inp; |
| unsigned int space_request; |
| |
| /* Often, the host and target charsets will be the same. */ |
| if (!strcmp (from, to)) |
| { |
| obstack_grow (output, bytes, num_bytes); |
| return; |
| } |
| |
| iconv_wrapper desc (to, from); |
| |
| inleft = num_bytes; |
| inp = (ICONV_CONST char *) bytes; |
| |
| space_request = num_bytes; |
| |
| while (inleft > 0) |
| { |
| char *outp; |
| size_t outleft, r; |
| int old_size; |
| |
| old_size = obstack_object_size (output); |
| obstack_blank (output, space_request); |
| |
| outp = (char *) obstack_base (output) + old_size; |
| outleft = space_request; |
| |
| r = desc.convert (&inp, &inleft, &outp, &outleft); |
| |
| /* Now make sure that the object on the obstack only includes |
| bytes we have converted. */ |
| obstack_blank_fast (output, -(ssize_t) outleft); |
| |
| if (r == (size_t) -1) |
| { |
| switch (errno) |
| { |
| case EILSEQ: |
| { |
| int i; |
| |
| /* Invalid input sequence. */ |
| if (translit == translit_none) |
| error (_("Could not convert character " |
| "to `%s' character set"), to); |
| |
| /* We emit escape sequence for the bytes, skip them, |
| and try again. */ |
| for (i = 0; i < width; ++i) |
| { |
| char octal[5]; |
| |
| xsnprintf (octal, sizeof (octal), "\\%.3o", *inp & 0xff); |
| obstack_grow_str (output, octal); |
| |
| ++inp; |
| --inleft; |
| } |
| } |
| break; |
| |
| case E2BIG: |
| /* We ran out of space in the output buffer. Make it |
| bigger next time around. */ |
| space_request *= 2; |
| break; |
| |
| case EINVAL: |
| /* Incomplete input sequence. FIXME: ought to report this |
| to the caller somehow. */ |
| inleft = 0; |
| break; |
| |
| default: |
| perror_with_name (_("Internal error while " |
| "converting character sets")); |
| } |
| } |
| } |
| } |
| |
| |
| |
| /* Create a new iterator. */ |
| wchar_iterator::wchar_iterator (const gdb_byte *input, size_t bytes, |
| const char *charset, size_t width) |
| : m_input (input), |
| m_bytes (bytes), |
| m_width (width), |
| m_out (1) |
| { |
| m_desc = iconv_open (INTERMEDIATE_ENCODING, charset); |
| if (m_desc == (iconv_t) -1) |
| perror_with_name (_("Converting character sets")); |
| } |
| |
| wchar_iterator::~wchar_iterator () |
| { |
| if (m_desc != (iconv_t) -1) |
| iconv_close (m_desc); |
| } |
| |
| int |
| wchar_iterator::iterate (enum wchar_iterate_result *out_result, |
| gdb_wchar_t **out_chars, |
| const gdb_byte **ptr, |
| size_t *len) |
| { |
| size_t out_request; |
| |
| /* Try to convert some characters. At first we try to convert just |
| a single character. The reason for this is that iconv does not |
| necessarily update its outgoing arguments when it encounters an |
| invalid input sequence -- but we want to reliably report this to |
| our caller so it can emit an escape sequence. */ |
| out_request = 1; |
| while (m_bytes > 0) |
| { |
| ICONV_CONST char *inptr = (ICONV_CONST char *) m_input; |
| char *outptr = (char *) m_out.data (); |
| const gdb_byte *orig_inptr = m_input; |
| size_t orig_in = m_bytes; |
| size_t out_avail = out_request * sizeof (gdb_wchar_t); |
| size_t num; |
| size_t r = iconv (m_desc, &inptr, &m_bytes, &outptr, &out_avail); |
| |
| m_input = (gdb_byte *) inptr; |
| |
| if (r == (size_t) -1) |
| { |
| switch (errno) |
| { |
| case EILSEQ: |
| /* Invalid input sequence. We still might have |
| converted a character; if so, return it. */ |
| if (out_avail < out_request * sizeof (gdb_wchar_t)) |
| break; |
| |
| /* Otherwise skip the first invalid character, and let |
| the caller know about it. */ |
| *out_result = wchar_iterate_invalid; |
| *ptr = m_input; |
| *len = m_width; |
| m_input += m_width; |
| m_bytes -= m_width; |
| return 0; |
| |
| case E2BIG: |
| /* We ran out of space. We still might have converted a |
| character; if so, return it. Otherwise, grow the |
| buffer and try again. */ |
| if (out_avail < out_request * sizeof (gdb_wchar_t)) |
| break; |
| |
| ++out_request; |
| if (out_request > m_out.size ()) |
| m_out.resize (out_request); |
| continue; |
| |
| case EINVAL: |
| /* Incomplete input sequence. Let the caller know, and |
| arrange for future calls to see EOF. */ |
| *out_result = wchar_iterate_incomplete; |
| *ptr = m_input; |
| *len = m_bytes; |
| m_bytes = 0; |
| return 0; |
| |
| default: |
| perror_with_name (_("Internal error while " |
| "converting character sets")); |
| } |
| } |
| |
| /* We converted something. */ |
| num = out_request - out_avail / sizeof (gdb_wchar_t); |
| *out_result = wchar_iterate_ok; |
| *out_chars = m_out.data (); |
| *ptr = orig_inptr; |
| *len = orig_in - m_bytes; |
| return num; |
| } |
| |
| /* Really done. */ |
| *out_result = wchar_iterate_eof; |
| return -1; |
| } |
| |
| struct charset_vector |
| { |
| ~charset_vector () |
| { |
| /* Note that we do not call charset_vector::clear, which would also xfree |
| the elements. This destructor is only called after exit, at which point |
| those will be freed anyway on process exit, so not freeing them now is |
| not classified as a memory leak. OTOH, freeing them now might be |
| classified as a data race, because some worker thread might still be |
| accessing them. */ |
| charsets.clear (); |
| } |
| |
| void clear () |
| { |
| for (char *c : charsets) |
| xfree (c); |
| |
| charsets.clear (); |
| } |
| |
| std::vector<char *> charsets; |
| }; |
| |
| static charset_vector charsets; |
| |
| #ifdef PHONY_ICONV |
| |
| static void |
| find_charset_names (void) |
| { |
| charsets.charsets.push_back (xstrdup (GDB_DEFAULT_HOST_CHARSET)); |
| charsets.charsets.push_back (NULL); |
| } |
| |
| #else /* PHONY_ICONV */ |
| |
| /* Sometimes, libiconv redefines iconvlist as libiconvlist -- but |
| provides different symbols in the static and dynamic libraries. |
| So, configure may see libiconvlist but not iconvlist. But, calling |
| iconvlist is the right thing to do and will work. Hence we do a |
| check here but unconditionally call iconvlist below. */ |
| #if defined (HAVE_ICONVLIST) || defined (HAVE_LIBICONVLIST) |
| |
| /* A helper function that adds some character sets to the vector of |
| all character sets. This is a callback function for iconvlist. */ |
| |
| static int |
| add_one (unsigned int count, const char *const *names, void *data) |
| { |
| unsigned int i; |
| |
| for (i = 0; i < count; ++i) |
| charsets.charsets.push_back (xstrdup (names[i])); |
| |
| return 0; |
| } |
| |
| static void |
| find_charset_names (void) |
| { |
| iconvlist (add_one, NULL); |
| |
| charsets.charsets.push_back (NULL); |
| } |
| |
| #else |
| |
/* Return 1 if LINE, a line of output from the iconv program, is part
   of the human readable introduction and must be ignored; return 0
   otherwise.  Older iconv programs (e.g. 2.2.2) print the
   introduction even when stdout is not a tty, whereas newer versions
   leave it out in that case.  */

static int
ignore_line_p (const char *line)
{
  /* Fragments of the introduction.  A line containing any of them,
     at any position, is ignored.  */
  static const char * const intro_fragments[] =
    {
      "The following",
      "not necessarily",
      "the FROM and TO",
      "listed with several",
      NULL
    };

  for (const char * const *fragment = intro_fragments;
       *fragment != NULL;
       ++fragment)
    if (strstr (line, *fragment) != NULL)
      return 1;

  return 0;
}
| |
| static void |
| find_charset_names (void) |
| { |
| struct pex_obj *child; |
| const char *args[3]; |
| int err, status; |
| int fail = 1; |
| int flags; |
| gdb_environ iconv_env = gdb_environ::from_host_environ (); |
| char *iconv_program; |
| |
| /* Older iconvs, e.g. 2.2.2, don't omit the intro text if stdout is |
| not a tty. We need to recognize it and ignore it. This text is |
| subject to translation, so force LANGUAGE=C. */ |
| iconv_env.set ("LANGUAGE", "C"); |
| iconv_env.set ("LC_ALL", "C"); |
| |
| child = pex_init (PEX_USE_PIPES, "iconv", NULL); |
| |
| #ifdef ICONV_BIN |
| { |
| std::string iconv_dir = relocate_gdb_directory (ICONV_BIN, |
| ICONV_BIN_RELOCATABLE); |
| iconv_program |
| = concat (iconv_dir.c_str(), SLASH_STRING, "iconv", (char *) NULL); |
| } |
| #else |
| iconv_program = xstrdup ("iconv"); |
| #endif |
| args[0] = iconv_program; |
| args[1] = "-l"; |
| args[2] = NULL; |
| flags = PEX_STDERR_TO_STDOUT; |
| #ifndef ICONV_BIN |
| flags |= PEX_SEARCH; |
| #endif |
| /* Note that we simply ignore errors here. */ |
| if (!pex_run_in_environment (child, flags, |
| args[0], const_cast<char **> (args), |
| iconv_env.envp (), |
| NULL, NULL, &err)) |
| { |
| FILE *in = pex_read_output (child, 0); |
| |
| /* POSIX says that iconv -l uses an unspecified format. We |
| parse the glibc and libiconv formats; feel free to add others |
| as needed. */ |
| |
| while (in != NULL && !feof (in)) |
| { |
| /* The size of buf is chosen arbitrarily. */ |
| char buf[1024]; |
| char *start, *r; |
| int len; |
| |
| r = fgets (buf, sizeof (buf), in); |
| if (!r) |
| break; |
| len = strlen (r); |
| if (len <= 3) |
| continue; |
| if (ignore_line_p (r)) |
| continue; |
| |
| /* Strip off the newline. */ |
| --len; |
| /* Strip off one or two '/'s. glibc will print lines like |
| "8859_7//", but also "10646-1:1993/UCS4/". */ |
| if (buf[len - 1] == '/') |
| --len; |
| if (buf[len - 1] == '/') |
| --len; |
| buf[len] = '\0'; |
| |
| /* libiconv will print multiple entries per line, separated |
| by spaces. Older iconvs will print multiple entries per |
| line, indented by two spaces, and separated by ", " |
| (i.e. the human readable form). */ |
| start = buf; |
| while (1) |
| { |
| int keep_going; |
| char *p; |
| |
| /* Skip leading blanks. */ |
| for (p = start; *p && *p == ' '; ++p) |
| ; |
| start = p; |
| /* Find the next space, comma, or end-of-line. */ |
| for ( ; *p && *p != ' ' && *p != ','; ++p) |
| ; |
| /* Ignore an empty result. */ |
| if (p == start) |
| break; |
| keep_going = *p; |
| *p = '\0'; |
| charsets.charsets.push_back (xstrdup (start)); |
| if (!keep_going) |
| break; |
| /* Skip any extra spaces. */ |
| for (start = p + 1; *start && *start == ' '; ++start) |
| ; |
| } |
| } |
| |
| if (pex_get_status (child, 1, &status) |
| && WIFEXITED (status) && !WEXITSTATUS (status)) |
| fail = 0; |
| |
| } |
| |
| xfree (iconv_program); |
| pex_free (child); |
| |
| if (fail) |
| { |
| /* Some error occurred, so drop the vector. */ |
| charsets.clear (); |
| } |
| else |
| charsets.charsets.push_back (NULL); |
| } |
| |
| #endif /* HAVE_ICONVLIST || HAVE_LIBICONVLIST */ |
| #endif /* PHONY_ICONV */ |
| |
| /* The "auto" target charset used by default_auto_charset. */ |
| static const char *auto_target_charset_name = GDB_DEFAULT_TARGET_CHARSET; |
| |
| const char * |
| default_auto_charset (void) |
| { |
| return auto_target_charset_name; |
| } |
| |
| const char * |
| default_auto_wide_charset (void) |
| { |
| return GDB_DEFAULT_TARGET_WIDE_CHARSET; |
| } |
| |
| |
| #ifdef USE_INTERMEDIATE_ENCODING_FUNCTION |
| /* Macro used for UTF or UCS endianness suffix. */ |
| #if WORDS_BIGENDIAN |
| #define ENDIAN_SUFFIX "BE" |
| #else |
| #define ENDIAN_SUFFIX "LE" |
| #endif |
| |
| /* GDB cannot handle strings correctly if this size is different. */ |
| |
| static_assert (sizeof (gdb_wchar_t) == 2 || sizeof (gdb_wchar_t) == 4); |
| |
| /* intermediate_encoding returns the charset used internally by |
| GDB to convert between target and host encodings. As the test above |
| compiled, sizeof (gdb_wchar_t) is either 2 or 4 bytes. |
| UTF-16/32 is tested first, UCS-2/4 is tested as a second option, |
| otherwise an error is generated. */ |
| |
| const char * |
| intermediate_encoding (void) |
| { |
| iconv_t desc; |
| static const char *stored_result = NULL; |
| gdb::unique_xmalloc_ptr<char> result; |
| |
| if (stored_result) |
| return stored_result; |
| result = xstrprintf ("UTF-%d%s", (int) (sizeof (gdb_wchar_t) * 8), |
| ENDIAN_SUFFIX); |
| /* Check that the name is supported by iconv_open. */ |
| desc = iconv_open (result.get (), host_charset ()); |
| if (desc != (iconv_t) -1) |
| { |
| iconv_close (desc); |
| stored_result = result.release (); |
| return stored_result; |
| } |
| /* Second try, with UCS-2 type. */ |
| result = xstrprintf ("UCS-%d%s", (int) sizeof (gdb_wchar_t), |
| ENDIAN_SUFFIX); |
| /* Check that the name is supported by iconv_open. */ |
| desc = iconv_open (result.get (), host_charset ()); |
| if (desc != (iconv_t) -1) |
| { |
| iconv_close (desc); |
| stored_result = result.release (); |
| return stored_result; |
| } |
| /* No valid charset found, generate error here. */ |
| error (_("Unable to find a valid charset for string conversions")); |
| } |
| |
| #endif /* USE_INTERMEDIATE_ENCODING_FUNCTION */ |
| |
| void _initialize_charset (); |
| void |
| _initialize_charset () |
| { |
| /* The first element is always "auto". */ |
| charsets.charsets.push_back (xstrdup ("auto")); |
| find_charset_names (); |
| |
| if (charsets.charsets.size () > 1) |
| charset_enum = (const char * const *) charsets.charsets.data (); |
| else |
| charset_enum = default_charset_names; |
| |
| #ifndef PHONY_ICONV |
| #ifdef HAVE_LANGINFO_CODESET |
| /* The result of nl_langinfo may be overwritten later. This may |
| leak a little memory, if the user later changes the host charset, |
| but that doesn't matter much. */ |
| auto_host_charset_name = xstrdup (nl_langinfo (CODESET)); |
| /* Solaris will return `646' here -- but the Solaris iconv then does |
| not accept this. Darwin (and maybe FreeBSD) may return "" here, |
| which GNU libiconv doesn't like (infinite loop). */ |
| if (!strcmp (auto_host_charset_name, "646") || !*auto_host_charset_name) |
| auto_host_charset_name = "ASCII"; |
| auto_target_charset_name = auto_host_charset_name; |
| #elif defined (USE_WIN32API) |
| { |
| /* "CP" + x<=5 digits + paranoia. */ |
| static char w32_host_default_charset[16]; |
| |
| snprintf (w32_host_default_charset, sizeof w32_host_default_charset, |
| "CP%d", GetACP()); |
| auto_host_charset_name = w32_host_default_charset; |
| auto_target_charset_name = auto_host_charset_name; |
| } |
| #endif |
| #endif |
| |
| /* Recall that the first element is always "auto". */ |
| host_charset_name = charset_enum[0]; |
| gdb_assert (strcmp (host_charset_name, "auto") == 0); |
| add_setshow_enum_cmd ("charset", class_support, |
| charset_enum, &host_charset_name, _("\ |
| Set the host and target character sets."), _("\ |
| Show the host and target character sets."), _("\ |
| The `host character set' is the one used by the system GDB is running on.\n\ |
| The `target character set' is the one used by the program being debugged.\n\ |
| You may only use supersets of ASCII for your host character set; GDB does\n\ |
| not support any others.\n\ |
| To see a list of the character sets GDB supports, type `set charset <TAB>'."), |
| /* Note that the sfunc below needs to set |
| target_charset_name, because the 'set |
| charset' command sets two variables. */ |
| set_charset_sfunc, |
| show_charset, |
| &setlist, &showlist); |
| |
| add_setshow_enum_cmd ("host-charset", class_support, |
| charset_enum, &host_charset_name, _("\ |
| Set the host character set."), _("\ |
| Show the host character set."), _("\ |
| The `host character set' is the one used by the system GDB is running on.\n\ |
| You may only use supersets of ASCII for your host character set; GDB does\n\ |
| not support any others.\n\ |
| To see a list of the character sets GDB supports, type `set host-charset <TAB>'."), |
| set_host_charset_sfunc, |
| show_host_charset_name, |
| &setlist, &showlist); |
| |
| /* Recall that the first element is always "auto". */ |
| target_charset_name = charset_enum[0]; |
| gdb_assert (strcmp (target_charset_name, "auto") == 0); |
| add_setshow_enum_cmd ("target-charset", class_support, |
| charset_enum, &target_charset_name, _("\ |
| Set the target character set."), _("\ |
| Show the target character set."), _("\ |
| The `target character set' is the one used by the program being debugged.\n\ |
| GDB translates characters and strings between the host and target\n\ |
| character sets as needed.\n\ |
| To see a list of the character sets GDB supports, type `set target-charset'<TAB>"), |
| set_target_charset_sfunc, |
| show_target_charset_name, |
| &setlist, &showlist); |
| |
| /* Recall that the first element is always "auto". */ |
| target_wide_charset_name = charset_enum[0]; |
| gdb_assert (strcmp (target_wide_charset_name, "auto") == 0); |
| add_setshow_enum_cmd ("target-wide-charset", class_support, |
| charset_enum, &target_wide_charset_name, |
| _("\ |
| Set the target wide character set."), _("\ |
| Show the target wide character set."), _("\ |
| The `target wide character set' is the one used by the program being debugged.\ |
| \nIn particular it is the encoding used by `wchar_t'.\n\ |
| GDB translates characters and strings between the host and target\n\ |
| character sets as needed.\n\ |
| To see a list of the character sets GDB supports, type\n\ |
| `set target-wide-charset'<TAB>"), |
| set_target_wide_charset_sfunc, |
| show_target_wide_charset_name, |
| &setlist, &showlist); |
| } |