/* gdb/charset.h - binutils-gdb - Git at Google  */

 /* Character set conversion support for GDB.
    Copyright (C) 2001-2023 Free Software Foundation, Inc.

    This file is part of GDB.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

 #ifndef CHARSET_H
 #define CHARSET_H

 #include "gdbsupport/def-vector.h"

 /* If the target program uses a different character set than the host,
    GDB has some support for translating between the two; GDB converts
    characters and strings to the host character set before displaying
    them, and converts characters and strings appearing in expressions
    entered by the user to the target character set.

    GDB's code pretty much assumes that the host character set is some
    superset of ASCII; there are plenty of ('0' + n) expressions and
    the like.  */

 /* Return the name of the current host/target character set.  The
    result is owned by the charset module; the caller should not free
    it.

    host_charset takes no arguments.  target_charset and
    target_wide_charset each take a GDBARCH; presumably this selects
    the architecture whose (wide) target character set is wanted --
    TODO confirm against charset.c.  */
 const char *host_charset (void);
 const char *target_charset (struct gdbarch *gdbarch);
 const char *target_wide_charset (struct gdbarch *gdbarch);

 /* The TRANSLIT argument of convert_between_encodings takes one of
    these values to select the type of transliteration performed.  */
 enum transliterations
   {
     /* Failure to convert is reported as an error.  */
     translit_none = 0,
     /* Transliterate to a host char.  */
     translit_char = 1
   };

 /* Convert between two encodings.

    FROM is the name of the source encoding.
    TO is the name of the target encoding.
    BYTES holds the bytes to convert; this is assumed to be characters
    in the target encoding.
    NOTE(review): "target encoding" is ambiguous here, since TO is
    also described as the target encoding while WIDTH is defined
    relative to FROM; presumably BYTES is encoded in FROM -- confirm
    against charset.c.
    NUM_BYTES is the number of bytes.
    WIDTH is the width of a character from the FROM charset, in bytes.
    For a variable width encoding, WIDTH should be the size of a "base
    character".
    OUTPUT is an obstack where the converted data is written.  The
    caller is responsible for initializing the obstack, and for
    destroying the obstack should an error occur.
    TRANSLIT specifies how invalid conversions should be handled; see
    enum transliterations.

    Nothing is returned; the converted data is delivered only through
    OUTPUT.  */

 void convert_between_encodings (const char *from, const char *to,
 				const gdb_byte *bytes,
 				unsigned int num_bytes,
 				int width, struct obstack *output,
 				enum transliterations translit);


 /* Status codes through which wchar_iterate reports errors and other
    non-ordinary outcomes.  */
 enum wchar_iterate_result
   {
     /* The ordinary return.  */
     wchar_iterate_ok = 0,
     /* The input sequence was invalid.  */
     wchar_iterate_invalid = 1,
     /* The input ended with an incomplete sequence.  */
     wchar_iterate_incomplete = 2,
     /* EOF.  */
     wchar_iterate_eof = 3
   };

 /* An iterator that returns host wchar_t's from a target string.

    The object holds a conversion descriptor (m_desc), a cursor into
    the caller's input buffer (m_input / m_bytes), the base character
    width (m_width) and an output buffer (m_out).

    NOTE(review): the class declares a destructor but no copy
    constructor or copy assignment operator; if the destructor
    releases m_desc, copying an iterator would be unsafe -- confirm
    against charset.c.  */
 class wchar_iterator
 {
  public:

   /* Create a new character iterator which returns wchar_t's.  INPUT is
      the input buffer.  BYTES is the number of bytes in the input
      buffer.  CHARSET is the name of the character set in which INPUT is
      encoded.  WIDTH is the number of bytes in a base character of
      CHARSET.

      This constructor can throw on error.  */
   wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
 		  size_t width);

   /* Destroy the iterator.  */
   ~wchar_iterator ();

   /* Perform a single iteration of a wchar_t iterator.

      Returns the number of characters converted.  A negative result
      means that EOF has been reached.  A positive result indicates the
      number of valid wchar_ts in the result; *OUT_CHARS is updated to
      point to the first valid character.

      In all cases aside from EOF, *PTR is set to point to the first
      converted target byte.  *LEN is set to the number of bytes
      converted.

      A zero result means one of several unusual results.  *OUT_RESULT is
      set to indicate the type of un-ordinary return.

      wchar_iterate_invalid means that an invalid input character was
      seen.  The iterator is advanced by WIDTH (the argument to
      the wchar_iterator constructor) bytes.

      wchar_iterate_incomplete means that an incomplete character was
      seen at the end of the input sequence.

      wchar_iterate_eof means that all bytes were successfully
      converted.  The other output arguments are not set.

      NOTE(review): EOF is described above as a negative result, yet
      wchar_iterate_eof is listed alongside the zero-result cases;
      presumably EOF is the negative return with *OUT_RESULT set to
      wchar_iterate_eof -- confirm against charset.c.  */
   int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars,
 	       const gdb_byte **ptr, size_t *len);

  private:

   /* The underlying iconv descriptor.  It is a plain int when
      PHONY_ICONV is defined and an iconv_t otherwise.  */
 #ifdef PHONY_ICONV
   int m_desc;
 #else
   iconv_t m_desc;
 #endif

   /* The input string.  This is updated as we convert characters.  */
   const gdb_byte *m_input;
   /* The number of bytes remaining in the input.  */
   size_t m_bytes;

   /* The width of an input character.  */
   size_t m_width;

   /* The output buffer.  */
   gdb::def_vector<gdb_wchar_t> m_out;
 };


 /* GDB needs to know a few details of its execution character set.
    This knowledge is isolated here and in charset.c.  */

 /* The escape character, given as its numeric code: 27 is ESC in
    ASCII, and this header assumes the host character set is a
    superset of ASCII.  */
 #define HOST_ESCAPE_CHAR 27

 /* Convert a letter, like 'c', to its corresponding control
    character.  C is the letter; the control character is the return
    value.  NOTE(review): the exact mapping (presumably Control-C for
    'c') and the treatment of non-letter arguments are not visible in
    this header -- confirm in charset.c.  */
 char host_letter_to_control_character (char c);

 /* HOST_UTF32 is the name of the UTF-32 encoding variant selected by
    WORDS_BIGENDIAN: "UTF-32BE" when that macro is defined to a
    nonzero value, "UTF-32LE" otherwise.  */
 #if defined (WORDS_BIGENDIAN) && WORDS_BIGENDIAN
 # define HOST_UTF32 "UTF-32BE"
 #else
 # define HOST_UTF32 "UTF-32LE"
 #endif

 #endif /* CHARSET_H */
	/* Character set conversion support for GDB.
	Copyright (C) 2001-2023 Free Software Foundation, Inc.

	This file is part of GDB.

	This program is free software; you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation; either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>. */

	#ifndef CHARSET_H
	#define CHARSET_H

	#include "gdbsupport/def-vector.h"

	/* If the target program uses a different character set than the host,
	GDB has some support for translating between the two; GDB converts
	characters and strings to the host character set before displaying
	them, and converts characters and strings appearing in expressions
	entered by the user to the target character set.

	GDB's code pretty much assumes that the host character set is some
	superset of ASCII; there are plenty of ('0' + n) expressions and
	the like. */

	/* Return the name of the current host/target character set. The
	result is owned by the charset module; the caller should not free
	it.

	Each function returns a pointer to the name string.
	target_charset and target_wide_charset take a pointer to the
	GDBARCH; presumably this selects the architecture whose (wide)
	target character set is wanted -- TODO confirm against
	charset.c. */
	const char *host_charset (void);
	const char *target_charset (struct gdbarch *gdbarch);
	const char *target_wide_charset (struct gdbarch *gdbarch);

	/* The TRANSLIT argument of convert_between_encodings takes one of
	these values to select the type of transliteration performed. */
	enum transliterations
	{
	  /* Failure to convert is reported as an error. */
	  translit_none = 0,
	  /* Transliterate to a host char. */
	  translit_char = 1
	};

	/* Convert between two encodings.

	FROM is the name of the source encoding.
	TO is the name of the target encoding.
	Both are passed as pointers to strings holding the encoding names.
	BYTES holds the bytes to convert; this is assumed to be characters
	in the target encoding.
	NOTE(review): "target encoding" is ambiguous here, since TO is
	also described as the target encoding while WIDTH is defined
	relative to FROM; presumably BYTES is encoded in FROM -- confirm
	against charset.c.
	NUM_BYTES is the number of bytes.
	WIDTH is the width of a character from the FROM charset, in bytes.
	For a variable width encoding, WIDTH should be the size of a "base
	character".
	OUTPUT is an obstack where the converted data is written. The
	caller is responsible for initializing the obstack, and for
	destroying the obstack should an error occur.
	TRANSLIT specifies how invalid conversions should be handled; see
	enum transliterations.

	Nothing is returned; the converted data is delivered only through
	OUTPUT. */

	void convert_between_encodings (const char *from, const char *to,
					const gdb_byte *bytes,
					unsigned int num_bytes,
					int width, struct obstack *output,
					enum transliterations translit);


	/* Status codes through which wchar_iterate reports errors and other
	non-ordinary outcomes. */
	enum wchar_iterate_result
	{
	  /* The ordinary return. */
	  wchar_iterate_ok = 0,
	  /* The input sequence was invalid. */
	  wchar_iterate_invalid = 1,
	  /* The input ended with an incomplete sequence. */
	  wchar_iterate_incomplete = 2,
	  /* EOF. */
	  wchar_iterate_eof = 3
	};

	/* An iterator that returns host wchar_t's from a target string.

	The object holds a conversion descriptor (m_desc), a cursor into
	the caller's input buffer (m_input / m_bytes), the base character
	width (m_width) and an output buffer (m_out).

	NOTE(review): the class declares a destructor but no copy
	constructor or copy assignment operator; if the destructor
	releases m_desc, copying an iterator would be unsafe -- confirm
	against charset.c. */
	class wchar_iterator
	{
	 public:

	  /* Create a new character iterator which returns wchar_t's. INPUT is
	     the input buffer. BYTES is the number of bytes in the input
	     buffer. CHARSET is the name of the character set in which INPUT is
	     encoded. WIDTH is the number of bytes in a base character of
	     CHARSET.

	     INPUT and CHARSET are pointers: INPUT is stored in m_input,
	     which is likewise a pointer to const gdb_byte.

	     This constructor can throw on error. */
	  wchar_iterator (const gdb_byte *input, size_t bytes, const char *charset,
			  size_t width);

	  /* Destroy the iterator. */
	  ~wchar_iterator ();

	  /* Perform a single iteration of a wchar_t iterator.

	     Returns the number of characters converted. A negative result
	     means that EOF has been reached. A positive result indicates the
	     number of valid wchar_ts in the result; *OUT_CHARS is updated to
	     point to the first valid character.

	     In all cases aside from EOF, *PTR is set to point to the first
	     converted target byte. *LEN is set to the number of bytes
	     converted.

	     A zero result means one of several unusual results. *OUT_RESULT is
	     set to indicate the type of un-ordinary return.

	     wchar_iterate_invalid means that an invalid input character was
	     seen. The iterator is advanced by WIDTH (the argument to
	     the wchar_iterator constructor) bytes.

	     wchar_iterate_incomplete means that an incomplete character was
	     seen at the end of the input sequence.

	     wchar_iterate_eof means that all bytes were successfully
	     converted. The other output arguments are not set.

	     OUT_RESULT, OUT_CHARS, PTR and LEN are all output parameters
	     written through the pointer, hence the extra level of
	     indirection on each.

	     NOTE(review): EOF is described above as a negative result, yet
	     wchar_iterate_eof is listed alongside the zero-result cases;
	     presumably EOF is the negative return with *OUT_RESULT set to
	     wchar_iterate_eof -- confirm against charset.c. */
	  int iterate (enum wchar_iterate_result *out_result, gdb_wchar_t **out_chars,
		       const gdb_byte **ptr, size_t *len);

	 private:

	  /* The underlying iconv descriptor. It is a plain int when
	     PHONY_ICONV is defined and an iconv_t otherwise. */
	#ifdef PHONY_ICONV
	  int m_desc;
	#else
	  iconv_t m_desc;
	#endif

	  /* The input string. This is updated as we convert characters. */
	  const gdb_byte *m_input;
	  /* The number of bytes remaining in the input. */
	  size_t m_bytes;

	  /* The width of an input character. */
	  size_t m_width;

	  /* The output buffer. */
	  gdb::def_vector<gdb_wchar_t> m_out;
	};



	/* GDB needs to know a few details of its execution character set.
	This knowledge is isolated here and in charset.c. */

	/* The escape character, given as its numeric code: 27 is ESC in
	ASCII, and this header assumes the host character set is a
	superset of ASCII. */
	#define HOST_ESCAPE_CHAR 27

	/* Convert a letter, like 'c', to its corresponding control
	character. C is the letter; the control character is the return
	value. NOTE(review): the exact mapping (presumably Control-C for
	'c') and the treatment of non-letter arguments are not visible in
	this header -- confirm in charset.c. */
	char host_letter_to_control_character (char c);

	/* HOST_UTF32 is the name of the UTF-32 encoding variant selected by
	WORDS_BIGENDIAN: "UTF-32BE" when that macro is defined to a
	nonzero value, "UTF-32LE" otherwise. */
	#if defined (WORDS_BIGENDIAN) && WORDS_BIGENDIAN
	# define HOST_UTF32 "UTF-32BE"
	#else
	# define HOST_UTF32 "UTF-32LE"
	#endif

	#endif /* CHARSET_H */