This is the mail archive of the gdb-patches@sources.redhat.com mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH RFC] Character set support


Jim Blandy is the author of the patch below.  I have looked it over
though and it looks reasonable to me.  Also, I have tested Jim's work
and see no regressions nor any new failures as a result of the tests
that Jim has added.

I have tried to think of ways to break these changes up so that they
may be committed in smaller sections.  With the exception of
possible separate commits for the documentation and testsuite
portions, I can't think of any further divisions which make sense.

I'll wait a week for comments after which time, if there are no
objections, I'll commit it.

Kevin

gdb/ChangeLog:
2002-MM-DD  Jim Blandy  <jimb@redhat.com>

	Add support for distinct host and target character sets.
	* charset.c, charset.h: New files.
	* c-exp.y: #include "charset.h".
	(yylex): Convert character and string literals to the target
	character set, before returning them as the semantic value of the
	token.
	* c-lang.c: #include "charset.h".
	(c_emit_char): Use charset-specific methods to recognize
	characters with backslash escape forms, to decide which characters
	to print literally and which to print using numeric escape
	sequences, and to convert target characters to host characters
	before printing.
	* utils.c: #include "charset.h".
	(no_control_char_error): New function.
	(parse_escape): Use charset-specific methods to recognize
	backslash escapes, parse `control character' notation, and convert
	characters from the host character set to the target character set.
	* configure.in: Set the default host character set.
	Check where to find iconv, and what its argument types might be.
	* acinclude.m4 (AM_ICONV): New macro, borrowed from GCC.
	* Makefile.in (SFILES): List charset.c.
	(COMMON_OBS): List charset.o.	
	(charset.o): New rule.
	(charset_h): New header dependency variable.
	(c-lang.o, utils.o, c-exp.tab.o): Note dependency on $(charset_h).
	(LIBICONV): New variable, set by configure.
	(CLIBS): Include $(LIBICONV) here.
	* aclocal.m4, config.in, configure: Regenerated.

gdb/testsuite/ChangeLog:
2002-MM-DD  Jim Blandy  <jimb@redhat.com>

	* gdb.base/charset.exp, gdb.base/charset.c: New files.

gdb/doc/ChangeLog:
2002-MM-DD  Jim Blandy  <jimb@redhat.com>

	* gdb.texinfo: Add character set documentation.

Index: Makefile.in
===================================================================
RCS file: /cvs/src/src/gdb/Makefile.in,v
retrieving revision 1.257
diff -u -p -r1.257 Makefile.in
--- Makefile.in	2 Sep 2002 18:09:06 -0000	1.257
+++ Makefile.in	12 Sep 2002 23:46:12 -0000
@@ -138,6 +138,9 @@ INTL_DEPS = @INTLDEPS@
 INTL_SRC = $(srcdir)/$(INTL_DIR)
 INTL_CFLAGS = -I$(INTL_DIR) -I$(INTL_SRC)
 
+# Where is the ICONV library?  This can be empty if libc has iconv.
+LIBICONV = @LIBICONV@
+
 #
 # CLI sub directory definitons
 #
@@ -369,6 +372,7 @@ INSTALLED_LIBS=-lbfd -lreadline -lopcode
 	-lmmalloc -lintl -liberty
 CLIBS = $(SIM) $(BFD) $(READLINE) $(OPCODES) $(INTL) $(LIBIBERTY) \
 	$(TERMCAP) $(XM_CLIBS) $(TM_CLIBS) $(NAT_CLIBS) $(GDBTKLIBS) @LIBS@ \
+	$(LIBICONV) \
 	$(MMALLOC) $(LIBIBERTY) $(WIN32LIBS)
 CDEPS = $(XM_CDEPS) $(TM_CDEPS) $(NAT_CDEPS) $(SIM) $(BFD) $(READLINE) \
 	$(OPCODES) $(MMALLOC) $(INTL_DEPS) $(LIBIBERTY) $(CONFIG_DEPS)
@@ -526,6 +530,7 @@ TARGET_FLAGS_TO_PASS = \
 
 SFILES = ada-exp.y ada-lang.c ada-typeprint.c ada-valprint.c ada-tasks.c \
 	ax-general.c ax-gdb.c bcache.c blockframe.c breakpoint.c \
+	charset.c \
 	buildsym.c c-exp.y c-lang.c c-typeprint.c c-valprint.c \
 	coffread.c \
 	complaints.c completer.c corefile.c cp-valprint.c dbxread.c \
@@ -766,6 +771,8 @@ tuiSourceWin_h = $(srcdir)/tui/tuiSource
 tuiStack_h = $(srcdir)/tui/tuiStack.h
 tuiWin_h = $(srcdir)/tui/tuiWin.h
 
+charset_h =	charset.h
+
 # Header files that need to have srcdir added.  Note that in the cases
 # where we use a macro like $(gdbcmd_h), things are carefully arranged
 # so that each .h file is listed exactly once (M-x tags-search works
@@ -817,6 +824,7 @@ TAGFILES_NO_SRCDIR = $(SFILES) $(HFILES_
 TAGFILES_WITH_SRCDIR = $(HFILES_WITH_SRCDIR)
 
 COMMON_OBS = version.o blockframe.o breakpoint.o findvar.o regcache.o \
+	charset.o \
 	source.o values.o eval.o valops.o valarith.o valprint.o printcmd.o \
 	symtab.o symfile.o symmisc.o linespec.o infcmd.o infrun.o \
 	expprint.o environ.o stack.o thread.o \
@@ -1438,6 +1446,7 @@ z8k-tdep.o: $(srcdir)/z8k-tdep.c
 
 c-exp.tab.o: c-exp.tab.c $(defs_h) $(gdb_string_h) $(expression_h) \
 	$(value_h) $(parser_defs_h) $(language_h) $(c_lang_h) $(bfd_h) \
+	$(charset_h) \
 	$(symfile_h) $(objfiles_h)
 
 jv-exp.tab.o: jv-exp.tab.c jv-lang.h $(defs_h) $(expression_h) \
@@ -2578,4 +2587,8 @@ xdr_rdb.o: vx-share/xdr_rdb.c $(defs_h) 
 	vx-share/vxWorks.h vx-share/xdr_rdb.h
 	$(CC) -c $(INTERNAL_CFLAGS) $(srcdir)/vx-share/xdr_rdb.c
 
+charset.o: charset.c $(defs_h) $(charset_h) $(gdbcmd_h) gdb_assert.h
+
+c-lang.o: $(charset_h)
+utils.o: $(charset_h)
 ### end of the gdb Makefile.in.
Index: acinclude.m4
===================================================================
RCS file: /cvs/src/src/gdb/acinclude.m4,v
retrieving revision 1.3
diff -u -p -r1.3 acinclude.m4
--- acinclude.m4	15 Apr 2002 04:28:41 -0000	1.3
+++ acinclude.m4	12 Sep 2002 23:46:13 -0000
@@ -976,3 +976,71 @@ case "x$am_cv_prog_cc_stdc" in
   *) CC="$CC $am_cv_prog_cc_stdc" ;;
 esac
 ])
+
+dnl From Bruno Haible.
+
+AC_DEFUN([AM_ICONV],
+[
+  dnl Some systems have iconv in libc, some have it in libiconv (OSF/1 and
+  dnl those with the standalone portable GNU libiconv installed).
+
+  AC_ARG_WITH([libiconv-prefix],
+[  --with-libiconv-prefix=DIR  search for libiconv in DIR/include and DIR/lib], [
+    for dir in `echo "$withval" | tr : ' '`; do
+      if test -d $dir/include; then CPPFLAGS="$CPPFLAGS -I$dir/include"; fi
+      if test -d $dir/lib; then LDFLAGS="$LDFLAGS -L$dir/lib"; fi
+    done
+   ])
+
+  AC_CACHE_CHECK(for iconv, am_cv_func_iconv, [
+    am_cv_func_iconv="no, consider installing GNU libiconv"
+    am_cv_lib_iconv=no
+    AC_TRY_LINK([#include <stdlib.h>
+#include <iconv.h>],
+      [iconv_t cd = iconv_open("","");
+       iconv(cd,NULL,NULL,NULL,NULL);
+       iconv_close(cd);],
+      am_cv_func_iconv=yes)
+    if test "$am_cv_func_iconv" != yes; then
+      am_save_LIBS="$LIBS"
+      LIBS="$LIBS -liconv"
+      AC_TRY_LINK([#include <stdlib.h>
+#include <iconv.h>],
+        [iconv_t cd = iconv_open("","");
+         iconv(cd,NULL,NULL,NULL,NULL);
+         iconv_close(cd);],
+        am_cv_lib_iconv=yes
+        am_cv_func_iconv=yes)
+      LIBS="$am_save_LIBS"
+    fi
+  ])
+  if test "$am_cv_func_iconv" = yes; then
+    AC_DEFINE(HAVE_ICONV, 1, [Define if you have the iconv() function.])
+    AC_MSG_CHECKING([for iconv declaration])
+    AC_CACHE_VAL(am_cv_proto_iconv, [
+      AC_TRY_COMPILE([
+#include <stdlib.h>
+#include <iconv.h>
+extern
+#ifdef __cplusplus
+"C"
+#endif
+#if defined(__STDC__) || defined(__cplusplus)
+size_t iconv (iconv_t cd, char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);
+#else
+size_t iconv();
+#endif
+], [], am_cv_proto_iconv_arg1="", am_cv_proto_iconv_arg1="const")
+      am_cv_proto_iconv="extern size_t iconv (iconv_t cd, $am_cv_proto_iconv_arg1 char * *inbuf, size_t *inbytesleft, char * *outbuf, size_t *outbytesleft);"])
+    am_cv_proto_iconv=`echo "[$]am_cv_proto_iconv" | tr -s ' ' | sed -e 's/( /(/'`
+    AC_MSG_RESULT([$]{ac_t:-
+         }[$]am_cv_proto_iconv)
+    AC_DEFINE_UNQUOTED(ICONV_CONST, $am_cv_proto_iconv_arg1,
+      [Define as const if the declaration of iconv() needs const.])
+  fi
+  LIBICONV=
+  if test "$am_cv_lib_iconv" = yes; then
+    LIBICONV="-liconv"
+  fi
+  AC_SUBST(LIBICONV)
+])
Index: c-exp.y
===================================================================
RCS file: /cvs/src/src/gdb/c-exp.y,v
retrieving revision 1.12
diff -u -p -r1.12 c-exp.y
--- c-exp.y	3 Sep 2002 02:45:56 -0000	1.12
+++ c-exp.y	12 Sep 2002 23:46:13 -0000
@@ -49,6 +49,7 @@ Foundation, Inc., 59 Temple Place - Suit
 #include "bfd.h" /* Required by objfiles.h.  */
 #include "symfile.h" /* Required by objfiles.h.  */
 #include "objfiles.h" /* For have_full_symbols and have_partial_symbols */
+#include "charset.h"
 
 /* Flag indicating we're dealing with HP-compiled objects */ 
 extern int hp_som_som_object_present;
@@ -1314,6 +1315,15 @@ yylex ()
 	c = parse_escape (&lexptr);
       else if (c == '\'')
 	error ("Empty character constant.");
+      else if (! host_char_to_target (c, &c))
+        {
+          int toklen = lexptr - tokstart + 1;
+          char *tok = alloca (toklen + 1);
+          memcpy (tok, tokstart, toklen);
+          tok[toklen] = '\0';
+          error ("There is no character corresponding to %s in the target "
+                 "character set `%s'.", tok, target_charset ());
+        }
 
       yylval.typed_val_int.val = c;
       yylval.typed_val_int.type = builtin_type_char;
@@ -1464,6 +1474,8 @@ yylex ()
       tempbufindex = 0;
 
       do {
+        char *char_start_pos = tokptr;
+
 	/* Grow the static temp buffer if necessary, including allocating
 	   the first one on demand. */
 	if (tempbufindex + 1 >= tempbufsize)
@@ -1486,7 +1498,19 @@ yylex ()
 	    tempbuf[tempbufindex++] = c;
 	    break;
 	  default:
-	    tempbuf[tempbufindex++] = *tokptr++;
+	    c = *tokptr++;
+            if (! host_char_to_target (c, &c))
+              {
+                int len = tokptr - char_start_pos;
+                char *copy = alloca (len + 1);
+                memcpy (copy, char_start_pos, len);
+                copy[len] = '\0';
+
+                error ("There is no character corresponding to `%s' "
+                       "in the target character set `%s'.",
+                       copy, target_charset ());
+              }
+            tempbuf[tempbufindex++] = c;
 	    break;
 	  }
       } while ((*tokptr != '"') && (*tokptr != '\0'));
Index: c-lang.c
===================================================================
RCS file: /cvs/src/src/gdb/c-lang.c,v
retrieving revision 1.13
diff -u -p -r1.13 c-lang.c
--- c-lang.c	11 Jul 2002 13:50:49 -0000	1.13
+++ c-lang.c	12 Sep 2002 23:46:13 -0000
@@ -29,6 +29,7 @@
 #include "valprint.h"
 #include "macroscope.h"
 #include "gdb_assert.h"
+#include "charset.h"
 
 extern void _initialize_c_language (void);
 static void c_emit_char (int c, struct ui_file * stream, int quoter);
@@ -40,52 +41,23 @@ static void c_emit_char (int c, struct u
 static void
 c_emit_char (register int c, struct ui_file *stream, int quoter)
 {
+  const char *escape;
+  int host_char;
+
   c &= 0xFF;			/* Avoid sign bit follies */
 
-  if (PRINT_LITERAL_FORM (c))
+  escape = c_target_char_has_backslash_escape (c);
+  if (escape)
+    fprintf_filtered (stream, "\\%s", escape);
+  else if (target_char_to_host (c, &host_char)
+           && host_char_print_literally (host_char))
     {
-      if (c == '\\' || c == quoter)
-	{
-	  fputs_filtered ("\\", stream);
-	}
-      fprintf_filtered (stream, "%c", c);
+      if (host_char == '\\' || host_char == quoter)
+        fputs_filtered ("\\", stream);
+      fprintf_filtered (stream, "%c", host_char);
     }
   else
-    {
-      switch (c)
-	{
-	case '\n':
-	  fputs_filtered ("\\n", stream);
-	  break;
-	case '\b':
-	  fputs_filtered ("\\b", stream);
-	  break;
-	case '\t':
-	  fputs_filtered ("\\t", stream);
-	  break;
-	case '\f':
-	  fputs_filtered ("\\f", stream);
-	  break;
-	case '\r':
-	  fputs_filtered ("\\r", stream);
-	  break;
-        case '\013':
-          fputs_filtered ("\\v", stream);
-          break;
-	case '\033':
-	  fputs_filtered ("\\e", stream);
-	  break;
-	case '\007':
-	  fputs_filtered ("\\a", stream);
-	  break;
-        case '\0':
-          fputs_filtered ("\\0", stream);
-          break;
-	default:
-	  fprintf_filtered (stream, "\\%.3o", (unsigned int) c);
-	  break;
-	}
-    }
+    fprintf_filtered (stream, "\\%.3o", (unsigned int) c);
 }
 
 void
Index: charset.c
===================================================================
RCS file: charset.c
diff -N charset.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ charset.c	12 Sep 2002 23:46:14 -0000
@@ -0,0 +1,1274 @@
+/* Character set conversion support for GDB.
+   Copyright 2001 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#include "defs.h"
+#include "charset.h"
+#include "gdbcmd.h"
+#include "gdb_assert.h"
+
+#include <stddef.h>
+#include <string.h>
+#include <ctype.h>
+
+#ifdef HAVE_ICONV
+#include <iconv.h>
+#endif
+
+
+/* How GDB's character set support works
+
+   GDB has two global settings:
+
+   - The `current host character set' is the character set GDB should
+     use in talking to the user, and which (hopefully) the user's
+     terminal knows how to display properly.
+
+   - The `current target character set' is the character set the
+     program being debugged uses.
+
+   There are commands to set each of these, and mechanisms for
+   choosing reasonable default values.  GDB has a global list of
+   character sets that it can use as its host or target character
+   sets.
+
+   The header file `charset.h' declares various functions that
+   different pieces of GDB need to perform tasks like:
+
+   - printing target strings and characters to the user's terminal
+     (mostly target->host conversions),
+
+   - building target-appropriate representations of strings and
+     characters the user enters in expressions (mostly host->target
+     conversions),
+
+   and so on.
+
+   Now, many of these operations are specific to a particular
+   host/target character set pair.  If GDB supports N character sets,
+   there are N^2 possible pairs.  This means that, the larger GDB's
+   repertoire of character sets gets, the more expensive it gets to add
+   new character sets.
+
+   To make sure that GDB can do the right thing for every possible
+   pairing of host and target character set, while still allowing
+   GDB's repertoire to scale, we use a two-tiered approach:
+
+   - We maintain a global table of "translations" --- groups of
+     functions specific to a particular pair of character sets.
+
+   - However, a translation can be incomplete: some functions can be
+     omitted.  Where there is not a translation to specify exactly
+     what function to use, we provide reasonable defaults.  The
+     default behaviors try to use the "iconv" library functions, which
+     support a wide range of character sets.  However, even if iconv
+     is not available, there are fallbacks to support trivial
+     translations: when the host and target character sets are the
+     same.  */
+
+
+/* The character set and translation structures.  */
+
+
+/* A character set GDB knows about.  GDB only supports character sets
+   with stateless encodings, in which every character is one byte
+   long.  */
+struct charset {
+
+  /* A singly-linked list of all known charsets.  */
+  struct charset *next;
+
+  /* The name of the character set.  Comparisons on character set
+     names are case-insensitive.  */
+  const char *name;
+
+  /* Non-zero iff this character set can be used as a host character
+     set.  At present, GDB basically assumes that the host character
+     set is a superset of ASCII.  */
+  int valid_host_charset;
+
+  /* Pointers to charset-specific functions that depend only on a
+     single character set, and data pointers to pass to them.  */
+  int (*host_char_print_literally) (void *baton,
+                                    int host_char);
+  void *host_char_print_literally_baton;
+
+  int (*target_char_to_control_char) (void *baton,
+                                      int target_char,
+                                      int *target_ctrl_char);
+  void *target_char_to_control_char_baton;
+};
+
+
+/* A translation from one character set to another.  */
+struct translation {
+
+  /* A singly-linked list of all known translations.  */
+  struct translation *next;
+
+  /* This structure describes functions going from the FROM character
+     set to the TO character set.  Comparisons on character set names
+     are case-insensitive.  */
+  const char *from, *to;
+
+  /* Pointers to translation-specific functions, and data pointers to
+     pass to them.  These pointers can be zero, indicating that GDB
+     should fall back on the default behavior.  We hope the default
+     behavior will be correct for many from/to pairs, reducing the
+     number of translations that need to be registered explicitly.  */
+  
+  /* TARGET_CHAR is in the `from' charset.
+     Returns a string in the `to' charset.  */
+  const char *(*c_target_char_has_backslash_escape) (void *baton,
+                                                     int target_char);
+  void *c_target_char_has_backslash_escape_baton;
+
+  /* HOST_CHAR is in the `from' charset.
+     TARGET_CHAR points to a char in the `to' charset.  */
+  int (*c_parse_backslash) (void *baton, int host_char, int *target_char);
+  void *c_parse_backslash_baton;
+
+  /* This is used for the host_char_to_target and target_char_to_host
+     functions.  */
+  int (*convert_char) (void *baton, int from, int *to);
+  void *convert_char_baton;
+};
+
+
+
+/* The global lists of character sets and translations.  */
+
+
+/* Compare names ignoring case, like strcmp.  The unsigned char casts
+   keep tolower defined for bytes that are negative as plain char.  */
+static int
+strcmp_case_insensitive (const char *p, const char *q)
+{
+  while (*p && tolower ((unsigned char) *p) == tolower ((unsigned char) *q))
+    p++, q++;
+  return tolower ((unsigned char) *p) - tolower ((unsigned char) *q);
+}
+
+
+/* The global list of all the charsets GDB knows about.  */
+static struct charset *all_charsets;
+
+
+static void
+register_charset (struct charset *cs)
+{
+  /* Walk to the tail link so that CS is appended: the list then
+     keeps the order in which charsets were registered.  */
+  struct charset **tail = &all_charsets;
+
+  while (*tail != NULL)
+    tail = &(*tail)->next;
+
+  cs->next = NULL;
+  *tail = cs;
+}
+
+
+/* Return the registered charset called NAME (case-insensitively), or
+   NULL if there is none.  */
+static struct charset *
+lookup_charset (const char *name)
+{
+  struct charset *it = all_charsets;
+
+  while (it && strcmp_case_insensitive (name, it->name) != 0)
+    it = it->next;
+  return it;
+}
+
+
+/* The global list of translations.  */
+static struct translation *all_translations;
+
+
+static void
+register_translation (struct translation *t)
+{
+  t->next = all_translations;
+  all_translations = t;
+}
+
+
+/* Return the translation registered for the FROM -> TO pair (names
+   compared case-insensitively), or 0 if none is registered.  */
+static struct translation *
+lookup_translation (const char *from, const char *to)
+{
+  struct translation *t = all_translations;
+
+  while (t && (strcmp_case_insensitive (from, t->from) != 0
+               || strcmp_case_insensitive (to, t->to) != 0))
+    t = t->next;
+  return t;
+}
+
+
+
+/* Constructing charsets.  */
+
+/* Allocate, initialize and return a straightforward charset.
+   Use this function, rather than creating the structures yourself,
+   so that we can add new fields to the structure in the future without
+   having to tweak all the old charset descriptions.  */
+static struct charset *
+simple_charset (const char *name,
+                int valid_host_charset,
+                int (*host_char_print_literally) (void *baton, int host_char),
+                void *host_char_print_literally_baton,
+                int (*target_char_to_control_char) (void *baton,
+                                                    int target_char,
+                                                    int *target_ctrl_char),
+                void *target_char_to_control_char_baton)
+{
+  struct charset *cs = xmalloc (sizeof (*cs));
+
+  memset (cs, 0, sizeof (*cs));
+  cs->name = name;
+  cs->valid_host_charset = valid_host_charset;
+  cs->host_char_print_literally = host_char_print_literally;
+  cs->host_char_print_literally_baton = host_char_print_literally_baton;
+  cs->target_char_to_control_char = target_char_to_control_char;
+  cs->target_char_to_control_char_baton = target_char_to_control_char_baton;
+
+  return cs;
+}
+
+
+
+/* ASCII functions.  */
+
+/* Non-zero iff the low byte of C is a printable ASCII character.  */
+static int
+ascii_print_literally (void *baton, int c)
+{
+  int byte = c & 0xff;
+  return byte >= 0x20 && byte <= 0x7e;
+}
+
+
+static int
+ascii_to_control (void *baton, int c, int *ctrl_char)
+{
+  *ctrl_char = (c & 037);
+  return 1;
+}
+
+
+/* ISO-8859 family functions.  */
+
+
+static int
+iso_8859_print_literally (void *baton, int c)
+{
+  c &= 0xff;
+
+  return ((0x20 <= c && c <= 0x7e) /* ascii printables */
+          || (! sevenbit_strings && 0xA0 <= c)); /* iso 8859 printables */
+}
+
+
+static int
+iso_8859_to_control (void *baton, int c, int *ctrl_char)
+{
+  *ctrl_char = (c & 0200) | (c & 037);
+  return 1;
+}
+
+
+/* Construct an ISO-8859-like character set.  */
+static struct charset *
+iso_8859_family_charset (const char *name)
+{
+  return simple_charset (name, 1,
+                         iso_8859_print_literally, 0,
+                         iso_8859_to_control, 0);
+}
+
+
+
+/* EBCDIC family functions.  */
+
+
+/* Non-zero iff the low byte of C lies in 64..254 inclusive.  */
+static int
+ebcdic_print_literally (void *baton, int c)
+{
+  int byte = c & 0xff;
+  return byte >= 64 && byte <= 254;
+}
+
+
+static int
+ebcdic_to_control (void *baton, int c, int *ctrl_char)
+{
+  /* There are no control character equivalents in EBCDIC.  Use
+     numeric escapes.  */
+  return 0;
+}
+
+
+/* Construct an EBCDIC-like character set.  */
+static struct charset *
+ebcdic_family_charset (const char *name)
+{
+  return simple_charset (name, 0,
+                         ebcdic_print_literally, 0,
+                         ebcdic_to_control, 0);
+}
+                
+
+
+
+
+/* Fallback functions using iconv.  */
+
+#if defined(HAVE_ICONV)
+
+struct cached_iconv {
+  struct charset *from, *to;
+  iconv_t i;
+};
+
+
+/* Make sure the iconv cache *CI holds a descriptor translating from
+   FROM to TO, reopening it if the cached pair differs.  Return zero
+   on success, or -1 (errno from iconv_open) if opening fails.  A
+   failing iconv_close of the old descriptor is raised via error.  */
+static int
+check_iconv_cache (struct cached_iconv *ci,
+                   struct charset *from,
+                   struct charset *to)
+{
+  iconv_t i;
+
+  /* Does the cached iconv descriptor match the conversion we're trying
+     to do now?  */
+  if (ci->from == from
+      && ci->to == to
+      && ci->i != (iconv_t) 0)
+    return 0;
+
+  /* It doesn't.  If we actually had any iconv descriptor open at
+     all, close it now.  */
+  if (ci->i != (iconv_t) 0)
+    {
+      i = ci->i;
+      ci->i = (iconv_t) 0;
+      
+      if (iconv_close (i) == -1)
+        error ("Error closing `iconv' descriptor for "
+               "`%s'-to-`%s' character conversion: %s",
+               ci->from->name, ci->to->name, safe_strerror (errno));
+    }
+
+  /* Open a new descriptor; note iconv_open takes TO before FROM.  */
+  i = iconv_open (to->name, from->name);
+  if (i == (iconv_t) -1)
+    return -1;
+
+  ci->i = i;
+  ci->from = from;
+  ci->to = to;
+
+  return 0;
+}
+
+
+/* Convert FROM_CHAR via the open descriptor cached in *CI, storing the
+   result in *TO_CHAR.  Return 1 on success, 0 if iconv says EILSEQ.  */
+static int
+cached_iconv_convert (struct cached_iconv *ci, int from_char, int *to_char)
+{
+  char from;
+  ICONV_CONST char *from_ptr = &from;
+  char to, *to_ptr = &to;
+  size_t from_left = sizeof (from), to_left = sizeof (to);
+
+  gdb_assert (ci->i != (iconv_t) 0);
+
+  from = from_char;
+  if (iconv (ci->i, &from_ptr, &from_left, &to_ptr, &to_left)
+      == (size_t) -1)
+    {
+      /* These all suggest that the input or output character sets
+         have multi-byte encodings of some characters, which means
+         it's unsuitable for use as a GDB character set.  We should
+         never have selected it.  */
+      gdb_assert (errno != E2BIG && errno != EINVAL);
+
+      /* This suggests a bug in the code managing *CI.  */
+      gdb_assert (errno != EBADF);
+
+      /* This seems to mean that there is no equivalent character in
+         the `to' character set.  */
+      if (errno == EILSEQ)
+        return 0;
+
+      /* Anything else is mysterious.  */
+      internal_error (__FILE__, __LINE__,
+                      "Error converting character `%d' from `%s' to `%s' "
+                      "character set: %s", from_char,
+                      ci->from->name, ci->to->name, safe_strerror (errno));
+    }
+
+  /* Exactly one byte must have been consumed and one produced;
+     anything else suggests something was wrong.  */
+  gdb_assert (from_left == 0 && to_left == 0);
+
+  *to_char = (unsigned char) to;
+  return 1;
+}
+
+
+static void
+register_iconv_charsets ()
+{
+  /* Here we should check whether various character sets were
+     recognized by the local iconv implementation.
+
+     The first implementation registered a bunch of character sets
+     recognized by iconv, but then we discovered that iconv on Solaris
+     and iconv on GNU/Linux had no character sets in common.  So we
+     replaced them with the hard-coded tables that appear later in the
+     file.  */
+}
+
+#endif /* defined (HAVE_ICONV) */
+
+
+/* Fallback routines for systems without iconv.  */
+
+#if ! defined (HAVE_ICONV) 
+struct cached_iconv { char nothing; };
+
+static int
+check_iconv_cache (struct cached_iconv *ci,
+                   struct charset *from,
+                   struct charset *to)
+{
+  errno = EINVAL;
+  return -1;
+}
+
+static int
+cached_iconv_convert (struct cached_iconv *ci, int from_char, int *to_char)
+{
+  gdb_assert (0);  /* This function should never be called.  */
+  return 0;        /* Defined result should the assertion ever return.  */
+}
+
+static void
+register_iconv_charsets ()
+{
+}
+
+#endif /* ! defined(HAVE_ICONV) */
+
+
+/* Default trivial conversion functions.  */
+
+static int
+identity_either_char_to_other (void *baton, int either_char, int *other_char)
+{
+  *other_char = either_char;
+  return 1;
+}
+
+
+
+/* Default non-trivial conversion functions.  */
+
+
+static char backslashable[] = "abefnrtv";
+static char *backslashed[] = {"a", "b", "e", "f", "n", "r", "t", "v", "0"};
+static char represented[] = "\a\b\e\f\n\r\t\v";
+
+
+/* Return the escape letter(s) that follow the backslash for
+   TARGET_CHAR, judged by its host-charset equivalent, else NULL.  */
+static const char *
+default_c_target_char_has_backslash_escape (void *baton, int target_char)
+{
+  int host_char;
+  const char *ix;
+
+  /* If target_char has no equivalent in the host character set,
+     assume it doesn't have a backslashed form.  */
+  if (! target_char_to_host (target_char, &host_char))
+    return NULL;
+  /* A zero host_char matches the string's terminator, giving "0".  */
+  ix = strchr (represented, host_char);
+  if (ix)
+    return backslashed[ix - represented];
+  else
+    return NULL;
+}
+
+
+/* Translate the backslash the way we would in the host character set,
+   and then try to translate that into the target character set.  */
+static int
+default_c_parse_backslash (void *baton, int host_char, int *target_char)
+{
+  const char *ix;
+
+  ix = strchr (backslashable, host_char);
+
+  if (! ix)
+    return 0;
+  else
+    return host_char_to_target (represented[ix - backslashable],
+                                target_char);
+}
+
+
+/* Convert using a cached iconv descriptor.  */
+static int
+iconv_convert (void *baton, int from_char, int *to_char)
+{
+  struct cached_iconv *ci = baton;
+  return cached_iconv_convert (ci, from_char, to_char);
+}
+
+
+
+/* Conversion tables.  */
+
+
+/* I'd much rather fall back on iconv whenever possible.  But the
+   character set names you use with iconv aren't standardized at all,
+   a lot of platforms have really meager character set coverage, etc.
+   I wanted to have at least something we could use to exercise the
+   test suite on all platforms.
+
+   In the long run, we should have a configure-time process explore
+   somehow which character sets the host platform supports, and some
+   arrangement that allows GDB users to use platform-independent names
+   for character sets.  */
+
+
+/* We generated these tables using iconv on a GNU/Linux machine.  */
+
+
+static int ascii_to_iso_8859_1_table[] = {
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */
+   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */
+   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, /* 80 */
+   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, /* 96 */
+   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, /* 112 */
+  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, /* 128 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
+};
+
+
+static int ascii_to_ebcdic_us_table[] = {
+    0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */
+   64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */
+  240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */
+  124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */
+  215,216,217,226,227,228,229,230,231,232,233, -1,224, -1, -1,109, /* 96 */
+  121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */
+  151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7, /* 128 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
+};
+
+
+static int ascii_to_ibm1047_table[] = {
+    0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */
+   64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */
+  240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */
+  124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */
+  215,216,217,226,227,228,229,230,231,232,233,173,224,189, 95,109, /* 96 */
+  121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */
+  151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7, /* 128 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
+};
+
+
+static int iso_8859_1_to_ascii_table[] = {
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */
+   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */
+   64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, /* 80 */
+   80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, /* 96 */
+   96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111, /* 112 */
+  112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127, /* 128 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
+};
+
+
+static int iso_8859_1_to_ebcdic_us_table[] = {
+    0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */
+   64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */
+  240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */
+  124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */
+  215,216,217,226,227,228,229,230,231,232,233, -1,224, -1, -1,109, /* 96 */
+  121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */
+  151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7, /* 128 */
+   32, 33, 34, 35, 36, 21,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27, /* 144 */
+   48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,255, /* 160 */
+   -1, -1, 74, -1, -1, -1,106, -1, -1, -1, -1, -1, 95, -1, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 208 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 224 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 240 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  /* 256 */
+};
+
+
+static int iso_8859_1_to_ibm1047_table[] = {
+    0,  1,  2,  3, 55, 45, 46, 47, 22,  5, 37, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, 60, 61, 50, 38, 24, 25, 63, 39, 28, 29, 30, 31, /* 32 */
+   64, 90,127,123, 91,108, 80,125, 77, 93, 92, 78,107, 96, 75, 97, /* 48 */
+  240,241,242,243,244,245,246,247,248,249,122, 94, 76,126,110,111, /* 64 */
+  124,193,194,195,196,197,198,199,200,201,209,210,211,212,213,214, /* 80 */
+  215,216,217,226,227,228,229,230,231,232,233,173,224,189, 95,109, /* 96 */
+  121,129,130,131,132,133,134,135,136,137,145,146,147,148,149,150, /* 112 */
+  151,152,153,162,163,164,165,166,167,168,169,192, 79,208,161,  7, /* 128 */
+   32, 33, 34, 35, 36, 21,  6, 23, 40, 41, 42, 43, 44,  9, 10, 27, /* 144 */
+   48, 49, 26, 51, 52, 53, 54,  8, 56, 57, 58, 59,  4, 20, 62,255, /* 160 */
+   65,170, 74,177,159,178,106,181,187,180,154,138,176,202,175,188, /* 176 */
+  144,143,234,250,190,160,182,179,157,218,155,139,183,184,185,171, /* 192 */
+  100,101, 98,102, 99,103,158,104,116,113,114,115,120,117,118,119, /* 208 */
+  172,105,237,238,235,239,236,191,128,253,254,251,252,186,174, 89, /* 224 */
+   68, 69, 66, 70, 67, 71,156, 72, 84, 81, 82, 83, 88, 85, 86, 87, /* 240 */
+  140, 73,205,206,203,207,204,225,112,221,222,219,220,141,142,223  /* 256 */
+};
+
+
+static int ebcdic_us_to_ascii_table[] = {
+    0,  1,  2,  3, -1,  9, -1,127, -1, -1, -1, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, -1, -1,  8, -1, 24, 25, -1, -1, 28, 29, 30, 31, /* 32 */
+   -1, -1, -1, -1, -1, 10, 23, 27, -1, -1, -1, -1, -1,  5,  6,  7, /* 48 */
+   -1, -1, 22, -1, -1, -1, -1,  4, -1, -1, -1, -1, 20, 21, -1, 26, /* 64 */
+   32, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46, 60, 40, 43,124, /* 80 */
+   38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59, -1, /* 96 */
+   45, 47, -1, -1, -1, -1, -1, -1, -1, -1, -1, 44, 37, 95, 62, 63, /* 112 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */
+   -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1,126,115,116,117,118,119,120,121,122, -1, -1, -1, -1, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+  123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */
+  125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */
+   92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1, -1  /* 256 */
+};
+
+
+static int ebcdic_us_to_iso_8859_1_table[] = {
+    0,  1,  2,  3,156,  9,134,127,151,141,142, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19,157,133,  8,135, 24, 25,146,143, 28, 29, 30, 31, /* 32 */
+  128,129,130,131,132, 10, 23, 27,136,137,138,139,140,  5,  6,  7, /* 48 */
+  144,145, 22,147,148,149,150,  4,152,153,154,155, 20, 21,158, 26, /* 64 */
+   32, -1, -1, -1, -1, -1, -1, -1, -1, -1,162, 46, 60, 40, 43,124, /* 80 */
+   38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59,172, /* 96 */
+   45, 47, -1, -1, -1, -1, -1, -1, -1, -1,166, 44, 37, 95, 62, 63, /* 112 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */
+   -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1,126,115,116,117,118,119,120,121,122, -1, -1, -1, -1, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+  123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */
+  125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */
+   92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1,159  /* 256 */
+};
+
+
+static int ebcdic_us_to_ibm1047_table[] = {
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */
+   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */
+   64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 74, 75, 76, 77, 78, 79, /* 80 */
+   80, -1, -1, -1, -1, -1, -1, -1, -1, -1, 90, 91, 92, 93, 94,176, /* 96 */
+   96, 97, -1, -1, -1, -1, -1, -1, -1, -1,106,107,108,109,110,111, /* 112 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1,121,122,123,124,125,126,127, /* 128 */
+   -1,129,130,131,132,133,134,135,136,137, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1,145,146,147,148,149,150,151,152,153, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1,161,162,163,164,165,166,167,168,169, -1, -1, -1, -1, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+  192,193,194,195,196,197,198,199,200,201, -1, -1, -1, -1, -1, -1, /* 208 */
+  208,209,210,211,212,213,214,215,216,217, -1, -1, -1, -1, -1, -1, /* 224 */
+  224, -1,226,227,228,229,230,231,232,233, -1, -1, -1, -1, -1, -1, /* 240 */
+  240,241,242,243,244,245,246,247,248,249, -1, -1, -1, -1, -1,255  /* 256 */
+};
+
+
+static int ibm1047_to_ascii_table[] = {
+    0,  1,  2,  3, -1,  9, -1,127, -1, -1, -1, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, -1, -1,  8, -1, 24, 25, -1, -1, 28, 29, 30, 31, /* 32 */
+   -1, -1, -1, -1, -1, 10, 23, 27, -1, -1, -1, -1, -1,  5,  6,  7, /* 48 */
+   -1, -1, 22, -1, -1, -1, -1,  4, -1, -1, -1, -1, 20, 21, -1, 26, /* 64 */
+   32, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 46, 60, 40, 43,124, /* 80 */
+   38, -1, -1, -1, -1, -1, -1, -1, -1, -1, 33, 36, 42, 41, 59, 94, /* 96 */
+   45, 47, -1, -1, -1, -1, -1, -1, -1, -1, -1, 44, 37, 95, 62, 63, /* 112 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, 96, 58, 35, 64, 39, 61, 34, /* 128 */
+   -1, 97, 98, 99,100,101,102,103,104,105, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1,106,107,108,109,110,111,112,113,114, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1,126,115,116,117,118,119,120,121,122, -1, -1, -1, 91, -1, -1, /* 176 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 93, -1, -1, /* 192 */
+  123, 65, 66, 67, 68, 69, 70, 71, 72, 73, -1, -1, -1, -1, -1, -1, /* 208 */
+  125, 74, 75, 76, 77, 78, 79, 80, 81, 82, -1, -1, -1, -1, -1, -1, /* 224 */
+   92, -1, 83, 84, 85, 86, 87, 88, 89, 90, -1, -1, -1, -1, -1, -1, /* 240 */
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, -1, -1, -1, -1, -1, -1  /* 256 */
+};
+
+
+static int ibm1047_to_iso_8859_1_table[] = {
+    0,  1,  2,  3,156,  9,134,127,151,141,142, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19,157,133,  8,135, 24, 25,146,143, 28, 29, 30, 31, /* 32 */
+  128,129,130,131,132, 10, 23, 27,136,137,138,139,140,  5,  6,  7, /* 48 */
+  144,145, 22,147,148,149,150,  4,152,153,154,155, 20, 21,158, 26, /* 64 */
+   32,160,226,228,224,225,227,229,231,241,162, 46, 60, 40, 43,124, /* 80 */
+   38,233,234,235,232,237,238,239,236,223, 33, 36, 42, 41, 59, 94, /* 96 */
+   45, 47,194,196,192,193,195,197,199,209,166, 44, 37, 95, 62, 63, /* 112 */
+  248,201,202,203,200,205,206,207,204, 96, 58, 35, 64, 39, 61, 34, /* 128 */
+  216, 97, 98, 99,100,101,102,103,104,105,171,187,240,253,254,177, /* 144 */
+  176,106,107,108,109,110,111,112,113,114,170,186,230,184,198,164, /* 160 */
+  181,126,115,116,117,118,119,120,121,122,161,191,208, 91,222,174, /* 176 */
+  172,163,165,183,169,167,182,188,189,190,221,168,175, 93,180,215, /* 192 */
+  123, 65, 66, 67, 68, 69, 70, 71, 72, 73,173,244,246,242,243,245, /* 208 */
+  125, 74, 75, 76, 77, 78, 79, 80, 81, 82,185,251,252,249,250,255, /* 224 */
+   92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213, /* 240 */
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159  /* 256 */
+};
+
+
+static int ibm1047_to_ebcdic_us_table[] = {
+    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, /* 16 */
+   16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, /* 32 */
+   32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, /* 48 */
+   48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, /* 64 */
+   64, -1, -1, -1, -1, -1, -1, -1, -1, -1, 74, 75, 76, 77, 78, 79, /* 80 */
+   80, -1, -1, -1, -1, -1, -1, -1, -1, -1, 90, 91, 92, 93, 94, -1, /* 96 */
+   96, 97, -1, -1, -1, -1, -1, -1, -1, -1,106,107,108,109,110,111, /* 112 */
+   -1, -1, -1, -1, -1, -1, -1, -1, -1,121,122,123,124,125,126,127, /* 128 */
+   -1,129,130,131,132,133,134,135,136,137, -1, -1, -1, -1, -1, -1, /* 144 */
+   -1,145,146,147,148,149,150,151,152,153, -1, -1, -1, -1, -1, -1, /* 160 */
+   -1,161,162,163,164,165,166,167,168,169, -1, -1, -1, -1, -1, -1, /* 176 */
+   95, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 192 */
+  192,193,194,195,196,197,198,199,200,201, -1, -1, -1, -1, -1, -1, /* 208 */
+  208,209,210,211,212,213,214,215,216,217, -1, -1, -1, -1, -1, -1, /* 224 */
+  224, -1,226,227,228,229,230,231,232,233, -1, -1, -1, -1, -1, -1, /* 240 */
+  240,241,242,243,244,245,246,247,248,249, -1, -1, -1, -1, -1,255  /* 256 */
+};
+
+
+/* A convert_char function driven by a lookup table.  BATON is really
+   an array of ints indexed by character code, in which an entry of
+   -1 marks a character with no equivalent.  If FROM lies in the range
+   0..255 and has an equivalent, store it in *TO and return 1.
+   Otherwise, return 0 and leave *TO untouched.  */
+static int
+table_convert_char (void *baton, int from, int *to)
+{
+  int *map = (int *) baton;
+  int mapped;
+
+  /* Never index outside the table.  */
+  if (from < 0 || from > 255)
+    return 0;
+
+  mapped = map[from];
+  if (mapped == -1)
+    return 0;
+
+  *to = mapped;
+  return 1;
+}
+
+
+/* Build a translation from the character set named FROM to the one
+   named TO which converts characters by looking them up in TABLE
+   (see table_convert_char).  The two C backslash-escape hooks and
+   their batons are stored exactly as given; callers may pass zero
+   for them.  The result is allocated with xmalloc.  */
+static struct translation *
+table_translation (const char *from, const char *to, int *table,
+                   const char *(*c_target_char_has_backslash_escape)
+                   (void *baton, int target_char),
+                   void *c_target_char_has_backslash_escape_baton,
+                   int (*c_parse_backslash) (void *baton,
+                                             int host_char,
+                                             int *target_char),
+                   void *c_parse_backslash_baton)
+{
+  struct translation *xlate;
+
+  /* Start from a zeroed structure, so fields we don't assign below
+     are cleared.  */
+  xlate = xmalloc (sizeof (*xlate));
+  memset (xlate, 0, sizeof (*xlate));
+
+  /* The pair of character sets this translation connects.  */
+  xlate->from = from;
+  xlate->to = to;
+
+  /* Character conversion is always table-driven.  */
+  xlate->convert_char = table_convert_char;
+  xlate->convert_char_baton = (void *) table;
+
+  /* The backslash-escape hooks, as supplied by the caller.  */
+  xlate->c_target_char_has_backslash_escape
+    = c_target_char_has_backslash_escape;
+  xlate->c_target_char_has_backslash_escape_baton
+    = c_target_char_has_backslash_escape_baton;
+  xlate->c_parse_backslash = c_parse_backslash;
+  xlate->c_parse_backslash_baton = c_parse_backslash_baton;
+
+  return xlate;
+}
+
+
+/* Like table_translation, but for translations that need no
+   charset-specific backslash-escape handling: both hooks and both
+   batons are passed as zero.  */
+static struct translation *
+simple_table_translation (const char *from, const char *to, int *table)
+{
+  struct translation *plain;
+
+  plain = table_translation (from, to, table, 0, 0, 0, 0);
+  return plain;
+}
+
+
+
+/* Setting and retrieving the host and target charsets.  */
+
+
+/* The current host and target character sets.  */
+static struct charset *current_host_charset, *current_target_charset;
+
+/* The current functions and batons we should use for the functions in
+   charset.h.  */
+
+static const char *(*c_target_char_has_backslash_escape_func)
+     (void *baton, int target_char);
+static void *c_target_char_has_backslash_escape_baton;
+
+static int (*c_parse_backslash_func) (void *baton,
+                                      int host_char,
+                                      int *target_char);
+static void *c_parse_backslash_baton;
+
+static int (*host_char_to_target_func) (void *baton,
+                                        int host_char,
+                                        int *target_char);
+static void *host_char_to_target_baton;
+
+static int (*target_char_to_host_func) (void *baton,
+                                        int target_char,
+                                        int *host_char);
+static void *target_char_to_host_baton;
+
+
+/* Cached iconv conversions, that might be useful to fallback
+   routines.  */
+static struct cached_iconv cached_iconv_host_to_target;
+static struct cached_iconv cached_iconv_target_to_host;
+
+
+/* Set the host and target character sets to HOST and TARGET, and
+   choose the functions (and batons) that the public entry points
+   below dispatch through.
+
+   Either argument may be null while the module is still being
+   initialized; in that case the pair is merely recorded.
+
+   If a needed conversion is provided neither by a registered
+   translation nor by iconv, this reports an error before any state
+   has been changed.  */
+static void
+set_host_and_target_charsets (struct charset *host, struct charset *target)
+{
+  struct translation *h2t, *t2h;
+
+  /* If they're not both initialized yet, then just record them and
+     leave the conversion functions alone for now.  As soon as we're
+     done running our initialize function, everything will be
+     initialized.  */
+  if (! host || ! target)
+    {
+      current_host_charset = host;
+      current_target_charset = target;
+      return;
+    }
+
+  /* Find the registered translations, if any, for each direction.  */
+  h2t = lookup_translation (host->name, target->name);
+  t2h = lookup_translation (target->name, host->name);
+
+  /* If the translations don't provide conversion functions, make sure
+     iconv can back them up.  Do this *before* modifying any state.  */
+  if (host != target)
+    {
+      if (! h2t || ! h2t->convert_char)
+        {
+          if (check_iconv_cache (&cached_iconv_host_to_target, host, target)
+              < 0)
+            error ("GDB can't convert from the `%s' character set to `%s'.",
+                   host->name, target->name);
+        }
+      if (! t2h || ! t2h->convert_char)
+        {
+          if (check_iconv_cache (&cached_iconv_target_to_host, target, host)
+              < 0)
+            error ("GDB can't convert from the `%s' character set to `%s'.",
+                   target->name, host->name);
+        }
+    }
+
+  /* Printing backslash escapes: prefer the target-to-host
+     translation's function; otherwise use the default.  Note that
+     the baton is left as it was when we fall back to the default.  */
+  if (t2h && t2h->c_target_char_has_backslash_escape)
+    {
+      c_target_char_has_backslash_escape_func
+        = t2h->c_target_char_has_backslash_escape;
+      c_target_char_has_backslash_escape_baton
+        = t2h->c_target_char_has_backslash_escape_baton;
+    }
+  else
+    c_target_char_has_backslash_escape_func
+      = default_c_target_char_has_backslash_escape;
+
+  /* Parsing backslash escapes: prefer the host-to-target
+     translation's function; otherwise use the default.  Again, the
+     baton is only assigned when the translation supplies a function.  */
+  if (h2t && h2t->c_parse_backslash)
+    {
+      c_parse_backslash_func = h2t->c_parse_backslash;
+      c_parse_backslash_baton = h2t->c_parse_backslash_baton;
+    }
+  else
+    c_parse_backslash_func = default_c_parse_backslash;
+
+  /* Host-to-target character conversion: use the translation's
+     function if it has one, the identity function if both character
+     sets are the same, and iconv (checked above) otherwise.  */
+  if (h2t && h2t->convert_char)
+    {
+      host_char_to_target_func = h2t->convert_char;
+      host_char_to_target_baton = h2t->convert_char_baton;
+    }
+  else if (host == target)
+    host_char_to_target_func = identity_either_char_to_other;
+  else
+    {
+      host_char_to_target_func = iconv_convert;
+      host_char_to_target_baton = &cached_iconv_host_to_target;
+    }
+
+  /* Target-to-host character conversion: same choices, in the other
+     direction.  */
+  if (t2h && t2h->convert_char)
+    {
+      target_char_to_host_func = t2h->convert_char;
+      target_char_to_host_baton = t2h->convert_char_baton;
+    }
+  else if (host == target)
+    target_char_to_host_func = identity_either_char_to_other;
+  else
+    {
+      target_char_to_host_func = iconv_convert;
+      target_char_to_host_baton = &cached_iconv_target_to_host;
+    }
+
+  /* Everything is in place; record the new pair.  */
+  current_host_charset = host;
+  current_target_charset = target;
+}
+
+
+/* Return the character set named NAME, as found by lookup_charset.
+   If lookup_charset finds nothing, report an error naming NAME.  */
+static struct charset *
+lookup_charset_or_error (const char *name)
+{
+  struct charset *found;
+
+  found = lookup_charset (name);
+  if (found == 0)
+    error ("GDB doesn't know of any character set named `%s'.", name);
+
+  return found;
+}
+    
+
+/* Report an error unless CS is marked as usable as GDB's host
+   character set.  */
+static void
+check_valid_host_charset (struct charset *cs)
+{
+  if (cs->valid_host_charset)
+    return;
+
+  error ("GDB can't use `%s' as its host character set.", cs->name);
+}
+
+
+/* See charset.h.  Look up CHARSET by name, check that it may be used
+   on the host, and install it alongside the current target character
+   set.  An unknown or unsuitable name is reported as an error.  */
+void
+set_host_charset (const char *charset)
+{
+  struct charset *host;
+
+  host = lookup_charset_or_error (charset);
+  check_valid_host_charset (host);
+
+  set_host_and_target_charsets (host, current_target_charset);
+}
+
+
+/* Return the name of the current host character set.  The string is
+   owned by the charset module; the caller should not free it.  */
+const char *
+host_charset (void)
+{
+  return current_host_charset->name;
+}
+
+
+/* See charset.h.  Look up CHARSET by name and install it as the
+   target character set, keeping the current host character set.  An
+   unknown name is reported as an error.  */
+void
+set_target_charset (const char *charset)
+{
+  struct charset *target;
+
+  target = lookup_charset_or_error (charset);
+  set_host_and_target_charsets (current_host_charset, target);
+}
+
+
+/* Return the name of the current target character set.  The string
+   is owned by the charset module; the caller should not free it.  */
+const char *
+target_charset (void)
+{
+  return current_target_charset->name;
+}
+
+
+
+/* Public character management functions.  */
+
+
+/* See charset.h.  This simply calls whichever function
+   set_host_and_target_charsets selected, passing it its baton.  */
+const char *
+c_target_char_has_backslash_escape (int target_char)
+{
+  const char *escape;
+
+  escape = ((*c_target_char_has_backslash_escape_func)
+            (c_target_char_has_backslash_escape_baton, target_char));
+  return escape;
+}
+
+
+/* See charset.h.  This simply calls whichever function
+   set_host_and_target_charsets selected, passing it its baton.  */
+int
+c_parse_backslash (int host_char, int *target_char)
+{
+  int recognized;
+
+  recognized = (*c_parse_backslash_func) (c_parse_backslash_baton,
+                                          host_char, target_char);
+  return recognized;
+}
+
+
+/* See charset.h.  The answer comes from the current host character
+   set's own host_char_print_literally function.  */
+int
+host_char_print_literally (int host_char)
+{
+  struct charset *host = current_host_charset;
+
+  return ((*host->host_char_print_literally)
+          (host->host_char_print_literally_baton, host_char));
+}
+
+
+/* See charset.h.  The answer comes from the current target character
+   set's own target_char_to_control_char function.  */
+int
+target_char_to_control_char (int target_char, int *target_ctrl_char)
+{
+  struct charset *target = current_target_charset;
+
+  return ((*target->target_char_to_control_char)
+          (target->target_char_to_control_char_baton,
+           target_char, target_ctrl_char));
+}
+
+
+/* See charset.h.  This simply calls whichever conversion function
+   set_host_and_target_charsets selected, passing it its baton.  */
+int
+host_char_to_target (int host_char, int *target_char)
+{
+  int converted;
+
+  converted = (*host_char_to_target_func) (host_char_to_target_baton,
+                                           host_char, target_char);
+  return converted;
+}
+
+
+/* See charset.h.  This simply calls whichever conversion function
+   set_host_and_target_charsets selected, passing it its baton.  */
+int
+target_char_to_host (int target_char, int *host_char)
+{
+  int converted;
+
+  converted = (*target_char_to_host_func) (target_char_to_host_baton,
+                                           target_char, host_char);
+  return converted;
+}
+
+
+
+/* Commands.  */
+
+
+/* List the valid character sets.  If HOST_ONLY is non-zero, list only
+   those character sets which can be used as GDB's host character set.
+   Otherwise, list every character set, marking with a `*' those that
+   can be used as a host character set, and print a legend explaining
+   the mark.  */
+static void
+list_charsets (int host_only)
+{
+  struct charset *cs;
+
+  printf_filtered ("Valid character sets are:\n");
+
+  for (cs = all_charsets; cs; cs = cs->next)
+    {
+      if (host_only)
+        {
+          /* Character sets that can't be used on the host must be
+             left out here, not printed unmarked.  */
+          if (cs->valid_host_charset)
+            printf_filtered ("  %s\n", cs->name);
+        }
+      else
+        printf_filtered ("  %s %s\n",
+                         cs->name,
+                         cs->valid_host_charset ? "*" : " ");
+    }
+
+  if (! host_only)
+    printf_filtered ("* - can be used as a host character set\n");
+}
+
+
+/* The `set charset' command.  With no argument, list all the
+   character sets.  Otherwise, make the character set named ARG both
+   the host and the target character set; it must be known to GDB and
+   usable as a host character set.  FROM_TTY is unused.  */
+static void
+set_charset_command (char *arg, int from_tty)
+{
+  struct charset *cs;
+
+  if (arg == 0 || *arg == '\0')
+    {
+      list_charsets (0);
+      return;
+    }
+
+  cs = lookup_charset_or_error (arg);
+  check_valid_host_charset (cs);
+  set_host_and_target_charsets (cs, cs);
+}
+
+
+/* The `set host-charset' command.  With no argument, ask for the
+   host-only character set listing.  Otherwise, make the character
+   set named ARG the host character set.  FROM_TTY is unused.  */
+static void
+set_host_charset_command (char *arg, int from_tty)
+{
+  if (arg == 0 || *arg == '\0')
+    {
+      list_charsets (1);
+      return;
+    }
+
+  /* set_host_charset does the lookup and the validity check.  */
+  set_host_charset (arg);
+}
+
+
+/* The `set target-charset' command.  With no argument, list all the
+   character sets.  Otherwise, make the character set named ARG the
+   target character set.  FROM_TTY is unused.  */
+static void
+set_target_charset_command (char *arg, int from_tty)
+{
+  if (arg == 0 || *arg == '\0')
+    {
+      list_charsets (0);
+      return;
+    }
+
+  /* set_target_charset does the lookup.  */
+  set_target_charset (arg);
+}
+
+
+/* The `show charset' command.  Print the names of the current host
+   and target character sets, using a single line when they are the
+   same character set.  ARG and FROM_TTY are unused.  */
+static void
+show_charset_command (char *arg, int from_tty)
+{
+  if (current_host_charset != current_target_charset)
+    {
+      printf_filtered ("The current host character set is `%s'.\n",
+                       host_charset ());
+      printf_filtered ("The current target character set is `%s'.\n",
+                       target_charset ());
+      return;
+    }
+
+  printf_filtered ("The current host and target character set is `%s'.\n",
+                   host_charset ());
+}
+
+
+
+/* The charset.c module initialization function.  */
+
+#ifndef GDB_DEFAULT_HOST_CHARSET
+#define GDB_DEFAULT_HOST_CHARSET "ISO-8859-1"
+#endif
+
+#ifndef GDB_DEFAULT_TARGET_CHARSET
+#define GDB_DEFAULT_TARGET_CHARSET "ISO-8859-1"
+#endif
+
+/* Register the built-in character sets and the translations between
+   them, select the default host and target character sets, and
+   install the `set' and `show' commands for them.  */
+void
+_initialize_charset (void)
+{
+  /* Register all the character sets GDB knows about.
+
+     You should use the same names that iconv does, where possible, to
+     take advantage of the iconv-based default behaviors.
+
+     CAUTION: if you register a character set, you must also register
+     as many translations as are necessary to make that character set
+     interoperate correctly with all the other character sets.  We do
+     provide default behaviors when no translation is available, or
+     when a translation's function pointer for a particular operation
+     is zero.  Hopefully, these defaults will be correct often enough
+     that we won't need to provide too many translations.  */
+  register_charset (simple_charset ("ascii", 1,
+                                    ascii_print_literally, 0,
+                                    ascii_to_control, 0));
+  register_charset (iso_8859_family_charset ("iso-8859-1"));
+  register_charset (ebcdic_family_charset ("ebcdic-us"));
+  register_charset (ebcdic_family_charset ("ibm1047"));
+  register_iconv_charsets ();
+
+  {
+    /* One table-driven translation for each ordered pair of the four
+       character sets registered by name above.  */
+    struct { char *from; char *to; int *table; } tlist[] = {
+      { "ascii",      "iso-8859-1", ascii_to_iso_8859_1_table },
+      { "ascii",      "ebcdic-us",  ascii_to_ebcdic_us_table },
+      { "ascii",      "ibm1047",    ascii_to_ibm1047_table },
+      { "iso-8859-1", "ascii",      iso_8859_1_to_ascii_table },
+      { "iso-8859-1", "ebcdic-us",  iso_8859_1_to_ebcdic_us_table },
+      { "iso-8859-1", "ibm1047",    iso_8859_1_to_ibm1047_table },
+      { "ebcdic-us",  "ascii",      ebcdic_us_to_ascii_table },
+      { "ebcdic-us",  "iso-8859-1", ebcdic_us_to_iso_8859_1_table },
+      { "ebcdic-us",  "ibm1047",    ebcdic_us_to_ibm1047_table },
+      { "ibm1047",    "ascii",      ibm1047_to_ascii_table },
+      { "ibm1047",    "iso-8859-1", ibm1047_to_iso_8859_1_table },
+      { "ibm1047",    "ebcdic-us",  ibm1047_to_ebcdic_us_table }
+    };
+
+    int i;
+
+    for (i = 0; i < (sizeof (tlist) / sizeof (tlist[0])); i++)
+      register_translation (simple_table_translation (tlist[i].from,
+                                                      tlist[i].to,
+                                                      tlist[i].table));
+  }
+
+  /* Select the defaults.  Presumably no target character set has been
+     chosen when the first call runs, in which case
+     set_host_and_target_charsets only records the host; the second
+     call then sets up the conversion functions.  */
+  set_host_charset (GDB_DEFAULT_HOST_CHARSET);
+  set_target_charset (GDB_DEFAULT_TARGET_CHARSET);
+
+  /* The `set charset', `set host-charset' and `set target-charset'
+     commands.  */
+  add_cmd ("charset", class_support, set_charset_command,
+           "Use CHARSET as the host and target character set.\n"
+           "The `host character set' is the one used by the system GDB is running on.\n"
+           "The `target character set' is the one used by the program being debugged.\n"
+           "You may only use supersets of ASCII for your host character set; GDB does\n"
+           "not support any others.\n"
+           "To see a list of the character sets GDB supports, type `set charset'\n"
+           "with no argument.",
+           &setlist);
+
+  add_cmd ("host-charset", class_support, set_host_charset_command,
+           "Use CHARSET as the host character set.\n"
+           "The `host character set' is the one used by the system GDB is running on.\n"
+           "You may only use supersets of ASCII for your host character set; GDB does\n"
+           "not support any others.\n"
+           "To see a list of the character sets GDB supports, type `set host-charset'\n"
+           "with no argument.",
+           &setlist);
+
+  add_cmd ("target-charset", class_support, set_target_charset_command,
+           "Use CHARSET as the target character set.\n"
+           "The `target character set' is the one used by the program being debugged.\n"
+           "GDB translates characters and strings between the host and target\n"
+           "character sets as needed.\n"
+           "To see a list of the character sets GDB supports, type `set target-charset'\n"
+           "with no argument.",
+           &setlist);
+
+  /* `show charset', with `show host-charset' and `show target-charset'
+     registered as aliases for it.  */
+  add_cmd ("charset", class_support, show_charset_command,
+           "Show the current host and target character sets.",
+           &showlist);
+  add_alias_cmd ("host-charset", "charset", class_alias, 1, &showlist);
+  add_alias_cmd ("target-charset", "charset", class_alias, 1, &showlist);
+}
Index: charset.h
===================================================================
RCS file: charset.h
diff -N charset.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ charset.h	12 Sep 2002 23:46:14 -0000
@@ -0,0 +1,120 @@
+/* Character set conversion support for GDB.
+   Copyright 2001 Free Software Foundation, Inc.
+
+   This file is part of GDB.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+#ifndef CHARSET_H
+#define CHARSET_H
+
+
+/* If the target program uses a different character set than the host,
+   GDB has some support for translating between the two; GDB converts
+   characters and strings to the host character set before displaying
+   them, and converts characters and strings appearing in expressions
+   entered by the user to the target character set.
+
+   At the moment, GDB only supports single-byte, stateless character
+   sets.  This includes the ISO-8859 family (ASCII extended with
+   accented characters, and (I think) Cyrillic, for European
+   languages), and the EBCDIC family (used on IBM's mainframes).
+   Unfortunately, it excludes many Asian scripts, the fixed- and
+   variable-width Unicode encodings, and other desirable things.
+   Patches are welcome!  (For example, it would be nice if the Java
+   string support could simply get absorbed into some more general
+   multi-byte encoding support.)
+
+   Furthermore, GDB's code pretty much assumes that the host character
+   set is some superset of ASCII; there are plenty of ('0' + n)
+   expressions and the like.
+
+   When the `iconv' library routine supports a character set meeting
+   the requirements above, it's easy to plug an entry into GDB's table
+   that uses iconv to handle the details.  */
+
+
+/* Set the host character set to CHARSET.  CHARSET must be a superset
+   of ASCII, since GDB's code assumes this.  */
+void set_host_charset (const char *charset);
+
+
+/* Set the target character set to CHARSET.  */
+void set_target_charset (const char *charset);
+
+
+/* Return the name of the current host/target character set.  The
+   result is owned by the charset module; the caller should not free
+   it.  */
+const char *host_charset (void);
+const char *target_charset (void);
+
+
+/* In general, the set of C backslash escapes (\n, \f) is specific to
+   the character set.  Not all character sets will have form feed
+   characters, for example.
+
+   The following functions allow GDB to parse and print control
+   characters in a character-set-independent way.  They are both
+   language-specific (to C and C++) and character-set-specific.
+   Putting them here is a compromise.  */
+
+
+/* If the target character TARGET_CHAR has a backslash escape in the
+   C language (i.e., a character like 'n' or 't'), return the host
+   character string that should follow the backslash.  Otherwise,
+   return zero.
+
+   When this function returns non-zero, the string it returns is
+   statically allocated; the caller is not responsible for freeing it.  */
+const char *c_target_char_has_backslash_escape (int target_char);
+
+
+/* If the host character HOST_CHAR is a valid backslash escape in the
+   C language for the target character set, return non-zero, and set
+   *TARGET_CHAR to the target character the backslash escape represents.
+   Otherwise, return zero.  */
+int c_parse_backslash (int host_char, int *target_char);
+
+
+/* Return non-zero if the host character HOST_CHAR can be printed
+   literally --- that is, if it can be readably printed as itself in a
+   character or string constant.  Return zero if it should be printed
+   using some kind of numeric escape, like '\031' in C, '^(25)' in
+   Chill, or #25 in Pascal.  */
+int host_char_print_literally (int host_char);
+
+
+/* If the host character HOST_CHAR has an equivalent in the target
+   character set, set *TARGET_CHAR to that equivalent, and return
+   non-zero.  Otherwise, return zero.  */
+int host_char_to_target (int host_char, int *target_char);
+
+
+/* If the target character TARGET_CHAR has an equivalent in the host
+   character set, set *HOST_CHAR to that equivalent, and return
+   non-zero.  Otherwise, return zero.  */
+int target_char_to_host (int target_char, int *host_char);
+
+
+/* If the target character TARGET_CHAR has a corresponding control
+   character (also in the target character set), set *TARGET_CTRL_CHAR
+   to the control character, and return non-zero.  Otherwise, return
+   zero.  */
+int target_char_to_control_char (int target_char, int *target_ctrl_char);
+
+
+#endif /* CHARSET_H */
Index: configure.in
===================================================================
RCS file: /cvs/src/src/gdb/configure.in,v
retrieving revision 1.90
diff -u -p -r1.90 configure.in
--- configure.in	14 Aug 2002 17:43:16 -0000	1.90
+++ configure.in	12 Sep 2002 23:46:16 -0000
@@ -1362,6 +1362,18 @@ AC_LINK_FILES($files, $links)
 dnl Check for exe extension set on certain hosts (e.g. Win32)
 AC_EXEEXT
 
+dnl  Detect the character set used by this host.
+
+dnl  At the moment, we just assume it's ISO-8859-1 (which is a
+dnl  superset of ASCII containing the characters needed for French,
+dnl  German, Spanish, Italian, and possibly others), but if we
+dnl  *were* to support any host character sets other than ISO-8859-1,
+dnl  here's where we'd detect it.
+AC_DEFINE(GDB_DEFAULT_HOST_CHARSET, "ISO-8859-1",
+          [Define to be a string naming the default host character set.])
+
+AM_ICONV
+
 AC_CONFIG_SUBDIRS($configdirs)
 AC_OUTPUT(Makefile .gdbinit:gdbinit.in,
 [
Index: utils.c
===================================================================
RCS file: /cvs/src/src/gdb/utils.c,v
retrieving revision 1.76
diff -u -p -r1.76 utils.c
--- utils.c	1 Aug 2002 17:18:33 -0000	1.76
+++ utils.c	12 Sep 2002 23:46:17 -0000
@@ -60,6 +60,7 @@
 #include "demangle.h"
 #include "expression.h"
 #include "language.h"
+#include "charset.h"
 #include "annotate.h"
 #include "filenames.h"
 
@@ -1282,6 +1283,23 @@ query (const char *ctlstr,...)
 }
 
 
+/* Report that a \^mumble sequence in a string or character constant
+   names no control character in the target character set.  START and
+   END delimit the text of the offending sequence within some larger
+   string, not including the initial backslash.  */
+static NORETURN int
+no_control_char_error (const char *start, const char *end)
+{
+  int len = end - start;
+  char *text = alloca (len + 1);
+
+  memcpy (text, start, len);
+  text[len] = '\0';
+
+  error ("There is no control character `\\%s' in the `%s' character set.",
+         text, target_charset ());
+}
+
 /* Parse a C escape sequence.  STRING_PTR points to a variable
    containing a pointer to the string to parse.  That pointer
    should point to the character after the \.  That pointer
@@ -1300,37 +1318,55 @@ query (const char *ctlstr,...)
 int
 parse_escape (char **string_ptr)
 {
+  int target_char;
   register int c = *(*string_ptr)++;
-  switch (c)
+  if (c_parse_backslash (c, &target_char))
+    return target_char;
+  else switch (c)
     {
-    case 'a':
-      return 007;		/* Bell (alert) char */
-    case 'b':
-      return '\b';
-    case 'e':			/* Escape character */
-      return 033;
-    case 'f':
-      return '\f';
-    case 'n':
-      return '\n';
-    case 'r':
-      return '\r';
-    case 't':
-      return '\t';
-    case 'v':
-      return '\v';
     case '\n':
       return -2;
     case 0:
       (*string_ptr)--;
       return 0;
     case '^':
-      c = *(*string_ptr)++;
-      if (c == '\\')
-	c = parse_escape (string_ptr);
-      if (c == '?')
-	return 0177;
-      return (c & 0200) | (c & 037);
+      {
+        /* Remember where this escape sequence started, for reporting
+           errors.  */
+        char *sequence_start_pos = *string_ptr - 1;
+
+        c = *(*string_ptr)++;
+
+        if (c == '?')
+          {
+            /* XXXCHARSET: What is `delete' in the host character set?  */
+            c = 0177;
+
+            if (! host_char_to_target (c, &target_char))
+              error ("There is no character corresponding to `Delete' "
+                     "in the target character set `%s'.",
+                     target_charset ());
+
+            return target_char;
+          }
+        else if (c == '\\')
+          target_char = parse_escape (string_ptr);
+        else
+          {
+            if (! host_char_to_target (c, &target_char))
+              no_control_char_error (sequence_start_pos, *string_ptr);
+          }
+
+        /* Now target_char is something like `c', and we want to find
+           its control-character equivalent.  */
+        if (! target_char_to_control_char (target_char, &target_char))
+          no_control_char_error (sequence_start_pos, *string_ptr);
+
+        return target_char;
+      }
+
+      /* XXXCHARSET: we need to use isdigit and value-of-digit
+         methods of the host character set here.  */
 
     case '0':
     case '1':
@@ -1359,7 +1395,12 @@ parse_escape (char **string_ptr)
 	return i;
       }
     default:
-      return c;
+      /* Any other character escapes to itself, if the target has it.  */
+      if (! host_char_to_target (c, &target_char))
+        error ("The escape sequence `\\%c' is equivalent to plain `%c', which"
+               " has no equivalent\nin the `%s' character set.",
+               c, c, target_charset ());
+      return target_char;
     }
 }
 
Index: doc/gdb.texinfo
===================================================================
RCS file: /cvs/src/src/gdb/doc/gdb.texinfo,v
retrieving revision 1.120
diff -u -p -r1.120 gdb.texinfo
--- doc/gdb.texinfo	5 Sep 2002 12:13:08 -0000	1.120
+++ doc/gdb.texinfo	12 Sep 2002 23:46:22 -0000
@@ -4421,6 +4421,8 @@ Table}.
 * Vector Unit::                 Vector Unit
 * Memory Region Attributes::    Memory region attributes
 * Dump/Restore Files::          Copy between memory and a file
+* Character Sets::              Debugging programs that use a different
+                                character set than GDB does
 @end menu
 
 @node Expressions
@@ -5806,6 +5808,250 @@ These offsets are relative to the addres
 the @var{bias} argument is applied.
 
 @end table
+
+@node Character Sets
+@section Character Sets
+@cindex character sets
+@cindex charset
+@cindex translating between character sets
+@cindex host character set
+@cindex target character set
+
+If the program you are debugging uses a different character set to
+represent characters and strings than the one @value{GDBN} uses itself,
+@value{GDBN} can automatically translate between the character sets for
+you.  The character set @value{GDBN} uses we call the @dfn{host
+character set}; the one the inferior program uses we call the
+@dfn{target character set}.
+
+For example, if you are running @value{GDBN} on a Linux system, which
+uses the ISO Latin 1 character set, but you are using @value{GDBN}'s
+remote protocol (@pxref{Remote,Remote Debugging}) to debug a program
+running on an IBM mainframe, which uses the @sc{ebcdic} character set,
+then the host character set is Latin 1, and the target character set is
+@sc{ebcdic}.  If you give @value{GDBN} the command @code{set
+target-charset ebcdic-us}, then @value{GDBN} translates between
+@sc{ebcdic} and Latin 1 as you print character or string values, or use
+character and string literals in expressions.
+
+@value{GDBN} has no way to automatically recognize which character set
+the inferior program uses; you must tell it, using the @code{set
+target-charset} command, described below.
+
+Here are the commands for controlling @value{GDBN}'s character set
+support:
+
+@table @code
+@item set target-charset @var{charset}
+@kindex set target-charset
+Set the current target character set to @var{charset}.  We list the
+character set names @value{GDBN} recognizes below, but if you invoke the
+@code{set target-charset} command with no argument, @value{GDBN} lists
+the character sets it supports.
+@end table
+
+@table @code
+@item set host-charset @var{charset}
+@kindex set host-charset
+Set the current host character set to @var{charset}.
+
+By default, @value{GDBN} uses a host character set appropriate to the
+system it is running on; you can override that default using the
+@code{set host-charset} command.
+
+@value{GDBN} can only use certain character sets as its host character
+set.  We list the character set names @value{GDBN} recognizes below, and
+indicate which can be host character sets, but if you invoke the
+@code{set host-charset} command with no argument, @value{GDBN} lists the
+character sets it supports, placing an asterisk (@samp{*}) after those
+it can use as a host character set.
+
+@item set charset @var{charset}
+@kindex set charset
+Set the current host and target character sets to @var{charset}.  If you
+invoke the @code{set charset} command with no argument, it lists the
+character sets it supports.  @value{GDBN} can only use certain character
+sets as its host character set; it marks those in the list with an
+asterisk (@samp{*}).
+
+@item show charset
+@itemx show host-charset
+@itemx show target-charset
+@kindex show charset
+@kindex show host-charset
+@kindex show target-charset
+Show the current host and target charsets.  The @code{show host-charset}
+and @code{show target-charset} commands are synonyms for @code{show
+charset}.
+
+@end table
+
+@value{GDBN} currently includes support for the following character
+sets:
+
+@table @code
+
+@item ASCII
+@cindex ASCII character set
+Seven-bit U.S. @sc{ascii}.  @value{GDBN} can use this as its host
+character set.
+
+@item ISO-8859-1
+@cindex ISO 8859-1 character set
+@cindex ISO Latin 1 character set
+The ISO Latin 1 character set.  This extends ASCII with accented
+characters needed for French, German, and Spanish.  @value{GDBN} can use
+this as its host character set.
+
+@item EBCDIC-US
+@itemx IBM1047
+@cindex EBCDIC character set
+@cindex IBM1047 character set
+Variants of the @sc{ebcdic} character set, used on some of IBM's
+mainframe operating systems.  (Linux on the S/390 uses U.S. @sc{ascii}.)
+@value{GDBN} cannot use these as its host character set.
+
+@end table
+
+Here is an example of @value{GDBN}'s character set support in action.
+Assume that the following source code has been placed in the file
+@file{charset-test.c}:
+
+@example
+#include <stdio.h>
+
+char ascii_hello[]
+  = @{72, 101, 108, 108, 111, 44, 32, 119,
+     111, 114, 108, 100, 33, 10, 0@};
+char ibm1047_hello[]
+  = @{200, 133, 147, 147, 150, 107, 64, 166,
+     150, 153, 147, 132, 90, 37, 0@};
+
+main ()
+@{
+  printf ("Hello, world!\n");
+@}
+@end example
+
+In this program, @code{ascii_hello} and @code{ibm1047_hello} are arrays
+containing the string @samp{Hello, world!} followed by a newline,
+encoded in the @sc{ascii} and @sc{ibm1047} character sets, respectively.
+
+We compile the program, and invoke the debugger on it:
+
+@example
+$ gcc -g charset-test.c -o charset-test
+$ gdb -nw charset-test
+GNU gdb 2001-12-19-cvs
+Copyright 2001 Free Software Foundation, Inc.
+@dots{}
+(gdb) 
+@end example
+
+We can use the @code{show charset} command to see what character sets
+@value{GDBN} is currently using to interpret and display characters and
+strings:
+
+@example
+(gdb) show charset
+The current host and target character set is `iso-8859-1'.
+(gdb) 
+@end example
+
+For the sake of printing this manual, let's use @sc{ascii} as our
+initial character set:
+@example
+(gdb) set charset ascii
+(gdb) show charset
+The current host and target character set is `ascii'.
+(gdb) 
+@end example
+
+Let's assume that @sc{ascii} is indeed the correct character set for our
+host system --- in other words, let's assume that if @value{GDBN} prints
+characters using the @sc{ascii} character set, our terminal will display
+them properly.  Since our current target character set is also
+@sc{ascii}, the contents of @code{ascii_hello} print legibly:
+
+@example
+(gdb) print ascii_hello
+$1 = 0x401698 "Hello, world!\n"
+(gdb) print ascii_hello[0]
+$2 = 72 'H'
+(gdb) 
+@end example
+
+@value{GDBN} uses the target character set for character and string
+literals you use in expressions:
+
+@example
+(gdb) print '+'
+$3 = 43 '+'
+(gdb) 
+@end example
+
+The @sc{ascii} character set uses the number 43 to encode the @samp{+}
+character.
+
+@value{GDBN} relies on the user to tell it which character set the
+target program uses.  If we print @code{ibm1047_hello} while our target
+character set is still @sc{ascii}, we get gibberish:
+
+@example
+(gdb) print ibm1047_hello
+$4 = 0x4016a8 "\310\205\223\223\226k@@\246\226\231\223\204Z%"
+(gdb) print ibm1047_hello[0]
+$5 = 200 '\310'
+(gdb) 
+@end example
+
+If we invoke the @code{set target-charset} command without an argument,
+@value{GDBN} tells us the character sets it supports:
+
+@example
+(gdb) set target-charset
+Valid character sets are:
+  ascii *
+  iso-8859-1 *
+  ebcdic-us  
+  ibm1047  
+* - can be used as a host character set
+@end example
+
+We can select @sc{ibm1047} as our target character set, and examine the
+program's strings again.  Now the @sc{ascii} string is wrong, but
+@value{GDBN} translates the contents of @code{ibm1047_hello} from the
+target character set, @sc{ibm1047}, to the host character set,
+@sc{ascii}, and they display correctly:
+
+@example
+(gdb) set target-charset ibm1047
+(gdb) show charset
+The current host character set is `ascii'.
+The current target character set is `ibm1047'.
+(gdb) print ascii_hello
+$6 = 0x401698 "\110\145%%?\054\040\167?\162%\144\041\012"
+(gdb) print ascii_hello[0]
+$7 = 72 '\110'
+(gdb) print ibm1047_hello
+$8 = 0x4016a8 "Hello, world!\n"
+(gdb) print ibm1047_hello[0]
+$9 = 200 'H'
+(gdb)
+@end example
+
+As above, @value{GDBN} uses the target character set for character and
+string literals you use in expressions:
+
+@example
+(gdb) print '+'
+$10 = 78 '+'
+(gdb) 
+@end example
+
+The IBM1047 character set uses the number 78 to encode the @samp{+}
+character.
+
 
 @node Macros
 @chapter C Preprocessor Macros
Index: testsuite/gdb.base/charset.c
===================================================================
RCS file: testsuite/gdb.base/charset.c
diff -N testsuite/gdb.base/charset.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gdb.base/charset.c	12 Sep 2002 23:46:23 -0000
@@ -0,0 +1,131 @@
+/* Test GDB's character set support
+   Jim Blandy <jimb@cygnus.com> --- December 2001 */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* X_string is an array of NUM_CHARS characters in the X charset whose
+   elements are as follows.  X should be the name the `set charset'
+   command uses for the character set, in lower-case, with any
+   non-identifier characters replaced with underscores.  Where a
+   character set doesn't have the given character, the string should
+   contain the character 'x'.
+
+   [0] --- the `alert' character, '\a'
+   [1] --- the `backspace' character, '\b'
+   [2] --- the `escape' character, '\e'
+   [3] --- the `form feed' character, '\f'
+   [4] --- the `line feed' character, '\n'
+   [5] --- the `carriage return' character, '\r'
+   [6] --- the `horizontal tab' character, '\t'
+   [7] --- the `vertical tab' character, '\v'
+   [8  .. 33] --- the uppercase letters A-Z
+   [34 .. 59] --- the lowercase letters a-z
+   [60 .. 69] --- the digits 0-9
+   [70] --- the `cent' character
+   [71] --- a control character with no defined backslash escape
+
+   Feel free to extend these as you like.  */
+
+#define NUM_CHARS (72)
+
+char ascii_string[NUM_CHARS];
+char iso_8859_1_string[NUM_CHARS];
+char ebcdic_us_string[NUM_CHARS];
+char ibm1047_string[NUM_CHARS];
+
+
+void
+init_string (char string[],
+             char x,
+             char alert, char backspace, char escape, char form_feed,
+             char line_feed, char carriage_return, char horizontal_tab,
+             char vertical_tab, char cent, char misc_ctrl)
+{
+  memset (string, x, NUM_CHARS);
+  string[0] = alert;
+  string[1] = backspace;
+  string[2] = escape;
+  string[3] = form_feed;
+  string[4] = line_feed;
+  string[5] = carriage_return;
+  string[6] = horizontal_tab;
+  string[7] = vertical_tab;
+  string[70] = cent;
+  string[71] = misc_ctrl;
+}
+
+
+void
+fill_run (char string[], int start, int len, int first)
+{
+  int i;
+
+  for (i = 0; i < len; i++)
+    string[start + i] = first + i;
+}
+
+
+int main ()
+{
+#ifdef usestubs
+  set_debug_traps();
+  breakpoint();
+#endif
+  (void) malloc (1);
+  /* Initialize ascii_string.  */
+  init_string (ascii_string,
+               120,
+               7, 8, 27, 12,
+               10, 13, 9,
+               11, 120, 17);
+  fill_run (ascii_string, 8, 26, 65);
+  fill_run (ascii_string, 34, 26, 97);
+  fill_run (ascii_string, 60, 10, 48);
+
+  /* Initialize iso_8859_1_string.  */
+  init_string (iso_8859_1_string,
+               120,
+               7, 8, 27, 12,
+               10, 13, 9,
+               11, 162, 17);
+  fill_run (iso_8859_1_string, 8, 26, 65);
+  fill_run (iso_8859_1_string, 34, 26, 97);
+  fill_run (iso_8859_1_string, 60, 10, 48);
+
+  /* Initialize ebcdic_us_string.  */
+  init_string (ebcdic_us_string,
+               167,
+               47, 22, 39, 12,
+               37, 13, 5,
+               11, 74, 17);
+  /* In EBCDIC, the upper-case letters are broken into three separate runs.  */
+  fill_run (ebcdic_us_string, 8, 9, 193);
+  fill_run (ebcdic_us_string, 17, 9, 209);
+  fill_run (ebcdic_us_string, 26, 8, 226);
+  /* The lower-case letters are, too.  */
+  fill_run (ebcdic_us_string, 34, 9, 129);
+  fill_run (ebcdic_us_string, 43, 9, 145);
+  fill_run (ebcdic_us_string, 52, 8, 162);
+  /* The digits, at least, are contiguous.  */
+  fill_run (ebcdic_us_string, 60, 10, 240);
+
+  /* Initialize ibm1047_string.  */
+  init_string (ibm1047_string,
+               167,
+               47, 22, 39, 12,
+               37, 13, 5,
+               11, 74, 17);
+  /* In EBCDIC, the upper-case letters are broken into three separate runs.  */
+  fill_run (ibm1047_string, 8, 9, 193);
+  fill_run (ibm1047_string, 17, 9, 209);
+  fill_run (ibm1047_string, 26, 8, 226);
+  /* The lower-case letters are, too.  */
+  fill_run (ibm1047_string, 34, 9, 129);
+  fill_run (ibm1047_string, 43, 9, 145);
+  fill_run (ibm1047_string, 52, 8, 162);
+  /* The digits, at least, are contiguous.  */
+  fill_run (ibm1047_string, 60, 10, 240);
+
+  puts ("All set!");            /* all strings initialized */
+}
Index: testsuite/gdb.base/charset.exp
===================================================================
RCS file: testsuite/gdb.base/charset.exp
diff -N testsuite/gdb.base/charset.exp
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ testsuite/gdb.base/charset.exp	12 Sep 2002 23:46:23 -0000
@@ -0,0 +1,486 @@
+# Copyright 2001 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  
+
+# Please email any bugs, comments, and/or additions to this file to:
+# bug-gdb@prep.ai.mit.edu
+
+# Test GDB's character set support.
+
+if $tracelevel then {
+	strace $tracelevel
+}
+
+set prms_id 0
+set bug_id 0
+
+set testfile "charset"
+set srcfile ${testfile}.c
+set binfile ${objdir}/${subdir}/${testfile}
+if  { [gdb_compile "${srcdir}/${subdir}/${srcfile}" "${binfile}" executable {debug}] != "" } {
+     gdb_suppress_entire_file "Testcase compile failed, so all tests in this file will automatically fail."
+}
+
+# Start with a fresh gdb.
+gdb_exit
+gdb_start
+gdb_reinitialize_dir $srcdir/$subdir
+gdb_load ${binfile}
+
+# Parse the output from a `show charset' command.  Return the host and
+# target charset as a two-element list (empty strings if unparseable).
+proc parse_show_charset_output {testname} {
+    global gdb_prompt
+    set host_charset ""    ;# defaults for the fail/timeout cases
+    set target_charset ""
+    gdb_expect {
+        -re "The current host and target character set is `(.*)'\\.\[\r\n\]+$gdb_prompt $" {
+            set host_charset $expect_out(1,string)
+            set target_charset $expect_out(1,string)
+            pass $testname
+        }
+        -re "The current host character set is `(.*)'\\.\[\r\n\]+The current target character set is `(.*)'\\.\[\r\n\]+$gdb_prompt $" {
+            set host_charset $expect_out(1,string)
+            set target_charset $expect_out(2,string)
+            pass $testname
+        }
+        -re ".*$gdb_prompt $" {
+            fail $testname
+        }
+        timeout {
+            fail "$testname (timeout)"
+        }
+    }
+    return [list $host_charset $target_charset]
+}
+
+
+# Try the various `show charset' commands.  These are all aliases of each
+# other; `show target-charset' and `show host-charset' actually print
+# both the host and target charsets.
+
+send_gdb "show charset\n"
+set show_charset [parse_show_charset_output "show charset"]
+
+send_gdb "show target-charset\n"
+set show_target_charset [parse_show_charset_output "show target-charset"]
+
+if {! [string compare $show_charset $show_target_charset]} {
+    pass "check `show target-charset' against `show charset'"
+} else {
+    fail "check `show target-charset' against `show charset'"
+}
+
+send_gdb "show host-charset\n"
+set show_host_charset [parse_show_charset_output "show host-charset"]
+
+if {! [string compare $show_charset $show_host_charset]} {
+    pass "check `show host-charset' against `show charset'"
+} else {
+    fail "check `show host-charset' against `show charset'"
+}
+
+
+# Get the list of supported charsets.
+send_gdb "set charset\n"
+
+# True iff we've seen the "Valid character sets are:" message.
+set seen_valid 0
+
+# True iff we've seen the "can be used as a host character set" message.
+set seen_can_host 0
+
+# A Tcl array mapping the names of all the character sets we've seen
+# to "1" if the character set can be used as a host character set, or
+# "0" otherwise.  We can use `array names charsets' just to get a list
+# of all character sets.
+array set charsets {}
+
+proc all_charset_names {} {
+    global charsets
+    return [array names charsets]
+}
+
+proc charset_exists {charset} {
+    global charsets
+    return [info exists charsets($charset)]
+}
+
+proc valid_host_charset {charset} {
+    global charsets
+    return $charsets($charset)
+}
+
+gdb_expect {
+    -re "Valid character sets are:\[\r\n\]+" {
+        # There's no ^ at the beginning of the pattern above, so that
+        # expect can skip the echoed `set charset' command.
+        set seen_valid 1
+        exp_continue
+    }
+    -re "^  (\[^ \t\n\]*) \\*\[\r\n\]+" {
+        set charsets($expect_out(1,string)) 1
+        exp_continue
+    }
+    -re "^  (\[^ \t\n\]*)\[ \t\]*\[\r\n\]+" {
+        set charsets($expect_out(1,string)) 0
+        exp_continue
+    }
+    -re "^\\* - can be used as a host character set\[\r\n\]+" {
+        set seen_can_host 1
+        exp_continue
+    }
+    -re ".*${gdb_prompt} $" {
+        # We don't do an exp_continue here.
+    }
+    timeout {
+        fail "get valid character sets (timeout)"
+    }
+}
+
+
+# Check that we've seen all the right pieces of the output, and that
+# we can at least use ASCII as a host character set.
+if {$seen_valid && $seen_can_host && [charset_exists ascii]} {
+    # We can't do the below as part of the test above, since all the
+    # [] substitution takes place before any expression evaluation
+    # takes place; && doesn't really short circuit things the way
+    # you'd like.  We'd get a "can't read $charsets(ascii)" error
+    # even when `info exists' had returned zero.
+    if {[valid_host_charset ascii]} {
+        pass "get valid character sets"
+    } else {
+        fail "get valid character sets"
+    }
+} else {
+    fail "get valid character sets (no ascii charset)"
+}
+
+
+# Try using `set host-charset' on an invalid character set.
+gdb_test "set host-charset my_grandma_bonnie" \
+         "GDB doesn't know of any character set named `my_grandma_bonnie'." \
+         "try `set host-charset' with invalid charset"
+
+
+# Try using `set target-charset' on an invalid character set.
+gdb_test "set target-charset my_grandma_bonnie" \
+         "GDB doesn't know of any character set named `my_grandma_bonnie'." \
+         "try `set target-charset' with invalid charset"
+
+
+# Make sure that GDB supports every host/target charset combination.
+foreach host_charset [all_charset_names] {
+    if {[valid_host_charset $host_charset]} {
+
+        set testname "try `set host-charset $host_charset'"
+        send_gdb "set host-charset $host_charset\n"
+        gdb_expect {
+            -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" {
+                # How did it get into `charsets' then?
+                fail "$testname (didn't recognize name)"
+            }
+            -re "GDB can't use `.*' as its host character set\\.\[\r\n]+${gdb_prompt} $" {
+                # Well, then why does its `charsets' entry say it can?
+                fail $testname
+            }
+            -re "${gdb_prompt} $" {
+                pass $testname
+            }
+            timeout {
+                fail "$testname (timeout)"
+            }
+        }
+
+        # Check that the command actually had its intended effect:
+        # $host_charset should now be the host character set.
+        send_gdb "show charset\n"
+        set result [parse_show_charset_output "parse `show charset' after `set host-charset $host_charset'"]
+        if {! [string compare [lindex $result 0] $host_charset]} {
+            pass "check effect of `set host-charset $host_charset'"
+        } else {
+            fail "check effect of `set host-charset $host_charset'"
+        }
+
+        # Now try setting every possible target character set,
+        # given that host charset.
+        foreach target_charset [all_charset_names] {
+            set testname "try `set target-charset $target_charset'"
+            send_gdb "set target-charset $target_charset\n"
+            gdb_expect {
+                -re "GDB doesn't know of any character set named.*\[\r\n]+${gdb_prompt} $" {
+                    fail "$testname (didn't recognize name)"
+                }
+                -re "GDB can't convert from the .* character set to .*\\.\[\r\n\]+${gdb_prompt} $" {
+                    # This is a serious problem.  GDB should be able to convert
+                    # between any arbitrary pair of character sets.
+                    fail "$testname (can't convert)"
+                }
+                -re "${gdb_prompt} $" {
+                    pass $testname
+                }
+                timeout {
+                    fail "$testname (timeout)"
+                }
+            }
+
+            # Check that the command actually had its intended effect:
+            # $target_charset should now be the target charset.
+            send_gdb "show charset\n"
+            set result [parse_show_charset_output "parse `show charset' after `set target-charset $target_charset'"]
+            if {! [string compare $result [list $host_charset $target_charset]]} {
+                pass "check effect of `set target-charset $target_charset'"
+            } else {
+                fail "check effect of `set target-charset $target_charset'"
+            }
+
+            # Test handling of characters in the host charset which
+            # can't be translated into the target charset.  \xA2 is
+            # `cent' in ISO-8859-1, which has no equivalent in ASCII.
+            #
+            # On some systems, the pseudo-tty through which we
+            # communicate with GDB insists on stripping the high bit
+            # from input characters, meaning that `cent' turns into
+            # `"'.  Since ISO-8859-1 and ASCII are identical in the
+            # lower 128 characters, it's tough to see how we can test
+            # this behavior on such systems, so we just xfail it.
+	    #
+	    # Note: the \x16 (Control-V) is an escape to allow \xA2 to
+	    # get past readline.
+            if {! [string compare $host_charset iso-8859-1] && ! [string compare $target_charset ascii]} {
+
+                set testname "untranslatable character in character literal"
+                send_gdb "print '\x16\xA2'\n"
+                gdb_expect {
+                    -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" {
+                        pass $testname
+                    }
+                    -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" {
+                        xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
+                    }
+                    -re "$gdb_prompt $" {
+                        fail $testname
+                    }
+                    timeout {
+                        fail "$testname (timeout)"
+                    }
+                }
+
+                set testname "untranslatable character in string literal"
+                # If the PTTY zeros bit seven, then this turns into
+                #   print """
+                # which gets us a syntax error.  We don't care.
+                send_gdb "print \"\x16\xA2\"\n"
+                gdb_expect {
+                    -re "There is no character corresponding to .* in the target character set .*\\.\[\r\n\]+$gdb_prompt $" {
+                        pass $testname
+                    }
+                    -re "Unterminated string in expression.\[\r\n\]+$gdb_prompt $" {
+                        xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
+                    }
+                    -re "$gdb_prompt $" {
+                        fail $testname
+                    }
+                    timeout {
+                        fail "$testname (timeout)"
+                    }
+                }
+
+                set testname "untranslatable characters in backslash escape"
+                send_gdb "print '\\\x16\xA2'\n"
+                gdb_expect {
+                    -re "The escape sequence .* is equivalent to plain .*, which has no equivalent\[\r\n\]+in the .* character set\\.\[\r\n\]+$gdb_prompt $" {
+                        pass $testname
+                    }
+                    -re " = 34 '\"'\[\r\n\]+$gdb_prompt $" {
+                        xfail "$testname (DejaGNU's pseudo-tty strips eighth bit)"
+                    }
+                    -re "$gdb_prompt $" {
+                        fail $testname
+                    }
+                    timeout {
+                        fail "$testname (timeout)"
+                    }
+                }
+            }
+        }
+    }
+}
+
+
+# Set the host character set to plain ASCII, and try actually printing
+# some strings in various target character sets.  We need to run the
+# test program to the point at which the strings have been
+# initialized.
+gdb_test "break [gdb_get_line_number "all strings initialized"]" \
+         ".*Breakpoint.* at .*" \
+         "set breakpoint after all strings have been initialized"
+gdb_run_cmd
+gdb_expect {
+    -re "Breakpoint.*all strings initialized.*$gdb_prompt $" {
+        pass "run until all strings have been initialized"
+    }
+    -re "$gdb_prompt $" {
+        fail "run until all strings have been initialized"
+    }
+    timeout {
+        fail "run until all strings have been initialized (timeout)"
+    }
+}
+
+
+# Fix the host character set at ASCII, then select each known character
+# set in turn as the target character set and check how characters and
+# strings in it are printed and parsed.
+gdb_test "set host-charset ascii" ""
+
+# Pieces of the regexp describing the strings in the test program.
+# They do not depend on the character set, so build them once, before
+# the loop, and use them directly in the pattern below.
+#
+# Every character with a backslash escape is expected to show up either
+# as that escape or as a plain 'x' (the form reported below as "no such
+# escape").  Note the doubled backslashes: the regexp must match a
+# literal backslash followed by the letter, not the control character
+# the escape stands for.
+set escapees {a b e f n r t v}
+set escapes ""
+foreach escape $escapees {
+    append escapes "(\\\\${escape}|x)"
+}
+set uppercase "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+set lowercase "abcdefghijklmnopqrstuvwxyz"
+set digits "0123456789"
+# A literal backslash followed by exactly three digits, or a plain 'x'.
+set octal_escape "(\\\\\[0-9\]\[0-9\]\[0-9\]|x)"
+
+foreach target_charset [all_charset_names] {
+    send_gdb "set target-charset $target_charset\n"
+    gdb_expect {
+        -re "$gdb_prompt $" {
+            pass "set target-charset $target_charset"
+        }
+        timeout {
+            fail "set target-charset $target_charset (timeout)"
+        }
+    }
+
+    # Try printing the null character.  There seems to be a bug in
+    # gdb_test that requires us to use gdb_expect here.
+    send_gdb "print '\\0'\n"
+    gdb_expect {
+        -re "\\\$${decimal} = 0 '\\\\0'\[\r\n\]+$gdb_prompt $" {
+            pass "print the null character in ${target_charset}"
+        }
+        -re "$gdb_prompt $" {
+            fail "print the null character in ${target_charset}"
+        }
+        timeout {
+            fail "print the null character in ${target_charset} (timeout)"
+        }
+    }
+
+    # Compute the name of the variable in the test program that holds
+    # a string in $target_charset.  The variable's name is the
+    # character set's name, in lower-case, with all non-identifier
+    # characters replaced with '_', with "_string" stuck on the end.
+    set var_name [string tolower "${target_charset}_string"]
+    regsub -all -- "\[^a-z0-9_\]" $var_name "_" var_name
+
+    # Print the whole string.  The expected layout is: the eight
+    # escapes, the upper-case letters, the lower-case letters, the
+    # digits, and then two numeric escapes, possibly followed by more.
+    send_gdb "print $var_name\n"
+    gdb_expect {
+        -re ".* = \"${escapes}${uppercase}${lowercase}${digits}${octal_escape}${octal_escape}.*\"\[\r\n\]+$gdb_prompt $" {
+            pass "print string in $target_charset"
+        }
+        -re "$gdb_prompt $" {
+            fail "print string in $target_charset"
+        }
+        timeout {
+            fail "print string in $target_charset (timeout)"
+        }
+    }
+
+    # Try entering a character literal, and see if it comes back unchanged.
+    gdb_test "print 'A'" \
+             " = \[0-9-\]+ 'A'" \
+             "parse character literal in ${target_charset}"
+
+    # Check that the character literal was encoded correctly.  Index 8
+    # is the first character after the eight escapes, i.e. 'A' in the
+    # layout matched above.
+    gdb_test "print 'A' == $var_name\[8\]" \
+             " = 1" \
+             "check value of parsed character literal in ${target_charset}"
+
+    # Try entering a string literal, and see if it comes back unchanged.
+    gdb_test "print \"abcdefABCDEF012345\"" \
+             " = \"abcdefABCDEF012345\"" \
+             "parse string literal in ${target_charset}"
+
+    # Check that the string literal was encoded correctly.  Index 50 is
+    # 8 escapes + 26 upper-case letters + 16, i.e. 'q' in the layout
+    # matched above.
+    gdb_test "print \"q\"\[0\] == $var_name\[50\]" \
+             " = 1" \
+             "check value of parsed string literal in ${target_charset}"
+
+    # Test handling of characters in the target charset which
+    # can't be translated into the host charset.  Index 70 is the first
+    # character after the digits (8 + 26 + 26 + 10).
+    if {! [string compare $target_charset iso-8859-1]} {
+        gdb_test "print iso_8859_1_string\[70\]" \
+                 " = \[0-9-\]+ '\\\\242'" \
+                 "print character with no equivalent in host character set"
+        gdb_test "print iso_8859_1_string + 70" \
+                 " = ${hex} \"\\\\242.*\"" \
+                 "print string with no equivalent in host character set"
+    }
+
+    # Make sure that we don't apply the ISO-8859-1 `print_literally'
+    # function to ASCII.
+    if {! [string compare $target_charset ascii]} {
+        gdb_test "print iso_8859_1_string\[70\]" \
+                 " = \[0-9-\]+ '\\\\242'" \
+                 "print ASCII unprintable character"
+        gdb_test "print iso_8859_1_string + 70" \
+                 " = ${hex} \"\\\\242.*\"" \
+                 "print ASCII unprintable string"
+    }
+
+    # Try printing characters with backslash escape equivalents.  The
+    # string starts with those characters in the same order as
+    # $escapees, so element $i of the string goes with escape $i.
+    for {set i 0} {$i < [llength $escapees]} {incr i} {
+        set escape [lindex $escapees $i]
+        send_gdb "print $var_name\[$i\]\n"
+        # Assume the escape exists unless GDB shows the 'x' placeholder.
+        set have_escape 1
+        gdb_expect {
+            -re "= \[0-9-\]+ '\\\\${escape}'\[\r\n\]+$gdb_prompt $" {
+                pass "try printing '\\${escape}' in ${target_charset}"
+            }
+            -re "= \[0-9-\]+ 'x'\[\r\n\]+$gdb_prompt $" {
+                xfail "try printing '\\${escape}' in ${target_charset} (no such escape)"
+                set have_escape 0
+            }
+            -re "$gdb_prompt $" {
+                fail "try printing '\\${escape}' in ${target_charset}"
+            }
+            timeout {
+                fail "try printing '\\${escape}' in ${target_charset} (timeout)"
+            }
+        }
+
+        # Parsing the escape is only checked when the print above did
+        # not report it as missing.
+        if {$have_escape} {
+
+            # Try parsing a backslash escape in a character literal.
+            gdb_test "print '\\${escape}' == $var_name\[$i\]" \
+                     " = 1" \
+                     "check value of '\\${escape}' in ${target_charset}"
+
+            # Try parsing a backslash escape in a string literal.
+            gdb_test "print \"\\${escape}\"\[0\] == $var_name\[$i\]" \
+                     " = 1" \
+                     "check value of \"\\${escape}\" in ${target_charset}"
+        }
+    }
+
+    # Try printing a character escape that doesn't exist.  We should
+    # get the unescaped character, in the target character set.
+    gdb_test "print '\\q'" " = \[0-9-\]+ 'q'" \
+             "print escape that doesn't exist in $target_charset"
+    gdb_test "print '\\q' == $var_name\[50\]" " = 1" \
+             "check value of escape that doesn't exist in $target_charset"
+}
+
+# End of the charset tests.  gdb_exit presumably shuts down the GDB
+# session used above -- confirm against the testsuite library.
+gdb_exit 


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]