This is the mail archive of the gdb-patches@sources.redhat.com mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

D Symbol Demangling


Greetings GDB hackers!

I'm new to GDB programming, so please excuse any stupid questions.

There is a language called D which, like C++, uses symbol mangling since
it supports things such as method overloading.  I've been attempting to
add support to GDB to demangle the names.

Thus far, I've had partial success.  In fairly simple D programs, my
demangling works, but in more complex programs with mixed C and D code
(D is link-compatible with C) GDB only calls the D demangler for some of
the functions.

Unfortunately, I'm not very familiar with GDB's architecture, so I've
been basically wandering around in the dark on this one. Any help you
could provide would be appreciated.

Attached is my patch against GDB 6.3.  It is not the cleanest code right
now, and certainly not anywhere near ready for a release... I'm just
trying to get it to work, and feeling out the GDB code.  Once it's
working, I'll refactor it into something decent.

Thanks
John Demme

BTW, more information about D can be found at
http://www.digitalmars.com/d and there is a forum for D GDB patches at
http://www.dsource.org/forums/viewforum.php?f=58

Index: gdb/symtab.c
===================================================================
--- gdb/symtab.c	(revision 7)
+++ gdb/symtab.c	(working copy)
@@ -42,6 +42,7 @@
 #include "filenames.h"		/* for FILENAME_CMP */
 #include "objc-lang.h"
 #include "ada-lang.h"
+#include "c-lang.h"
 
 #include "hashtab.h"
 
@@ -395,7 +396,7 @@
   return (mangled_name);
 }
 
-
+
 /* Initialize the language dependent portion of a symbol
    depending upon the language for the symbol. */
 void
@@ -404,6 +405,7 @@
 {
   gsymbol->language = language;
   if (gsymbol->language == language_cplus
+      || gsymbol->language == language_d
       || gsymbol->language == language_java
       || gsymbol->language == language_objc)
     {
@@ -450,6 +452,15 @@
   if (gsymbol->language == language_unknown)
     gsymbol->language = language_auto;
 
+  if (gsymbol->language == language_d
+      || gsymbol->language == language_auto) {
+    demangled = d_demangle(mangled, 0);
+    if (demangled != NULL) {
+      gsymbol->language = language_d;
+      return demangled;
+    }
+  }
+
   if (gsymbol->language == language_objc
       || gsymbol->language == language_auto)
     {
@@ -609,6 +620,7 @@
 
   demangled = symbol_find_demangled_name (gsymbol, mangled);
   if (gsymbol->language == language_cplus
+      || gsymbol->language == language_d
       || gsymbol->language == language_java
       || gsymbol->language == language_objc)
     {
@@ -638,6 +650,7 @@
   switch (gsymbol->language) 
     {
     case language_cplus:
+    case language_d:
     case language_java:
     case language_objc:
       if (gsymbol->language_specific.cplus_specific.demangled_name != NULL)
@@ -663,6 +676,7 @@
   switch (gsymbol->language) 
     {
     case language_cplus:
+    case language_d:
     case language_java:
     case language_objc:
       if (gsymbol->language_specific.cplus_specific.demangled_name != NULL)
@@ -1020,7 +1034,7 @@
 
   modified_name = name;
 
-  /* If we are using C++ or Java, demangle the name before doing a lookup, so
+  /* If we are using C++, D, or Java, demangle the name before doing a lookup, so
      we can always binary search. */
   if (current_language->la_language == language_cplus)
     {
@@ -1032,6 +1046,16 @@
 	  needtofreename = 1;
 	}
     }
+  else if (current_language->la_language == language_d)
+    {
+      demangled_name = d_demangle (name, 0);
+      if (demangled_name)
+	{
+	  mangled_name = name;
+	  modified_name = demangled_name;
+	  needtofreename = 1;
+	}
+    }
   else if (current_language->la_language == language_java)
     {
       demangled_name = cplus_demangle (name, 
Index: gdb/c-lang.c
===================================================================
--- gdb/c-lang.c	(revision 7)
+++ gdb/c-lang.c	(working copy)
@@ -696,9 +696,214 @@
    a language currently not supported by GDB.  */
 
 const struct language_defn minimal_language_defn =
+ { /* "minimal" language: every callback that is set is a c_*, basic_*, null_* or default_* one, and no demangler is supplied */
+   "minimal",			/* Language name */
+   language_minimal,
+   NULL,			/* NOTE(review): this slot is not labelled here -- check struct language_defn */
+   range_check_off,
+   type_check_off,
+   case_sensitive_on,
+   array_row_major,
+   &exp_descriptor_standard,
+   c_preprocess_and_parse,
+   c_error,
+   null_post_parser,
+   c_printchar,			/* Print a character constant */
+   c_printstr,			/* Function to print string constant */
+   c_emit_char,			/* Print a single char */
+   c_create_fundamental_type,	/* Create fundamental type in this language */
+   c_print_type,			/* Print a type using appropriate syntax */
+   c_val_print,			/* Print a value using appropriate syntax */
+   c_value_print,		/* Print a top-level value */
+   NULL,				/* Language specific skip_trampoline */
+   NULL,				/* value_of_this */
+  basic_lookup_symbol_nonlocal,	/* lookup_symbol_nonlocal */
+  basic_lookup_transparent_type,/* lookup_transparent_type */
+  NULL,				/* Language specific symbol demangler */
+  NULL,				/* Language specific class_name_from_physname */
+  c_op_print_tab,		/* expression operators for printing */
+  1,				/* c-style arrays */
+  0,				/* String lower bound */
+  NULL,				/* NOTE(review): this slot is not labelled here -- check struct language_defn */
+  default_word_break_characters,
+  c_language_arch_info,
+  LANG_MAGIC
+};
+
+
+/*****************************
+ D Language stuff
+******************************/
+#include <string.h>
+#include <ctype.h>
+
+static int extractidentifiers(char** output, char** mangled) {  // copy a run of <decimal length><name> components from *mangled to *output, joined by '.'; returns 1, or -1 on failure
+  int i = -1;                            // length of the most recent component; -1 means none parsed yet
+  while (isdigit(**mangled)) {           // NOTE(review): a plain char is passed to isdigit(); cast to unsigned char first
+    i = strtol(*mangled, mangled, 10);   // read the length and move *mangled past the digits
+    if (strlen(*mangled) < i)            // the declared length runs past the end of the input
+      return -1;
+    memcpy(*output, *mangled, i);        // NOTE(review): no capacity check -- the caller must have sized *output
+    *mangled += i;
+    *output += i + 1;                    // step over the name plus one extra slot...
+    (*output)[-1] = '.';                 // ...and put the separator in that slot
+  }
+  if (**mangled == '\0' || i == -1)      // fail when no component was found or nothing follows the components
+    return -1;
+  (*output)--;                           // back up onto the trailing '.' so the next write replaces it
+  return 1;                              // success: both cursors have been advanced
+}
+
+static void append(char** dest, char* src) {  // copy the bytes of src (without its NUL) to *dest and leave *dest just past them
+  int i = strlen(src);                   // bytes still to copy
+  for(;i>0; i--) {
+    *(*dest)++ = *src++;                 // NOTE(review): no capacity check here, and no terminator is written
+  }
+}
+
+static int extracttypeinfo(char** dest, char** id) {  // decode one type code at *id into text at *dest, advancing both; returns 1, or -1 on failure
+  if (**id == '\0')  // nothing left to decode
+    return -1;
+  // Consume one code letter and dispatch on it; wrapper codes recurse for the type that follows:
+  switch (*(*id)++) {
+    // array, static array, dynamic array: the element type is printed first, then "[]"
+  case 'A': case 'G': case 'H':
+    if (extracttypeinfo(dest, id) == -1)
+      return -1;
+    append(dest, "[]");
+    return 1;
+    // pointer: the pointee type is printed first, then "*"
+  case 'P':
+    if (extracttypeinfo(dest, id) == -1)
+      return -1;
+    append(dest, "*");
+    return 1;
+    // reference: the referenced type is printed first, then "&"
+  case 'R':
+    if (extracttypeinfo(dest, id) == -1)
+      return -1;
+    append(dest, "&");
+    return 1;
+    // return value: prints only the type that follows, with no marker of its own
+  case 'Z':
+    return extracttypeinfo(dest, id);
+    // out: "out " prefix, then the type that follows
+  case 'J':
+    append(dest, "out ");
+    return extracttypeinfo(dest, id);
+    // inout: "inout " prefix, then the type that follows
+  case 'K':
+    append(dest, "inout ");
+    return extracttypeinfo(dest, id);
+    
+    // named types (originally labelled "enum"): the letter is followed by <length><name> components
+  case 'E': case 'T': case 'D': case 'C': case 'S': case 'I':
+    return extractidentifiers(dest, id);
+    
+    // basic types: one letter maps to one fixed name
+  case 'n': append(dest, "none"); return 1;  // ever used?
+  case 'v': append(dest, "void"); return 1;
+  case 'g': append(dest, "byte"); return 1;
+  case 'h': append(dest, "ubyte"); return 1;
+  case 's': append(dest, "short"); return 1;
+  case 't': append(dest, "ushort"); return 1;
+  case 'i': append(dest, "int"); return 1;
+  case 'k': append(dest, "uint"); return 1;
+  case 'l': append(dest, "long"); return 1;
+  case 'm': append(dest, "ulong"); return 1;
+  case 'f': append(dest, "float"); return 1;
+  case 'd': append(dest, "double"); return 1;
+  case 'e': append(dest, "real"); return 1;
+
+  // imaginary and complex (NOTE(review): up to 7 output bytes per input byte -- the output buffer must allow for it):
+  case 'o': append(dest, "ifloat"); return 1;
+  case 'p': append(dest, "idouble"); return 1;
+  case 'j': append(dest, "ireal"); return 1;
+  case 'q': append(dest, "cfloat"); return 1;
+  case 'r': append(dest, "cdouble"); return 1;
+  case 'c': append(dest, "creal"); return 1;
+
+  // other types:
+  case 'b': append(dest, "bit"); return 1;
+  case 'a': append(dest, "char"); return 1;
+  case 'u': append(dest, "wchar"); return 1;
+  case 'w': append(dest, "dchar"); return 1;
+
+  // typeinfo, error, instance:
+  case '@': return extractidentifiers(dest, id); // BUG: is this right?
+
+  default: append(dest, "unknown"); return 1;  // an unrecognised letter still counts as success
+  }
+}
+
+char* d_demangle(const char* mangled, int options) {  // demangle a D symbol name; returns a strdup()-allocated string, or NULL when the name is not recognised
+  char *symbol = mangled;  // read cursor into the name; NOTE(review): this initialisation discards the const qualifier
+  char *output = malloc(strlen(mangled)+20), *orig = output;  // write cursor and start of the scratch buffer; NOTE(review): strlen() dereferences mangled before the NULL test below, and malloc() is never checked
+  unsigned char isFunc = 0;  // 1 after a "_D" prefix, 2 once the first parameter has been started; NOTE(review): `options` is never read
+  if (mangled == NULL) {
+    free(output);
+    return NULL;
+  } else if (strcmp(mangled, "_Dmain") == 0) {  // the exact name "_Dmain" is special-cased
+    free(output);
+    return strdup("D main");
+  }
+  if (symbol == strstr(symbol, "_D")) {  // strstr() returning its own argument means "starts with"; each branch below skips its prefix
+    symbol += 2;
+    isFunc = 1;  // only "_D" names get a parameter list decoded
+  } else if (symbol == strstr(symbol, "__Class_")) {
+    symbol += 8;
+  } else if (symbol == strstr(symbol, "__init_")) {
+    symbol += 7;
+  } else if (symbol == strstr(symbol, "__vtbl_")) {
+    symbol += 7;
+  } else if (symbol == strstr(symbol, "__modctor_")) {
+    symbol += 10;
+  } else if (symbol == strstr(symbol, "__moddtor_")) {
+    symbol += 10;
+  } else if (symbol == strstr(symbol, "__ModuleInfo_")) {
+    symbol += 13;
+  } else {  // no known prefix: not a name this function handles
+    free(orig);
+    return NULL;
+  }
+  // NOTE(review): extractidentifiers() fails when nothing follows the names, so e.g. "__Class_3Foo" yields NULL -- confirm that is intended
+  if (extractidentifiers(&output, &symbol) < 0) {
+    free(orig);
+    return NULL;
+  }
+  append(&output, "(");  // the parentheses are emitted for every prefix, not only "_D"
+  if (isFunc == 1 && *symbol == 'F') {  // an 'F' after the name introduces the parameter type codes
+    symbol++;
+    while (*symbol != '\0' && *symbol != 'Z') {  // parameters run up to 'Z' or the end of the string
+      if (isFunc == 1) {
+	isFunc++;  // first parameter: no separator
+      } else {
+	append(&output, ", ");
+      }
+      if (extracttypeinfo(&output, &symbol) < 0) {  // NOTE(review): nothing bounds these writes; "_D1fFppp" puts 29 bytes into the 28-byte buffer
+	free(orig);
+	return NULL;
+      }
+    }
+  }
+  append(&output, ")");
+
+  // The return type is not displayed, but it wouldn't be too hard to do.
+  
+  *output = '\0';  // terminate the text built so far
+  output = strdup(orig);  // hand back an exactly-sized copy...
+  free(orig);  // ...and release the scratch buffer
+  return output;
+}
+
+char* d_sym_demangle(const struct general_symbol_info *gsymbol) {  // convenience wrapper: demangle the name stored in a symbol
+  return d_demangle(gsymbol->name, 0);  // options is passed as 0; result is whatever d_demangle() returns (possibly NULL)
+}
+
+const struct language_defn d_language_defn =
 {
-  "minimal",			/* Language name */
-  language_minimal,
+  "d",				/* Language name */
+  language_d,
   NULL,
   range_check_off,
   type_check_off,
@@ -719,7 +924,7 @@
   NULL,				/* value_of_this */
   basic_lookup_symbol_nonlocal,	/* lookup_symbol_nonlocal */
   basic_lookup_transparent_type,/* lookup_transparent_type */
-  NULL,				/* Language specific symbol demangler */
+  d_demangle,			/* Language specific symbol demangler */
   NULL,				/* Language specific class_name_from_physname */
   c_op_print_tab,		/* expression operators for printing */
   1,				/* c-style arrays */
@@ -733,7 +938,8 @@
 void
 _initialize_c_language (void)
 {
-  add_language (&c_language_defn);
+  add_language (&c_language_defn); 
+  add_language (&d_language_defn);
   add_language (&cplus_language_defn);
   add_language (&asm_language_defn);
   add_language (&minimal_language_defn);
Index: gdb/language.c
===================================================================
--- gdb/language.c	(revision 7)
+++ gdb/language.c	(working copy)
@@ -553,6 +553,7 @@
     {
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       if (TYPE_CODE (t1) == TYPE_CODE_FLT)
 	return TYPE_CODE (t2) == TYPE_CODE_FLT && l2 > l1 ?
@@ -664,6 +665,7 @@
     {
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       return (TYPE_CODE (type) != TYPE_CODE_INT) &&
 	(TYPE_CODE (type) != TYPE_CODE_ENUM) ? 0 : 1;
@@ -704,6 +706,7 @@
 
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       return (TYPE_CODE (type) == TYPE_CODE_INT) &&
 	TYPE_LENGTH (type) == sizeof (char)
@@ -726,6 +729,7 @@
 
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       /* C does not have distinct string type. */
       return (0);
@@ -745,6 +749,7 @@
     {
     case language_c:
     case language_cplus:
+    case language_d:
     case language_objc:
       /* Might be more cleanly handled by having a
          TYPE_CODE_INT_NOT_BOOL for (the deleted) CHILL and such
@@ -818,6 +823,7 @@
 	}
       return builtin_type_f_logical_s2;
     case language_cplus:
+    case language_d:
     case language_pascal:
       if (current_language->la_language==language_cplus)
         {sym = lookup_symbol ("bool", NULL, VAR_DOMAIN, NULL, NULL);}
Index: gdb/c-lang.h
===================================================================
--- gdb/c-lang.h	(revision 7)
+++ gdb/c-lang.h	(working copy)
@@ -28,6 +28,7 @@
 
 #include "value.h"
 #include "macroexp.h"
+#include "symtab.h"
 
 
 extern int c_parse (void);	/* Defined in c-exp.y */
@@ -90,4 +91,13 @@
 extern int cp_is_vtbl_member (struct type *);
 
 
+/*****************************
+ D Language stuff
+******************************/
+
+char* d_demangle(const char* mangled, int options);  /* Defined in c-lang.c; returns a strdup()-allocated string, or NULL if MANGLED is not recognised */
+
+char* d_sym_demangle(const struct general_symbol_info *gsymbol);  /* Defined in c-lang.c; calls d_demangle on GSYMBOL's name */
+
+
 #endif /* !defined (C_LANG_H) */
Index: gdb/defs.h
===================================================================
--- gdb/defs.h	(revision 7)
+++ gdb/defs.h	(working copy)
@@ -190,6 +190,7 @@
     language_auto,		/* Placeholder for automatic setting */
     language_c,			/* C */
     language_cplus,		/* C++ */
+    language_d,                 /* D */
     language_objc,		/* Objective-C */
     language_java,		/* Java */
     language_fortran,		/* Fortran */
Index: gdb/symfile.c
===================================================================
--- gdb/symfile.c	(revision 7)
+++ gdb/symfile.c	(working copy)
@@ -2169,6 +2169,7 @@
       filename_language_table =
 	xmalloc (fl_table_size * sizeof (*filename_language_table));
       add_filename_language (".c", language_c);
+      add_filename_language (".d", language_d);
       add_filename_language (".C", language_cplus);
       add_filename_language (".cc", language_cplus);
       add_filename_language (".cp", language_cplus);

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]