This is the mail archive of the gdb-patches@sourceware.cygnus.com mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

Re: Patch: hash tables for msymbols



This looks good; please commit it to sourceware.

However, I do think that big macros like MSYMBOL_HASH_ADD and
MSYMBOL_DEMANGLED_HASH_ADD are not too great.

I'd prefer either:
- defining one function which takes a minsym and one of the hash
  tables --- either msymbol_hash or msymbol_demangled_hash --- as
  arguments, and does the insertion, or
- just stick the code in directly where it's called.


> Recently I looked at a speed problem in gdb.  Some operations, such as
> printing a large C++ structure (in a large program), were very slow
> the first time.  Profiling (cf my profiling patch) revealed that the
> likely culprit was lookup_minimal_symbol, which took approximately 75%
> of the runtime (of `p *this'!).
> 
> This happens because minimal symbols are sorted by address, not name,
> so we have to do a linear search of the minimal symbols every time we
> want to find one.  My solution was to add a couple of hash tables to
> `struct objfile'.  We need two hash tables because we want to look up
> minimal symbols according to both their name and their demangled name.
> 
> The overhead of this approach is the size of the hash tables (they are
> an arbitrary fixed size) plus two pointers per minimal symbol.  I
> don't know if this is a big problem or not.
> 
> I do know that without this patch, `p *this' in my example took 3
> seconds (on my fast machine.  I've heard reports of up to 20 seconds
> on slower boxes).  With the patch it takes 0.6 seconds.
> 
> Is this ok to check in?
> 
> 1999-10-29  Tom Tromey  <tromey@cygnus.com>
> 
> 	* minsyms.c (compact_minimal_symbols): Added `objfile' argument.
> 	Put symbols into objfile's hash table.
> 	(install_minimal_symbols): Add symbol to objfile's demangled hash
> 	table.
> 	(lookup_minimal_symbol): Use hash table to find symbol in
> 	objfile.
> 	(msymbol_hash_iw, msymbol_hash): New functions.
> 	* symtab.h (struct minimal_symbol): New fields `hash_next',
> 	`demangled_hash_next'.
> 	(MSYMBOL_HASH_ADD): New macro.
> 	(MSYMBOL_DEMANGLED_HASH_ADD): Likewise.
> 	(msymbol_hash_iw, msymbol_hash): Declare.
> 	* objfiles.h (MINIMAL_SYMBOL_HASH_SIZE): New define.
> 	(struct objfile): New fields `msymbol_hash',
> 	`msymbol_demangled_hash'.
> 
> Tom
> 
> Index: gdb/minsyms.c
> ===================================================================
> RCS file: /cvs/cvsfiles/devo/gdb/minsyms.c,v
> retrieving revision 2.48
> diff -c -3 -p -r2.48 minsyms.c
> *** gdb/minsyms.c	1998/12/10 21:25:43	2.48
> --- gdb/minsyms.c	1999/11/02 20:58:32
> *************** static int
> *** 77,84 ****
>   compare_minimal_symbols PARAMS ((const void *, const void *));
>   
>   static int
> ! compact_minimal_symbols PARAMS ((struct minimal_symbol *, int));
>   
>   /* Look through all the current minimal symbol tables and find the
>      first minimal symbol that matches NAME.  If OBJF is non-NULL, limit
>      the search to that objfile.  If SFILE is non-NULL, limit the search
> --- 77,113 ----
>   compare_minimal_symbols PARAMS ((const void *, const void *));
>   
>   static int
> ! compact_minimal_symbols PARAMS ((struct minimal_symbol *, int,
> ! 				 struct objfile *));
>   
> + /* Compute a hash code based using the same criteria as `strcmp_iw'.  */
> + 
> + unsigned int
> + msymbol_hash_iw (const char *string)
> + {
> +   unsigned int hash = 0;
> +   while (*string && *string != '(')
> +     {
> +       while (isspace (*string))
> + 	++string;
> +       if (*string && *string != '(')
> + 	hash = (31 * hash) + *string;
> +       ++string;
> +     }
> +   return hash % MINIMAL_SYMBOL_HASH_SIZE;
> + }
> + 
> + /* Compute a hash code for a string.  */
> + 
> + unsigned int
> + msymbol_hash (const char *string)
> + {
> +   unsigned int hash = 0;
> +   for (; *string; ++string)
> +     hash = (31 * hash) + *string;
> +   return hash % MINIMAL_SYMBOL_HASH_SIZE;
> + }
> + 
>   /* Look through all the current minimal symbol tables and find the
>      first minimal symbol that matches NAME.  If OBJF is non-NULL, limit
>      the search to that objfile.  If SFILE is non-NULL, limit the search
> *************** lookup_minimal_symbol (name, sfile, objf
> *** 102,107 ****
> --- 131,139 ----
>     struct minimal_symbol *found_file_symbol = NULL;
>     struct minimal_symbol *trampoline_symbol = NULL;
>   
> +   unsigned int hash = msymbol_hash (name);
> +   unsigned int dem_hash = msymbol_hash_iw (name);
> + 
>   #ifdef SOFUN_ADDRESS_MAYBE_MISSING
>     if (sfile != NULL)
>       {
> *************** lookup_minimal_symbol (name, sfile, objf
> *** 117,126 ****
>       {
>         if (objf == NULL || objf == objfile)
>   	{
> ! 	  for (msymbol = objfile -> msymbols;
> ! 	       msymbol != NULL && SYMBOL_NAME (msymbol) != NULL &&
> ! 	       found_symbol == NULL;
> ! 	       msymbol++)
>   	    {
>   	      if (SYMBOL_MATCHES_NAME (msymbol, name))
>   		{
> --- 149,161 ----
>       {
>         if (objf == NULL || objf == objfile)
>   	{
> ! 	  /* Do two passes: the first over the ordinary hash table,
> ! 	     and the second over the demangled hash table.  */
> ! 	  int pass = 1;
> ! 
> ! 	  msymbol = objfile->msymbol_hash[hash];
> ! 	  
> ! 	  while (msymbol != NULL && found_symbol == NULL)
>   	    {
>   	      if (SYMBOL_MATCHES_NAME (msymbol, name))
>   		{
> *************** lookup_minimal_symbol (name, sfile, objf
> *** 158,163 ****
> --- 193,211 ----
>   		      break;
>   		    }
>   		}
> + 
> + 	      /* Find the next symbol on the hash chain.  At the end
> + 		 of the first pass, try the demangled hash list.  */
> + 	      if (pass == 1)
> + 		msymbol = msymbol->hash_next;
> + 	      else
> + 		msymbol = msymbol->demangled_hash_next;
> + 	      if (msymbol == NULL)
> + 		{
> + 		  ++pass;
> + 		  if (pass == 2)
> + 		    msymbol = objfile->msymbol_demangled_hash[dem_hash];
> + 		}
>   	    }
>   	}
>       }
> *************** discard_minimal_symbols (foo)
> *** 692,700 ****
>      overwrite its type with the type from the one we are compacting out.  */
>   
>   static int
> ! compact_minimal_symbols (msymbol, mcount)
>        struct minimal_symbol *msymbol;
>        int mcount;
>   {
>     struct minimal_symbol *copyfrom;
>     struct minimal_symbol *copyto;
> --- 740,749 ----
>      overwrite its type with the type from the one we are compacting out.  */
>   
>   static int
> ! compact_minimal_symbols (msymbol, mcount, objfile)
>        struct minimal_symbol *msymbol;
>        int mcount;
> +      struct objfile *objfile;
>   {
>     struct minimal_symbol *copyfrom;
>     struct minimal_symbol *copyto;
> *************** compact_minimal_symbols (msymbol, mcount
> *** 717,722 ****
> --- 766,772 ----
>   	  else
>   	    {
>   	      *copyto++ = *copyfrom++;
> + 	      MSYMBOL_HASH_ADD (copyto - 1, objfile);
>   	    }
>   	}
>         *copyto++ = *copyfrom++;
> *************** install_minimal_symbols (objfile)
> *** 809,815 ****
>         /* Compact out any duplicates, and free up whatever space we are
>   	 no longer using.  */
>         
> !       mcount = compact_minimal_symbols (msymbols, mcount);
>   
>         obstack_blank (&objfile->symbol_obstack,
>   	(mcount + 1 - alloc_count) * sizeof (struct minimal_symbol));
> --- 859,865 ----
>         /* Compact out any duplicates, and free up whatever space we are
>   	 no longer using.  */
>         
> !       mcount = compact_minimal_symbols (msymbols, mcount, objfile);
>   
>         obstack_blank (&objfile->symbol_obstack,
>   	(mcount + 1 - alloc_count) * sizeof (struct minimal_symbol));
> *************** install_minimal_symbols (objfile)
> *** 843,848 ****
> --- 893,900 ----
>         for ( ; mcount-- > 0 ; msymbols++)
>   	{
>   	  SYMBOL_INIT_DEMANGLED_NAME (msymbols, &objfile->symbol_obstack);
> + 	  if (SYMBOL_DEMANGLED_NAME (msymbols) != NULL)
> + 	    MSYMBOL_DEMANGLED_HASH_ADD (msymbols, objfile);
>   	}
>       }
>   }
> Index: gdb/objfiles.h
> ===================================================================
> RCS file: /cvs/cvsfiles/devo/gdb/objfiles.h,v
> retrieving revision 2.37
> diff -c -3 -p -r2.37 objfiles.h
> *** gdb/objfiles.h	1999/04/02 23:11:57	2.37
> --- gdb/objfiles.h	1999/11/02 20:58:38
> ***************
> *** 1,5 ****
>   /* Definitions for symbol file management in GDB.
> !    Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
>   
>   This file is part of GDB.
>   
> --- 1,5 ----
>   /* Definitions for symbol file management in GDB.
> !    Copyright (C) 1992, 1993, 1994, 1995, 1999 Free Software Foundation, Inc.
>   
>   This file is part of GDB.
>   
> *************** struct objstats {
> *** 195,200 ****
> --- 195,203 ----
>   extern void print_objfile_statistics PARAMS ((void));
>   extern void print_symbol_bcache_statistics PARAMS ((void));
>   
> + /* Number of entries in the minimal symbol hash table.  */
> + #define MINIMAL_SYMBOL_HASH_SIZE 349
> + 
>   /* Master structure for keeping track of each file from which
>      gdb reads symbols.  There are several ways these get allocated: 1.
>      The main symbol file, symfile_objfile, set by the symbol-file command,
> *************** struct objfile
> *** 302,307 ****
> --- 305,319 ----
>   
>     struct minimal_symbol *msymbols;
>     int minimal_symbol_count;
> + 
> +   /* This is a hash table used to index the minimal symbols by name.  */
> + 
> +   struct minimal_symbol *msymbol_hash[MINIMAL_SYMBOL_HASH_SIZE];
> + 
> +   /* This hash table is used to index the minimal symbols by their
> +      demangled names.  */
> + 
> +   struct minimal_symbol *msymbol_demangled_hash[MINIMAL_SYMBOL_HASH_SIZE];
>   
>     /* For object file formats which don't specify fundamental types, gdb
>        can create such types.  For now, it maintains a vector of pointers
> Index: gdb/symtab.h
> ===================================================================
> RCS file: /cvs/cvsfiles/devo/gdb/symtab.h,v
> retrieving revision 1.139
> diff -c -3 -p -r1.139 symtab.h
> *** gdb/symtab.h	1999/04/02 23:11:57	1.139
> --- gdb/symtab.h	1999/11/02 20:59:06
> *************** struct minimal_symbol
> *** 352,362 ****
> --- 352,392 ----
>         mst_file_data,		/* Static version of mst_data */
>         mst_file_bss		/* Static version of mst_bss */
>       } type BYTE_BITFIELD;
> + 
> +   /* Minimal symbols with the same hash key are kept on a linked
> +      list.  This is the link.  */
> + 
> +   struct minimal_symbol *hash_next;
> + 
> +   /* Minimal symbols are stored in two different hash tables.  This is
> +      the `next' pointer for the demangled hash table.  */
> + 
> +   struct minimal_symbol *demangled_hash_next;
>   };
>   
>   #define MSYMBOL_INFO(msymbol)		(msymbol)->info
>   #define MSYMBOL_TYPE(msymbol)		(msymbol)->type
>   
> + #define MSYMBOL_HASH_ADD(msymbol, objfile)				      \
> + do {									      \
> +   if ((msymbol)->hash_next == NULL)					      \
> +     {									      \
> +       unsigned int hash = msymbol_hash (SYMBOL_NAME (msymbol));		      \
> +       (msymbol)->hash_next = (objfile)->msymbol_hash[hash];		      \
> +       (objfile)->msymbol_hash[hash] = (msymbol);			      \
> +     }									      \
> + } while (0)
> + 
> + #define MSYMBOL_DEMANGLED_HASH_ADD(msymbol, objfile)			      \
> + do {									      \
> +   if ((msymbol)->demangled_hash_next == NULL)				      \
> +     {									      \
> +       unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME (msymbol)); \
> +       (msymbol)->demangled_hash_next = (objfile)->msymbol_demangled_hash[hash]; \
> +       (objfile)->msymbol_demangled_hash[hash] = (msymbol);		      \
> +     }									      \
> + } while (0)
> + 
>   
>   /* All of the name-scope contours of the program
>      are represented by `struct block' objects.
> *************** extern CORE_ADDR find_stab_function_addr
> *** 1216,1221 ****
> --- 1246,1257 ----
>   						  struct partial_symtab *,
>   						  struct objfile *));
>   #endif
> + 
> + extern unsigned int
> + msymbol_hash_iw PARAMS ((const char *));
> + 
> + extern unsigned int
> + msymbol_hash PARAMS ((const char *));
>   
>   extern struct minimal_symbol *
>   lookup_minimal_symbol PARAMS ((const char *, const char *, struct objfile *));
> 

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]