This is the mail archive of the
gdb-patches@sourceware.cygnus.com
mailing list for the GDB project.
Re: Patch: hash tables for msymbols
- To: tromey at cygnus dot com
- Subject: Re: Patch: hash tables for msymbols
- From: Jim Blandy <jimb at zwingli dot cygnus dot com>
- Date: 03 Mar 2000 17:50:00 -0500
- Cc: gdb-patches at sourceware dot cygnus dot com
- References: <87aeowo9jq.fsf@cygnus.com>
This looks good; please commit it to sourceware.
However, I do think that big macros like MSYMBOL_HASH_ADD and
MSYMBOL_DEMANGLED_HASH_ADD are not too great.
I'd prefer either:
- defining one function which takes a minsym and one of the hash
tables --- either msymbol_hash or msymbol_demangled_hash --- as
arguments, and does the insertion, or
- just stick the code in directly where it's called.
> Recently I looked at a speed problem in gdb. Some operations, such as
> printing a large C++ structure (in a large program), were very slow
> the first time. Profiling (cf my profiling patch) revealed that the
> likely culprit was lookup_minimal_symbol, which took approximately 75%
> of the runtime (of `p *this'!).
>
> This happens because minimal symbols are sorted by address, not name,
> so we have to do a linear search of the minimal symbols every time we
> want to find one. My solution was to add a couple of hash tables to
> `struct objfile'. We need two hash tables because we want to look up
> minimal symbols according to both their name and their demangled name.
>
> The overhead of this approach is the size of the hash tables (they are
> an arbitrary fixed size) plus two pointers per minimal symbol. I
> don't know if this is a big problem or not.
>
> I do know that without this patch, `p *this' in my example took 3
> seconds (on my fast machine. I've heard reports of up to 20 seconds
> on slower boxes). With the patch it takes 0.6 seconds.
>
> Is this ok to check in?
>
> 1999-10-29 Tom Tromey <tromey@cygnus.com>
>
> * minsyms.c (compact_minimal_symbols): Added `objfile' argument.
> Put symbols into objfile's hash table.
> (install_minimal_symbols): Add symbol to objfile's demangled hash
> table.
> (lookup_minimal_symbol): Use hash table to find symbol in
> objfile.
> (msymbol_hash_iw, msymbol_hash): New functions.
> * symtab.h (struct minimal_symbol): New fields `hash_next',
> `demangled_hash_next'.
> (MSYMBOL_HASH_ADD): New macro.
> (MSYMBOL_DEMANGLED_HASH_ADD): Likewise.
> (msymbol_hash_iw, msymbol_hash): Declare.
> * objfiles.h (MINIMAL_SYMBOL_HASH_SIZE): New define.
> (struct objfile): New fields `msymbol_hash',
> `msymbol_demangled_hash'.
>
> Tom
>
> Index: gdb/minsyms.c
> ===================================================================
> RCS file: /cvs/cvsfiles/devo/gdb/minsyms.c,v
> retrieving revision 2.48
> diff -c -3 -p -r2.48 minsyms.c
> *** gdb/minsyms.c 1998/12/10 21:25:43 2.48
> --- gdb/minsyms.c 1999/11/02 20:58:32
> *************** static int
> *** 77,84 ****
> compare_minimal_symbols PARAMS ((const void *, const void *));
>
> static int
> ! compact_minimal_symbols PARAMS ((struct minimal_symbol *, int));
>
> /* Look through all the current minimal symbol tables and find the
> first minimal symbol that matches NAME. If OBJF is non-NULL, limit
> the search to that objfile. If SFILE is non-NULL, limit the search
> --- 77,113 ----
> compare_minimal_symbols PARAMS ((const void *, const void *));
>
> static int
> ! compact_minimal_symbols PARAMS ((struct minimal_symbol *, int,
> ! struct objfile *));
>
> + /* Compute a hash code based using the same criteria as `strcmp_iw'. */
> +
> + unsigned int
> + msymbol_hash_iw (const char *string)
> + {
> + unsigned int hash = 0;
> + while (*string && *string != '(')
> + {
> + while (isspace (*string))
> + ++string;
> + if (*string && *string != '(')
> + hash = (31 * hash) + *string;
> + ++string;
> + }
> + return hash % MINIMAL_SYMBOL_HASH_SIZE;
> + }
> +
> + /* Compute a hash code for a string. */
> +
> + unsigned int
> + msymbol_hash (const char *string)
> + {
> + unsigned int hash = 0;
> + for (; *string; ++string)
> + hash = (31 * hash) + *string;
> + return hash % MINIMAL_SYMBOL_HASH_SIZE;
> + }
> +
> /* Look through all the current minimal symbol tables and find the
> first minimal symbol that matches NAME. If OBJF is non-NULL, limit
> the search to that objfile. If SFILE is non-NULL, limit the search
> *************** lookup_minimal_symbol (name, sfile, objf
> *** 102,107 ****
> --- 131,139 ----
> struct minimal_symbol *found_file_symbol = NULL;
> struct minimal_symbol *trampoline_symbol = NULL;
>
> + unsigned int hash = msymbol_hash (name);
> + unsigned int dem_hash = msymbol_hash_iw (name);
> +
> #ifdef SOFUN_ADDRESS_MAYBE_MISSING
> if (sfile != NULL)
> {
> *************** lookup_minimal_symbol (name, sfile, objf
> *** 117,126 ****
> {
> if (objf == NULL || objf == objfile)
> {
> ! for (msymbol = objfile -> msymbols;
> ! msymbol != NULL && SYMBOL_NAME (msymbol) != NULL &&
> ! found_symbol == NULL;
> ! msymbol++)
> {
> if (SYMBOL_MATCHES_NAME (msymbol, name))
> {
> --- 149,161 ----
> {
> if (objf == NULL || objf == objfile)
> {
> ! /* Do two passes: the first over the ordinary hash table,
> ! and the second over the demangled hash table. */
> ! int pass = 1;
> !
> ! msymbol = objfile->msymbol_hash[hash];
> !
> ! while (msymbol != NULL && found_symbol == NULL)
> {
> if (SYMBOL_MATCHES_NAME (msymbol, name))
> {
> *************** lookup_minimal_symbol (name, sfile, objf
> *** 158,163 ****
> --- 193,211 ----
> break;
> }
> }
> +
> + /* Find the next symbol on the hash chain. At the end
> + of the first pass, try the demangled hash list. */
> + if (pass == 1)
> + msymbol = msymbol->hash_next;
> + else
> + msymbol = msymbol->demangled_hash_next;
> + if (msymbol == NULL)
> + {
> + ++pass;
> + if (pass == 2)
> + msymbol = objfile->msymbol_demangled_hash[dem_hash];
> + }
> }
> }
> }
> *************** discard_minimal_symbols (foo)
> *** 692,700 ****
> overwrite its type with the type from the one we are compacting out. */
>
> static int
> ! compact_minimal_symbols (msymbol, mcount)
> struct minimal_symbol *msymbol;
> int mcount;
> {
> struct minimal_symbol *copyfrom;
> struct minimal_symbol *copyto;
> --- 740,749 ----
> overwrite its type with the type from the one we are compacting out. */
>
> static int
> ! compact_minimal_symbols (msymbol, mcount, objfile)
> struct minimal_symbol *msymbol;
> int mcount;
> + struct objfile *objfile;
> {
> struct minimal_symbol *copyfrom;
> struct minimal_symbol *copyto;
> *************** compact_minimal_symbols (msymbol, mcount
> *** 717,722 ****
> --- 766,772 ----
> else
> {
> *copyto++ = *copyfrom++;
> + MSYMBOL_HASH_ADD (copyto - 1, objfile);
> }
> }
> *copyto++ = *copyfrom++;
> *************** install_minimal_symbols (objfile)
> *** 809,815 ****
> /* Compact out any duplicates, and free up whatever space we are
> no longer using. */
>
> ! mcount = compact_minimal_symbols (msymbols, mcount);
>
> obstack_blank (&objfile->symbol_obstack,
> (mcount + 1 - alloc_count) * sizeof (struct minimal_symbol));
> --- 859,865 ----
> /* Compact out any duplicates, and free up whatever space we are
> no longer using. */
>
> ! mcount = compact_minimal_symbols (msymbols, mcount, objfile);
>
> obstack_blank (&objfile->symbol_obstack,
> (mcount + 1 - alloc_count) * sizeof (struct minimal_symbol));
> *************** install_minimal_symbols (objfile)
> *** 843,848 ****
> --- 893,900 ----
> for ( ; mcount-- > 0 ; msymbols++)
> {
> SYMBOL_INIT_DEMANGLED_NAME (msymbols, &objfile->symbol_obstack);
> + if (SYMBOL_DEMANGLED_NAME (msymbols) != NULL)
> + MSYMBOL_DEMANGLED_HASH_ADD (msymbols, objfile);
> }
> }
> }
> Index: gdb/objfiles.h
> ===================================================================
> RCS file: /cvs/cvsfiles/devo/gdb/objfiles.h,v
> retrieving revision 2.37
> diff -c -3 -p -r2.37 objfiles.h
> *** gdb/objfiles.h 1999/04/02 23:11:57 2.37
> --- gdb/objfiles.h 1999/11/02 20:58:38
> ***************
> *** 1,5 ****
> /* Definitions for symbol file management in GDB.
> ! Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
>
> This file is part of GDB.
>
> --- 1,5 ----
> /* Definitions for symbol file management in GDB.
> ! Copyright (C) 1992, 1993, 1994, 1995, 1999 Free Software Foundation, Inc.
>
> This file is part of GDB.
>
> *************** struct objstats {
> *** 195,200 ****
> --- 195,203 ----
> extern void print_objfile_statistics PARAMS ((void));
> extern void print_symbol_bcache_statistics PARAMS ((void));
>
> + /* Number of entries in the minimal symbol hash table. */
> + #define MINIMAL_SYMBOL_HASH_SIZE 349
> +
> /* Master structure for keeping track of each file from which
> gdb reads symbols. There are several ways these get allocated: 1.
> The main symbol file, symfile_objfile, set by the symbol-file command,
> *************** struct objfile
> *** 302,307 ****
> --- 305,319 ----
>
> struct minimal_symbol *msymbols;
> int minimal_symbol_count;
> +
> + /* This is a hash table used to index the minimal symbols by name. */
> +
> + struct minimal_symbol *msymbol_hash[MINIMAL_SYMBOL_HASH_SIZE];
> +
> + /* This hash table is used to index the minimal symbols by their
> + demangled names. */
> +
> + struct minimal_symbol *msymbol_demangled_hash[MINIMAL_SYMBOL_HASH_SIZE];
>
> /* For object file formats which don't specify fundamental types, gdb
> can create such types. For now, it maintains a vector of pointers
> Index: gdb/symtab.h
> ===================================================================
> RCS file: /cvs/cvsfiles/devo/gdb/symtab.h,v
> retrieving revision 1.139
> diff -c -3 -p -r1.139 symtab.h
> *** gdb/symtab.h 1999/04/02 23:11:57 1.139
> --- gdb/symtab.h 1999/11/02 20:59:06
> *************** struct minimal_symbol
> *** 352,362 ****
> --- 352,392 ----
> mst_file_data, /* Static version of mst_data */
> mst_file_bss /* Static version of mst_bss */
> } type BYTE_BITFIELD;
> +
> + /* Minimal symbols with the same hash key are kept on a linked
> + list. This is the link. */
> +
> + struct minimal_symbol *hash_next;
> +
> + /* Minimal symbols are stored in two different hash tables. This is
> + the `next' pointer for the demangled hash table. */
> +
> + struct minimal_symbol *demangled_hash_next;
> };
>
> #define MSYMBOL_INFO(msymbol) (msymbol)->info
> #define MSYMBOL_TYPE(msymbol) (msymbol)->type
>
> + #define MSYMBOL_HASH_ADD(msymbol, objfile) \
> + do { \
> + if ((msymbol)->hash_next == NULL) \
> + { \
> + unsigned int hash = msymbol_hash (SYMBOL_NAME (msymbol)); \
> + (msymbol)->hash_next = (objfile)->msymbol_hash[hash]; \
> + (objfile)->msymbol_hash[hash] = (msymbol); \
> + } \
> + } while (0)
> +
> + #define MSYMBOL_DEMANGLED_HASH_ADD(msymbol, objfile) \
> + do { \
> + if ((msymbol)->demangled_hash_next == NULL) \
> + { \
> + unsigned int hash = msymbol_hash_iw (SYMBOL_DEMANGLED_NAME (msymbol)); \
> + (msymbol)->demangled_hash_next = (objfile)->msymbol_demangled_hash[hash]; \
> + (objfile)->msymbol_demangled_hash[hash] = (msymbol); \
> + } \
> + } while (0)
> +
>
> /* All of the name-scope contours of the program
> are represented by `struct block' objects.
> *************** extern CORE_ADDR find_stab_function_addr
> *** 1216,1221 ****
> --- 1246,1257 ----
> struct partial_symtab *,
> struct objfile *));
> #endif
> +
> + extern unsigned int
> + msymbol_hash_iw PARAMS ((const char *));
> +
> + extern unsigned int
> + msymbol_hash PARAMS ((const char *));
>
> extern struct minimal_symbol *
> lookup_minimal_symbol PARAMS ((const char *, const char *, struct objfile *));
>