This is the mail archive of the
gdb@sourceware.org
mailing list for the GDB project.
Re: gdb segv in arm disassembler
On Thu, Jan 28, 2010 at 01:27:39PM -0500, Daniel Jacobowitz wrote:
> The best compromise I've thought of is:
>
> * If there are any mapping symbols in the symbol table, honor them.
> Ignore other symbols.
>
> * If there are no mapping symbols, default to code using the existing
> legacy search. This will mishandle recent files iff they have a code
> section containing only literal pools. Outside of test cases, this
> is unlikely.
Here is an implementation of the above. I've run the binutils
testsuites on arm-none-eabi and also verified that it fixes the
GDB segfault.
First we check mapping symbols. If we find a relevant mapping symbol,
we trust it. If not, but we find some other mapping symbol in the
file, we default to MAP_DATA. If there are no mapping symbols in the
file, we search other symbols. If there are no symbols at all, or
for non-ELF targets, we fall back to ARM mode code.
I've updated the testcases. dis-data.d and dis-data2.d should,
abstractly, generate .word output; they are really proxies for testing
stripped and legacy binary compatibility.
Does this look OK?
--
Daniel Jacobowitz
CodeSourcery
2010-01-28 Daniel Jacobowitz <dan@codesourcery.com>
gas/testsuite/
* gas/arm/dis-data.d: Update test name. Do not expect
.word output.
* gas/arm/dis-data2.d, gas/arm/dis-data2.s,
gas/arm/dis-data3.d, gas/arm/dis-data3.s: New tests.
opcodes/
* opcodes/arm-dis.c (struct arm_private_data): New.
(print_insn_coprocessor, print_insn_arm): Update to use struct
arm_private_data.
(is_mapping_symbol, get_map_sym_type): New functions.
(get_sym_code_type): Check the symbol's section. Do not check
mapping symbols.
(print_insn): Default to disassembling ARM mode code. Check
for mapping symbols separately from other symbols. Use
struct arm_private_data.
Index: gas/testsuite/gas/arm/dis-data.d
===================================================================
RCS file: /cvs/src/src/gas/testsuite/gas/arm/dis-data.d,v
retrieving revision 1.1
diff -u -p -r1.1 dis-data.d
--- gas/testsuite/gas/arm/dis-data.d 6 Jan 2010 15:02:45 -0000 1.1
+++ gas/testsuite/gas/arm/dis-data.d 28 Jan 2010 19:19:55 -0000
@@ -1,10 +1,10 @@
-# name: Data disassembler test
+# name: Data disassembler test (no symbols)
# skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
# objdump: -dr --prefix-addresses --show-raw-insn
.*: +file format .*arm.*
Disassembly of section \.text:
-0x00000000 20010000 .word 0x20010000
-0x00000004 000000f9 .word 0x000000f9
-0x00000008 00004cd5 .word 0x00004cd5
+0x00000000 20010000 andcs r0, r1, r0
+0x00000004 000000f9 strdeq r0, \[r0\], -r9
+0x00000008 00004cd5 ldrdeq r4, \[r0\], -r5
Index: gas/testsuite/gas/arm/dis-data2.d
===================================================================
RCS file: gas/testsuite/gas/arm/dis-data2.d
diff -N gas/testsuite/gas/arm/dis-data2.d
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ gas/testsuite/gas/arm/dis-data2.d 28 Jan 2010 19:19:55 -0000
@@ -0,0 +1,10 @@
+# name: Data disassembler test (function symbol)
+# skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+00000000 <main> 20010000 andcs r0, r1, r0
+00000004 <main\+0x4> 000000f9 strdeq r0, \[r0\], -r9
+00000008 <main\+0x8> 00004cd5 ldrdeq r4, \[r0\], -r5
Index: gas/testsuite/gas/arm/dis-data2.s
===================================================================
RCS file: gas/testsuite/gas/arm/dis-data2.s
diff -N gas/testsuite/gas/arm/dis-data2.s
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ gas/testsuite/gas/arm/dis-data2.s 28 Jan 2010 19:19:55 -0000
@@ -0,0 +1,8 @@
+.syntax unified
+.type main, %function
+.globl main
+main:
+.word 0x20010000
+.word 0x000000f9
+.word 0x00004cd5
+
Index: gas/testsuite/gas/arm/dis-data3.d
===================================================================
RCS file: gas/testsuite/gas/arm/dis-data3.d
diff -N gas/testsuite/gas/arm/dis-data3.d
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ gas/testsuite/gas/arm/dis-data3.d 28 Jan 2010 19:19:55 -0000
@@ -0,0 +1,11 @@
+# name: Data disassembler test (with mapping symbol)
+# skip: *-*-*coff *-*-pe *-*-wince *-*-*aout* *-*-netbsd *-*-riscix*
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+00000000 <main> 20010000 .word 0x20010000
+00000004 <main\+0x4> 000000f9 .word 0x000000f9
+00000008 <main\+0x8> 00004cd5 .word 0x00004cd5
+0000000c <main\+0xc> e1a00000 nop ; \(mov r0, r0\)
Index: gas/testsuite/gas/arm/dis-data3.s
===================================================================
RCS file: gas/testsuite/gas/arm/dis-data3.s
diff -N gas/testsuite/gas/arm/dis-data3.s
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ gas/testsuite/gas/arm/dis-data3.s 28 Jan 2010 19:19:55 -0000
@@ -0,0 +1,8 @@
+.syntax unified
+.type main, %function
+.globl main
+main:
+.word 0x20010000
+.word 0x000000f9
+.word 0x00004cd5
+nop
Index: opcodes/arm-dis.c
===================================================================
RCS file: /cvs/src/src/opcodes/arm-dis.c,v
retrieving revision 1.122
diff -u -p -r1.122 arm-dis.c
--- opcodes/arm-dis.c 20 Jan 2010 10:54:03 -0000 1.122
+++ opcodes/arm-dis.c 28 Jan 2010 19:19:58 -0000
@@ -45,6 +45,16 @@
#define NUM_ELEM(a) (sizeof (a) / sizeof (a)[0])
#endif
+struct arm_private_data
+{
+ /* The features to use when disassembling optional instructions. */
+ arm_feature_set features;
+
+ /* Whether any mapping symbols are present in the provided symbol
+ table. -1 if we do not know yet, otherwise 0 or 1. */
+ int has_mapping_symbols;
+};
+
struct opcode32
{
unsigned long arch; /* Architecture defining this insn. */
@@ -1750,7 +1760,8 @@ print_insn_coprocessor (bfd_vma pc,
fprintf_ftype func = info->fprintf_func;
unsigned long mask;
unsigned long value = 0;
- unsigned long allowed_arches = ((arm_feature_set *) info->private_data)->coproc;
+ struct arm_private_data *private_data = info->private_data;
+ unsigned long allowed_arches = private_data->features.coproc;
int cond;
for (insn = coprocessor_opcodes; insn->assembler; insn++)
@@ -1776,7 +1787,7 @@ print_insn_coprocessor (bfd_vma pc,
continue;
case SENTINEL_GENERIC_START:
- allowed_arches = ((arm_feature_set *) info->private_data)->core;
+ allowed_arches = private_data->features.core;
continue;
default:
@@ -2843,6 +2854,7 @@ print_insn_arm (bfd_vma pc, struct disas
const struct opcode32 *insn;
void *stream = info->stream;
fprintf_ftype func = info->fprintf_func;
+ struct arm_private_data *private_data = info->private_data;
if (print_insn_coprocessor (pc, info, given, FALSE))
return;
@@ -2855,7 +2867,7 @@ print_insn_arm (bfd_vma pc, struct disas
if ((given & insn->mask) != insn->value)
continue;
- if ((insn->arch & ((arm_feature_set *) info->private_data)->core) == 0)
+ if ((insn->arch & private_data->features.core) == 0)
continue;
/* Special case: an instruction with all bits set in the condition field
@@ -3057,7 +3069,7 @@ print_insn_arm (bfd_vma pc, struct disas
/* The p-variants of tst/cmp/cmn/teq are the pre-V6
mechanism for setting PSR flag bits. They are
obsolete in V6 onwards. */
- if (((((arm_feature_set *) info->private_data)->core) & ARM_EXT_V6) == 0)
+ if ((private_data->features.core & ARM_EXT_V6) == 0)
func (stream, "p");
}
break;
@@ -4268,7 +4280,44 @@ find_ifthen_state (bfd_vma pc,
ifthen_state = 0;
}
-/* Try to infer the code type (Arm or Thumb) from a symbol.
+/* Returns nonzero and sets *MAP_TYPE if the N'th symbol is a
+ mapping symbol. */
+
+static int
+is_mapping_symbol (struct disassemble_info *info, int n,
+ enum map_type *map_type)
+{
+ const char *name;
+
+ name = bfd_asymbol_name (info->symtab[n]);
+ if (name[0] == '$' && (name[1] == 'a' || name[1] == 't' || name[1] == 'd')
+ && (name[2] == 0 || name[2] == '.'))
+ {
+ *map_type = ((name[1] == 'a') ? MAP_ARM
+ : (name[1] == 't') ? MAP_THUMB
+ : MAP_DATA);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+
+/* Try to infer the code type (ARM or Thumb) from a mapping symbol.
+ Returns nonzero if *MAP_TYPE was set. */
+
+static int
+get_map_sym_type (struct disassemble_info *info,
+ int n,
+ enum map_type *map_type)
+{
+ /* If the symbol is in a different section, ignore it. */
+ if (info->section != NULL && info->section != info->symtab[n]->section)
+ return FALSE;
+
+ return is_mapping_symbol (info, n, map_type);
+}
+
+/* Try to infer the code type (ARM or Thumb) from a non-mapping symbol.
Returns nonzero if *MAP_TYPE was set. */
static int
@@ -4278,7 +4327,10 @@ get_sym_code_type (struct disassemble_in
{
elf_symbol_type *es;
unsigned int type;
- const char *name;
+
+ /* If the symbol is in a different section, ignore it. */
+ if (info->section != NULL && info->section != info->symtab[n]->section)
+ return FALSE;
es = *(elf_symbol_type **)(info->symtab + n);
type = ELF_ST_TYPE (es->internal_elf_sym.st_info);
@@ -4290,17 +4342,6 @@ get_sym_code_type (struct disassemble_in
return TRUE;
}
- /* Check for mapping symbols. */
- name = bfd_asymbol_name (info->symtab[n]);
- if (name[0] == '$' && (name[1] == 'a' || name[1] == 't' || name[1] == 'd')
- && (name[2] == 0 || name[2] == '.'))
- {
- *map_type = ((name[1] == 'a') ? MAP_ARM
- : (name[1] == 't') ? MAP_THUMB
- : MAP_DATA);
- return TRUE;
- }
-
return FALSE;
}
@@ -4355,12 +4396,12 @@ print_insn (bfd_vma pc, struct disassemb
long given;
int status;
int is_thumb = FALSE;
- int is_data = (bfd_asymbol_flavour (*info->symtab)
- == bfd_target_elf_flavour) ? TRUE : FALSE;
+ int is_data = FALSE;
int little_code;
unsigned int size = 4;
void (*printer) (bfd_vma, struct disassemble_info *, long);
bfd_boolean found = FALSE;
+ struct arm_private_data *private_data;
if (info->disassembler_options)
{
@@ -4373,7 +4414,7 @@ print_insn (bfd_vma pc, struct disassemb
/* PR 10288: Control which instructions will be disassembled. */
if (info->private_data == NULL)
{
- static arm_feature_set features;
+ static struct arm_private_data private;
if ((info->flags & USER_SPECIFIED_MACHINE_TYPE) == 0)
/* If the user did not use the -m command line switch then default to
@@ -4399,67 +4440,124 @@ print_insn (bfd_vma pc, struct disassemb
/* Compute the architecture bitmask from the machine number.
Note: This assumes that the machine number will not change
during disassembly.... */
- select_arm_features (info->mach, & features);
+ select_arm_features (info->mach, & private.features);
- info->private_data = & features;
+ private.has_mapping_symbols = -1;
+
+ info->private_data = & private;
}
-
+
+ private_data = info->private_data;
+
/* Decide if our code is going to be little-endian, despite what the
function argument might say. */
little_code = ((info->endian_code == BFD_ENDIAN_LITTLE) || little);
- /* First check the full symtab for a mapping symbol, even if there
- are no usable non-mapping symbols for this address. */
+ /* For ELF, consult the symbol table to determine what kind of code
+ or data we have. */
if (info->symtab_size != 0
&& bfd_asymbol_flavour (*info->symtab) == bfd_target_elf_flavour)
{
bfd_vma addr;
- int n;
+ int n, start;
int last_sym = -1;
- enum map_type type = MAP_DATA;
+ enum map_type type = MAP_ARM;
- if (pc <= last_mapping_addr)
- last_mapping_sym = -1;
- is_thumb = (last_type == MAP_THUMB);
- found = FALSE;
/* Start scanning at the start of the function, or wherever
we finished last time. */
- n = info->symtab_pos + 1;
- if (n < last_mapping_sym)
- n = last_mapping_sym;
+ start = info->symtab_pos + 1;
+ if (start < last_mapping_sym)
+ start = last_mapping_sym;
+ found = FALSE;
- /* Scan up to the location being disassembled. */
- for (; n < info->symtab_size; n++)
+ /* First, look for mapping symbols. */
+ if (private_data->has_mapping_symbols != 0)
{
- addr = bfd_asymbol_value (info->symtab[n]);
- if (addr > pc)
- break;
- if ((info->section == NULL
- || info->section == info->symtab[n]->section)
- && get_sym_code_type (info, n, &type))
+ /* Scan up to the location being disassembled. */
+ for (n = start; n < info->symtab_size; n++)
+ {
+ addr = bfd_asymbol_value (info->symtab[n]);
+ if (addr > pc)
+ break;
+ if (get_map_sym_type (info, n, &type))
+ {
+ last_sym = n;
+ found = TRUE;
+ }
+ }
+
+ if (!found)
{
- last_sym = n;
+ /* No mapping symbol found at this address. Look backwards
+ for a preceeding one. */
+ for (n = start - 1; n >= 0; n--)
+ {
+ if (get_map_sym_type (info, n, &type))
+ {
+ last_sym = n;
+ found = TRUE;
+ break;
+ }
+ }
+ }
+
+ if (found)
+ private_data->has_mapping_symbols = 1;
+
+ /* No mapping symbols were found. A leading $d may be
+ omitted for sections which start with data; but for
+ compatibility with legacy and stripped binaries, only
+ assume the leading $d if there is at least one mapping
+ symbol in the file. */
+ if (!found && private_data->has_mapping_symbols == -1)
+ {
+ /* Look for mapping symbols, in any section. */
+ for (n = 0; n < info->symtab_size; n++)
+ if (is_mapping_symbol (info, n, &type))
+ {
+ private_data->has_mapping_symbols = 1;
+ break;
+ }
+ if (private_data->has_mapping_symbols == -1)
+ private_data->has_mapping_symbols = 0;
+ }
+
+ if (!found && private_data->has_mapping_symbols == 1)
+ {
+ type = MAP_DATA;
found = TRUE;
}
}
+ /* Next search for function symbols to separate ARM from Thumb
+ in binaries without mapping symbols. */
if (!found)
{
- n = info->symtab_pos;
- if (n < last_mapping_sym - 1)
- n = last_mapping_sym - 1;
-
- /* No mapping symbol found at this address. Look backwards
- for a preceeding one. */
- for (; n >= 0; n--)
+ /* Scan up to the location being disassembled. */
+ for (n = start; n < info->symtab_size; n++)
{
- if ((info->section == NULL
- || info->section == info->symtab[n]->section)
- && get_sym_code_type (info, n, &type))
+ addr = bfd_asymbol_value (info->symtab[n]);
+ if (addr > pc)
+ break;
+ if (get_sym_code_type (info, n, &type))
{
last_sym = n;
found = TRUE;
- break;
+ }
+ }
+
+ if (!found)
+ {
+ /* No mapping symbol found at this address. Look backwards
+ for a preceeding one. */
+ for (n = start - 1; n >= 0; n--)
+ {
+ if (get_sym_code_type (info, n, &type))
+ {
+ last_sym = n;
+ found = TRUE;
+ break;
+ }
}
}
}