This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PR13235 fix


This patch fixes PR13235 by performing the addis,addi -> nop,addi
optimization on a per-file basis rather than per-section.  See the PR
for what was failing when using gcc -freorder-blocks-and-partition.

ld will no longer optimize code containing sequences like
 lis 9,xxx@toc@ha
 addi 9,9,xxx@toc@l
 ldx 3,2,9
a conceivable toc access, but one that gcc doesn't generate.

Applying to mainline.  I'll put it on the branch mid next week.

bfd/
	PR ld/13235
	* elf64-ppc.c (struct ppc64_elf_obj_tdata): Add ha_relocs_not_using_r2.
	(ppc64_elf_edit_toc): Check HA relocs.
	(ha_reloc_match): Delete function.
	(ppc64_elf_relocate_section): Remove delayed HA nop optimization.
	Instead do it and low part optimization based on
	ha_relocs_not_using_r2.
ld/testsuite/
	* ld-powerpc/tocopt.d: Update.
	* ld-powerpc/tocopt5.d, * ld-powerpc/tocopt5.s: New test.
	* ld-powerpc/powerpc.exp: Run new test.

Index: bfd/elf64-ppc.c
===================================================================
RCS file: /cvs/src/src/bfd/elf64-ppc.c,v
retrieving revision 1.363
diff -u -p -r1.363 elf64-ppc.c
--- bfd/elf64-ppc.c	7 Sep 2011 13:56:09 -0000	1.363
+++ bfd/elf64-ppc.c	30 Sep 2011 04:00:07 -0000
@@ -2598,7 +2612,10 @@ struct ppc64_elf_obj_tdata
 
   /* Nonzero if this bfd has small toc/got relocs, ie. that expect
      the reloc to be in the range -32768 to 32767.  */
-  unsigned int has_small_toc_reloc;
+  unsigned int has_small_toc_reloc : 1;
+
+  /* Set if toc/got ha relocs detected not using r2.  */
+  unsigned int ha_relocs_not_using_r2 : 1;
 };
 
 #define ppc64_elf_tdata(bfd) \
@@ -8173,6 +8271,35 @@ ppc64_elf_edit_toc (struct bfd_link_info
 		r_type = ELF64_R_TYPE (rel->r_info);
 		switch (r_type)
 		  {
+		  default:
+		    break;
+
+		  case R_PPC64_GOT_TLSLD16_HA:
+		  case R_PPC64_GOT_TLSGD16_HA:
+		  case R_PPC64_GOT_TPREL16_HA:
+		  case R_PPC64_GOT_DTPREL16_HA:
+		  case R_PPC64_GOT16_HA:
+		  case R_PPC64_TOC16_HA:
+		    {
+		      bfd_vma off = rel->r_offset & ~3;
+		      unsigned char buf[4];
+		      unsigned int insn;
+
+		      if (!bfd_get_section_contents (ibfd, sec, buf, off, 4))
+			{
+			  free (used);
+			  goto error_ret;
+			}
+		      insn = bfd_get_32 (ibfd, buf);
+		      if ((insn & ((0x3f << 26) | 0x1f << 16))
+			  != ((15u << 26) | (2 << 16)) /* addis rt,2,imm */)
+			ppc64_elf_tdata (ibfd)->ha_relocs_not_using_r2 = 1;
+		    }
+		    break;
+		  }
+
+		switch (r_type)
+		  {
 		  case R_PPC64_TOC16:
 		  case R_PPC64_TOC16_LO:
 		  case R_PPC64_TOC16_HI:
@@ -8220,7 +8347,10 @@ ppc64_elf_edit_toc (struct bfd_link_info
 		      case R_PPC64_TOC16_LO_DS:
 			off = rel->r_offset + (bfd_big_endian (ibfd) ? -2 : 3);
 			if (!bfd_get_section_contents (ibfd, sec, &opc, off, 1))
-			  return FALSE;
+			  {
+			    free (used);
+			    goto error_ret;
+			  }
 			if ((opc & (0x3f << 2)) == (58u << 2))
 			  break;
 			/* Fall thru */
@@ -11673,63 +11812,6 @@ ppc64_elf_action_discarded (asection *se
   return _bfd_elf_default_action_discarded (sec);
 }
 
-/* REL points to a low-part reloc on a largetoc instruction sequence.
-   Find the matching high-part reloc instruction and verify that it
-   is addis REG,x,imm.  If so, set *REG to x and return a pointer to
-   the high-part reloc.  */
-
-static const Elf_Internal_Rela *
-ha_reloc_match (const Elf_Internal_Rela *relocs,
-		const Elf_Internal_Rela *rel,
-		unsigned int *reg,
-		bfd_boolean match_addend,
-		const bfd *input_bfd,
-		const bfd_byte *contents)
-{
-  enum elf_ppc64_reloc_type r_type, r_type_ha;
-  bfd_vma r_info_ha, r_addend;
-
-  r_type = ELF64_R_TYPE (rel->r_info);
-  switch (r_type)
-    {
-    case R_PPC64_GOT_TLSLD16_LO:
-    case R_PPC64_GOT_TLSGD16_LO:
-    case R_PPC64_GOT_TPREL16_LO_DS:
-    case R_PPC64_GOT_DTPREL16_LO_DS:
-    case R_PPC64_GOT16_LO:
-    case R_PPC64_TOC16_LO:
-      r_type_ha = r_type + 2;
-      break;
-    case R_PPC64_GOT16_LO_DS:
-      r_type_ha = R_PPC64_GOT16_HA;
-      break;
-    case R_PPC64_TOC16_LO_DS:
-      r_type_ha = R_PPC64_TOC16_HA;
-      break;
-    default:
-      abort ();
-    }
-  r_info_ha = ELF64_R_INFO (ELF64_R_SYM (rel->r_info), r_type_ha);
-  r_addend = rel->r_addend;
-
-  while (--rel >= relocs)
-    if (rel->r_info == r_info_ha
-	&& (!match_addend
-	    || rel->r_addend == r_addend))
-      {
-	const bfd_byte *p = contents + (rel->r_offset & ~3);
-	unsigned int insn = bfd_get_32 (input_bfd, p);
-	if ((insn & (0x3f << 26)) == (15u << 26) /* addis rt,x,imm */
-	    && (insn & (0x1f << 21)) == (*reg << 21))
-	  {
-	    *reg = (insn >> 16) & 0x1f;
-	    return rel;
-	  }
-	break;
-      }
-  return NULL;
-}
-
 /* The RELOCATE_SECTION function is called by the ELF backend linker
    to handle the relocations for a section.
 
@@ -11777,9 +11859,7 @@ ppc64_elf_relocate_section (bfd *output_
   Elf_Internal_Rela outrel;
   bfd_byte *loc;
   struct got_entry **local_got_ents;
-  unsigned char *ha_opt;
   bfd_vma TOCstart;
-  bfd_boolean no_ha_opt;
   bfd_boolean ret = TRUE;
   bfd_boolean is_opd;
   /* Disabled until we sort out how ld should choose 'y' vs 'at'.  */
@@ -11805,8 +11885,6 @@ ppc64_elf_relocate_section (bfd *output_
   symtab_hdr = &elf_symtab_hdr (input_bfd);
   sym_hashes = elf_sym_hashes (input_bfd);
   is_opd = ppc64_elf_section_data (input_section)->sec_type == sec_opd;
-  ha_opt = NULL;
-  no_ha_opt = FALSE;
 
   rel = relocs;
   relend = relocs + input_section->reloc_count;
@@ -13218,7 +13318,12 @@ ppc64_elf_relocate_section (bfd *output_
 	case R_PPC64_GOT_DTPREL16_HA:
 	case R_PPC64_GOT16_HA:
 	case R_PPC64_TOC16_HA:
-	  /* nop is done later.  */
+	  if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000
+	      && !ppc64_elf_tdata (input_bfd)->ha_relocs_not_using_r2)
+	    {
+	      bfd_byte *p = contents + (rel->r_offset & ~3);
+	      bfd_put_32 (input_bfd, NOP, p);
+	    }
 	  break;
 
 	case R_PPC64_GOT_TLSLD16_LO:
@@ -13229,7 +13334,8 @@ ppc64_elf_relocate_section (bfd *output_
 	case R_PPC64_GOT16_LO_DS:
 	case R_PPC64_TOC16_LO:
 	case R_PPC64_TOC16_LO_DS:
-	  if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000)
+	  if (htab->do_toc_opt && relocation + addend + 0x8000 < 0x10000
+	      && !ppc64_elf_tdata (input_bfd)->ha_relocs_not_using_r2)
 	    {
 	      bfd_byte *p = contents + (rel->r_offset & ~3);
 	      insn = bfd_get_32 (input_bfd, p);
@@ -13252,32 +13358,9 @@ ppc64_elf_relocate_section (bfd *output_
 		  || ((insn & (0x3f << 26)) == 62u << 26 /* std, stmd */
 		      && ((insn & 3) == 0 || (insn & 3) == 3)))
 		{
-		  unsigned int reg = (insn >> 16) & 0x1f;
-		  const Elf_Internal_Rela *ha;
-		  bfd_boolean match_addend;
-
-		  match_addend = (sym != NULL
-				  && ELF_ST_TYPE (sym->st_info) == STT_SECTION);
-		  ha = ha_reloc_match (relocs, rel, &reg, match_addend,
-				       input_bfd, contents);
-		  if (ha != NULL)
-		    {
-		      insn &= ~(0x1f << 16);
-		      insn |= reg << 16;
-		      bfd_put_32 (input_bfd, insn, p);
-		      if (ha_opt == NULL)
-			{
-			  ha_opt = bfd_zmalloc (input_section->reloc_count);
-			  if (ha_opt == NULL)
-			    return FALSE;
-			}
-		      ha_opt[ha - relocs] = 1;
-		    }
-		  else
-		    /* If we don't find a matching high part insn,
-		       something is fishy.  Refuse to nop any high
-		       part insn in this section.  */
-		    no_ha_opt = TRUE;
+		  insn &= ~(0x1f << 16);
+		  insn |= 2 << 16;
+		  bfd_put_32 (input_bfd, insn, p);
 		}
 	    }
 	  break;
@@ -13431,23 +13514,6 @@ ppc64_elf_relocate_section (bfd *output_
 	}
     }
 
-  if (ha_opt != NULL)
-    {
-      if (!no_ha_opt)
-	{
-	  unsigned char *opt = ha_opt;
-	  rel = relocs;
-	  relend = relocs + input_section->reloc_count;
-	  for (; rel < relend; opt++, rel++)
-	    if (*opt != 0)
-	      {
-		bfd_byte *p = contents + (rel->r_offset & ~3);
-		bfd_put_32 (input_bfd, NOP, p);
-	      }
-	}
-      free (ha_opt);
-    }
-
   /* If we're emitting relocations, then shortly after this function
      returns, reloc offsets and addends for this section will be
      adjusted.  Worse, reloc symbol indices will be for the output
Index: ld/testsuite/ld-powerpc/powerpc.exp
===================================================================
RCS file: /cvs/src/src/ld/testsuite/ld-powerpc/powerpc.exp,v
retrieving revision 1.36
diff -u -p -r1.36 powerpc.exp
--- ld/testsuite/ld-powerpc/powerpc.exp	22 Aug 2011 14:28:54 -0000	1.36
+++ ld/testsuite/ld-powerpc/powerpc.exp	30 Sep 2011 04:00:53 -0000
@@ -211,6 +211,8 @@ set ppc64elftests {
 	{{objdump -s tocopt3.d}} "tocopt3"}
     {"TOC opt4" "-melf64ppc -no-keep-memory --defsym x=2" "-a64"
         {tocopt4a.s tocopt4b.s} {{objdump -s tocopt4.d}} "tocopt4"}
+    {"TOC opt5" "-melf64ppc" "-a64"  {tocopt5.s}
+	{{objdump -s tocopt5.d}} "tocopt5"}
 }
 
 
Index: ld/testsuite/ld-powerpc/tocopt.d
===================================================================
RCS file: /cvs/src/src/ld/testsuite/ld-powerpc/tocopt.d,v
retrieving revision 1.1
diff -u -p -r1.1 tocopt.d
--- ld/testsuite/ld-powerpc/tocopt.d	5 Aug 2010 14:38:10 -0000	1.1
+++ ld/testsuite/ld-powerpc/tocopt.d	30 Sep 2011 04:00:53 -0000
@@ -2,10 +2,10 @@
 .*:     file format .*
 
 Contents of section \.text:
- 100000b0 60000000 e9228018 60000000 38a28020  .*
- 100000c0 e8c50000 60000000 3ba08028 7c62e82a  .*
- 100000d0 60000000 39228033 60000000 38a28008  .*
- 100000e0 e8c50000 60000000 3ba08010 7c62e82a  .*
+ 100000b0 3d220000 e9298018 3c820000 38a48020  .*
+ 100000c0 e8c50000 3fa00000 3bbd8028 7c62e82a  .*
+ 100000d0 3d220000 39298033 3c820000 38a48008  .*
+ 100000e0 e8c50000 3fa00000 3bbd8010 7c62e82a  .*
 Contents of section \.got:
  100100f0 00000000 100180f0 00000000 10010124  .*
  10010100 00000000 10010125 00000000 10010120  .*
Index: ld/testsuite/ld-powerpc/tocopt5.d
===================================================================
RCS file: ld/testsuite/ld-powerpc/tocopt5.d
diff -N ld/testsuite/ld-powerpc/tocopt5.d
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ld/testsuite/ld-powerpc/tocopt5.d	30 Sep 2011 04:20:00 -0000
@@ -0,0 +1,13 @@
+
+.*:     file format .*
+
+Contents of section \.text:
+ 100000b0 60000000 e9228018 60000000 38a28020  .*
+ 100000c0 e8c50000 60000000 3922802b 60000000  .*
+ 100000d0 38a28008 e8c50000                    .*
+Contents of section \.got:
+ 100100d8 00000000 100180d8 00000000 10010104  .*
+ 100100e8 00000000 10010105 00000000 10010100  .*
+ 100100f8 00000000 10010101                    .*
+Contents of section \.sdata:
+ 10010100 01020304 0506                        .*
Index: ld/testsuite/ld-powerpc/tocopt5.s
===================================================================
RCS file: ld/testsuite/ld-powerpc/tocopt5.s
diff -N ld/testsuite/ld-powerpc/tocopt5.s
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ld/testsuite/ld-powerpc/tocopt5.s	30 Sep 2011 04:20:00 -0000
@@ -0,0 +1,43 @@
+ .section .toc,"aw"
+x4t:
+ .quad x4
+x5t:
+ .quad x5
+x6t:
+ .quad x6
+
+ .section .sdata,"aw"
+x1:
+ .byte 1
+x2:
+ .byte 2
+x3:
+ .byte 3
+x4:
+ .byte 4
+x5:
+ .byte 5
+x6:
+ .byte 6
+
+ .globl _start
+ .text
+_start:
+# no need for got entry, optimise to nop,addi
+# note: ld doesn't yet do got optimisation, so we get nop,ld
+ addis 9,2,x1@got@ha
+ ld 9,x1@got@l(9)
+# must keep got entry, optimise to nop,addi,ld
+ addis 4,2,x2@got@ha
+ addi 5,4,x2@got@l
+ ld 6,0(5)
+
+# no need for toc entry, optimise to nop,addi
+ addis 9,2,x4t@toc@ha
+ ld 9,x4t@toc@l(9)
+# must keep toc entry, optimise to nop,addi,ld
+# if we had a reloc tying the ld to x5/x5t then we could throw away
+# the toc entry and optimise to nop,nop,addi
+ addis 4,2,x5t@toc@ha
+ addi 5,4,x5t@toc@l
+ ld 6,0(5)

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]