This is the mail archive of the binutils@sourceware.org mailing list for the binutils project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

powerpc64 large toc optimisation


This patch implements two powerpc64 ld optimisations.  Firstly, until
now I'd left the high part insn of optimised large toc code sequences
alone, i.e. given
  addis ra,r2,0; addi rb,ra,x
we'd optimise to
  addis ra,r2,0; addi rb,r2,x
We now optimise to
  nop;           addi rb,r2,x

Secondly, if large toc offsets are built up in a register before being
added to r2, we now optimise that sequence too.

bfd/
	* elf64-ppc.c (ha_reloc_match): Allow matches to other than r2.
	Return the matched base register via *REG.  Add match_addend param.
	(ppc64_elf_relocate_section): Nop out high part insn of large toc
	code sequence when the high part of offset is zero.
ld/testsuite/
	* ld-powerpc/tocopt.s, ld-powerpc/tocopt.d: New test.
	* ld-powerpc/powerpc.exp: Run it.

Index: bfd/elf64-ppc.c
===================================================================
RCS file: /cvs/src/src/bfd/elf64-ppc.c,v
retrieving revision 1.333
diff -u -p -r1.333 elf64-ppc.c
--- bfd/elf64-ppc.c	29 Jul 2010 07:35:58 -0000	1.333
+++ bfd/elf64-ppc.c	2 Aug 2010 07:36:55 -0000
@@ -11397,12 +11397,14 @@ ppc64_elf_action_discarded (asection *se
 
 /* REL points to a low-part reloc on a largetoc instruction sequence.
    Find the matching high-part reloc instruction and verify that it
-   is addis REG,r2,x.  If so, return a pointer to the high-part reloc.  */
+   is addis REG,x,imm.  If so, set *REG to x and return a pointer to
+   the high-part reloc.  */
 
 static const Elf_Internal_Rela *
 ha_reloc_match (const Elf_Internal_Rela *relocs,
 		const Elf_Internal_Rela *rel,
-		unsigned int reg,
+		unsigned int *reg,
+		bfd_boolean match_addend,
 		const bfd *input_bfd,
 		const bfd_byte *contents)
 {
@@ -11434,14 +11436,17 @@ ha_reloc_match (const Elf_Internal_Rela 
 
   while (--rel >= relocs)
     if (rel->r_info == r_info_ha
-	&& rel->r_addend == r_addend)
+	&& (!match_addend
+	    || rel->r_addend == r_addend))
       {
 	const bfd_byte *p = contents + (rel->r_offset & ~3);
 	unsigned int insn = bfd_get_32 (input_bfd, p);
-	if ((insn & ((0x3f << 26) | (0x1f << 16)))
-	    == ((15u << 26) | (2 << 16)) /* addis rt,r2,x */
-	    && (insn & (0x1f << 21)) == (reg << 21))
-	  return rel;
+	if ((insn & (0x3f << 26)) == (15u << 26) /* addis rt,x,imm */
+	    && (insn & (0x1f << 21)) == (*reg << 21))
+	  {
+	    *reg = (insn >> 16) & 0x1f;
+	    return rel;
+	  }
 	break;
       }
   return NULL;
@@ -11494,7 +11499,9 @@ ppc64_elf_relocate_section (bfd *output_
   Elf_Internal_Rela outrel;
   bfd_byte *loc;
   struct got_entry **local_got_ents;
+  unsigned char *ha_opt;
   bfd_vma TOCstart;
+  bfd_boolean no_ha_opt;
   bfd_boolean ret = TRUE;
   bfd_boolean is_opd;
   /* Disabled until we sort out how ld should choose 'y' vs 'at'.  */
@@ -11520,6 +11527,8 @@ ppc64_elf_relocate_section (bfd *output_
   symtab_hdr = &elf_symtab_hdr (input_bfd);
   sym_hashes = elf_sym_hashes (input_bfd);
   is_opd = ppc64_elf_section_data (input_section)->sec_type == sec_opd;
+  ha_opt = NULL;
+  no_ha_opt = FALSE;
 
   rel = relocs;
   relend = relocs + input_section->reloc_count;
@@ -12945,7 +12954,7 @@ ppc64_elf_relocate_section (bfd *output_
 	case R_PPC64_GOT_DTPREL16_HA:
 	case R_PPC64_GOT16_HA:
 	case R_PPC64_TOC16_HA:
-	  /* For now we don't nop out the first instruction.  */
+	  /* nop is done later.  */
 	  break;
 
 	case R_PPC64_GOT_TLSLD16_LO:
@@ -12980,12 +12989,31 @@ ppc64_elf_relocate_section (bfd *output_
 		      && ((insn & 3) == 0 || (insn & 3) == 3)))
 		{
 		  unsigned int reg = (insn >> 16) & 0x1f;
-		  if (ha_reloc_match (relocs, rel, reg, input_bfd, contents))
+		  const Elf_Internal_Rela *ha;
+		  bfd_boolean match_addend;
+
+		  match_addend = (sym != NULL
+				  && ELF_ST_TYPE (sym->st_info) == STT_SECTION);
+		  ha = ha_reloc_match (relocs, rel, &reg, match_addend,
+				       input_bfd, contents);
+		  if (ha != NULL)
 		    {
 		      insn &= ~(0x1f << 16);
-		      insn |= 2 << 16;
+		      insn |= reg << 16;
 		      bfd_put_32 (input_bfd, insn, p);
+		      if (ha_opt == NULL)
+			{
+			  ha_opt = bfd_zmalloc (input_section->reloc_count);
+			  if (ha_opt == NULL)
+			    return FALSE;
+			}
+		      ha_opt[ha - relocs] = 1;
 		    }
+		  else
+		    /* If we don't find a matching high part insn,
+		       something is fishy.  Refuse to nop any high
+		       part insn in this section.  */
+		    no_ha_opt = TRUE;
 		}
 	    }
 	  break;
@@ -13143,6 +13171,23 @@ ppc64_elf_relocate_section (bfd *output_
 	}
     }
 
+  if (ha_opt != NULL)
+    {
+      if (!no_ha_opt)
+	{
+	  unsigned char *opt = ha_opt;
+	  rel = relocs;
+	  relend = relocs + input_section->reloc_count;
+	  for (; rel < relend; opt++, rel++)
+	    if (*opt != 0)
+	      {
+		bfd_byte *p = contents + (rel->r_offset & ~3);
+		bfd_put_32 (input_bfd, NOP, p);
+	      }
+	}
+      free (ha_opt);
+    }
+
   /* If we're emitting relocations, then shortly after this function
      returns, reloc offsets and addends for this section will be
      adjusted.  Worse, reloc symbol indices will be for the output
Index: ld/testsuite/ld-powerpc/powerpc.exp
===================================================================
RCS file: /cvs/src/src/ld/testsuite/ld-powerpc/powerpc.exp,v
retrieving revision 1.31
diff -u -p -r1.31 powerpc.exp
--- ld/testsuite/ld-powerpc/powerpc.exp	1 Apr 2010 10:02:28 -0000	1.31
+++ ld/testsuite/ld-powerpc/powerpc.exp	5 Aug 2010 14:31:49 -0000
@@ -178,6 +178,8 @@ set ppc64elftests {
       "tlsmark"}
     {"sym@tocbase" "-shared -melf64ppc" "-a64" {symtocbase-1.s symtocbase-2.s}
 	{{objdump -dj.data symtocbase.d}} "symtocbase.so"}
+    {"TOC opt" "-melf64ppc" "-a64"  {tocopt.s}
+     {{objdump -s tocopt.d}} "tocopt"}
 }
 
 
Index: ld/testsuite/ld-powerpc/tocopt.d
===================================================================
RCS file: ld/testsuite/ld-powerpc/tocopt.d
diff -N ld/testsuite/ld-powerpc/tocopt.d
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ld/testsuite/ld-powerpc/tocopt.d	5 Aug 2010 14:31:49 -0000
@@ -0,0 +1,14 @@
+
+.*:     file format .*
+
+Contents of section \.text:
+ 100000b0 60000000 e9228018 60000000 38a28020  .*
+ 100000c0 e8c50000 60000000 3ba08028 7c62e82a  .*
+ 100000d0 60000000 39228033 60000000 38a28008  .*
+ 100000e0 e8c50000 60000000 3ba08010 7c62e82a  .*
+Contents of section \.got:
+ 100100f0 00000000 100180f0 00000000 10010124  .*
+ 10010100 00000000 10010125 00000000 10010120  .*
+ 10010110 00000000 10010121 00000000 10010122  .*
+Contents of section \.sdata:
+ 10010120 01020304 0506                        .*
Index: ld/testsuite/ld-powerpc/tocopt.s
===================================================================
RCS file: ld/testsuite/ld-powerpc/tocopt.s
diff -N ld/testsuite/ld-powerpc/tocopt.s
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ ld/testsuite/ld-powerpc/tocopt.s	5 Aug 2010 14:31:49 -0000
@@ -0,0 +1,51 @@
+ .section .toc,"aw"
+x4t:
+ .quad x4
+x5t:
+ .quad x5
+x6t:
+ .quad x6
+
+ .section .sdata,"aw"
+x1:
+ .byte 1
+x2:
+ .byte 2
+x3:
+ .byte 3
+x4:
+ .byte 4
+x5:
+ .byte 5
+x6:
+ .byte 6
+
+ .globl _start
+ .text
+_start:
+# no need for got entry, optimise to nop,addi
+# note: ld doesn't yet do got optimisation, so we get nop,ld
+ addis 9,2,x1@got@ha
+ ld 9,x1@got@l(9)
+# must keep got entry, optimise to nop,addi,ld
+ addis 4,2,x2@got@ha
+ addi 5,4,x2@got@l
+ ld 6,0(5)
+# must keep got entry, optimise to nop,li,ldx
+ lis 29,x3@got@ha
+ addi 29,29,x3@got@l
+ ldx 3,2,29
+
+# no need for toc entry, optimise to nop,addi
+ addis 9,2,x4t@toc@ha
+ ld 9,x4t@toc@l(9)
+# must keep toc entry, optimise to nop,addi,ld
+# if we had a reloc tying the ld to x5/x5t then we could throw away
+# the toc entry and optimise to nop,nop,addi
+ addis 4,2,x5t@toc@ha
+ addi 5,4,x5t@toc@l
+ ld 6,0(5)
+# must keep toc entry, optimise to nop,li,ldx
+ lis 29,x6t@toc@ha
+ addi 29,29,x6t@toc@l
+ ldx 3,2,29

-- 
Alan Modra
Australia Development Lab, IBM


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]