This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
powerpc __tls_get_addr call optimization
- From: Alan Modra <amodra at bigpond dot net dot au>
- To: libc-alpha at sourceware dot org
- Date: Mon, 30 Mar 2009 18:25:41 +1030
- Subject: powerpc __tls_get_addr call optimization
This patch is glibc support for a PowerPC TLS optimization, inspired
by Alexandre Oliva's TLS optimization for other processors,
http://www.lsd.ic.unicamp.br/~oliva/writeups/TLS/RFC-TLSDESC-x86.txt
In essence, this optimization uses a zero module id in the TLS
descriptor to indicate that a TLS variable is allocated space in the
static TLS area. A special linker stub for __tls_get_addr plt calls
checks the module id, and returns the offset immediately if zero. The
linker communicates the fact that the special __tls_get_addr stub is
present by setting a new dynamic tag.
See http://sourceware.org/ml/binutils/2009-03/msg00498.html
Bootstrapped and regression tested powerpc-linux and powerpc64-linux.
One test regression present, elf/tst-tls-dlinfo with:
dlinfo RTLD_DI_TLS_DATA says 0x40002014 but should be unallocated
This is because TLS data is now allocated when a library is dlopened
and relocated. I'd appreciate some advice on what to do about this test
failure. I suppose I could write a configure test to see whether ld
supports --no-tls-get-addr-optimize and pass that option when linking
the test.
The change to dl-open.c is to fix elf/tst-tls17 which fails with:
Inconsistency detected by ld.so: dl-reloc.c: 142: _dl_nothread_init_static_tls: Assertion `map->l_tls_modid <= dtv[-1].counter' failed!
tst-tls17 dlopens a dynamic library which itself has 20 dependent
dynamic libraries, each using TLS. With the powerpc optimization,
when dl_open_worker applies relocations for all these dependencies,
_dl_try_allocate_static_tls is called. Since the libraries are still in
the process of being relocated, _dl_try_allocate_static_tls can't call
_dl_update_slotinfo itself, so it marks each map with l_need_tls_init. All well
and good so far. Then, after the relocation loop, dl_open_worker
calls _dl_add_to_slotinfo and _dl_update_slotinfo for each loaded
library. So, on the first call to _dl_update_slotinfo, the slotinfo
array has entries for previously loaded libraries, and just one newly
loaded library. _dl_update_slotinfo sees that the dtv generation
(dtv[0].counter) is out of date with respect to the slotinfo
generation for the new library, so updates the dtv, *and sets the dtv
generation to the new slotinfo generation*. This is where things go
bad. When _dl_update_slotinfo is called for the second and subsequent
newly loaded libraries, it does nothing because the dtv generation is
already updated.
Fixed by arranging to call _dl_update_slotinfo and dl_init_static_tls
after all the _dl_add_to_slotinfo calls have occurred. Notice also
that _dl_update_slotinfo only needs to be called once.
2009-03-30 Alan Modra <amodra@bigpond.net.au>
* elf/dl-open.c (dl_open_worker): Delay running _dl_update_slotinfo
until all loaded modules have called _dl_add_to_slotinfo, and then
only call it once.
* elf/elf.h (R_PPC_TLSGD, R_PPC_TLSLD, DT_PPC_TLSOPT): Define.
(R_PPC64_TLSGD, R_PPC64_TLSLD, DT_PPC64_TLSOPT): Define.
(DT_PPC_NUM, DT_PPC64_NUM): Increment.
* sysdeps/powerpc/powerpc32/dl-machine.h (elf_machine_rela): Optimize
TLS descriptors.
* sysdeps/powerpc/powerpc64/dl-machine.h (elf_machine_rela): Likewise.
Index: elf/dl-open.c
===================================================================
RCS file: /cvs/glibc/libc/elf/dl-open.c,v
retrieving revision 1.144
diff -u -p -r1.144 dl-open.c
--- elf/dl-open.c 16 Mar 2009 02:15:46 -0000 1.144
+++ elf/dl-open.c 28 Mar 2009 10:21:48 -0000
@@ -208,7 +208,7 @@ dl_open_worker (void *a)
int mode = args->mode;
struct link_map *new;
unsigned int i;
- bool any_tls = false;
+ struct link_map *any_tls = NULL;
struct link_map *call_map = NULL;
/* Check whether _dl_open() has been called from a valid DSO. */
@@ -490,28 +490,36 @@ dl_open_worker (void *a)
might have to increase its size. */
_dl_add_to_slotinfo (imap);
- if (imap->l_need_tls_init)
- {
- imap->l_need_tls_init = 0;
+ any_tls = imap;
+ }
+ }
+
+ if (any_tls != NULL)
+ {
#ifdef SHARED
- /* Update the slot information data for at least the
- generation of the DSO we are allocating data for. */
- _dl_update_slotinfo (imap->l_tls_modid);
+ /* Update the slot information data for at least the
+ generation of the DSO we are allocating data for. */
+ _dl_update_slotinfo (any_tls->l_tls_modid);
#endif
+ for (i = 0; i < new->l_searchlist.r_nlist; ++i)
+ {
+ struct link_map *imap = new->l_searchlist.r_list[i];
+
+ if (! imap->l_init_called && imap->l_need_tls_init)
+ {
+ imap->l_need_tls_init = 0;
+
GL(dl_init_static_tls) (imap);
assert (imap->l_need_tls_init == 0);
}
-
- /* We have to bump the generation counter. */
- any_tls = true;
}
- }
- /* Bump the generation number if necessary. */
- if (any_tls && __builtin_expect (++GL(dl_tls_generation) == 0, 0))
- _dl_fatal_printf (N_("\
+ /* Bump the generation number. */
+ if (__builtin_expect (++GL(dl_tls_generation) == 0, 0))
+ _dl_fatal_printf (N_("\
TLS generation counter wrapped! Please report this."));
+ }
/* Run the initializer functions of new objects. */
_dl_init (new, args->argc, args->argv, args->env);
Index: elf/elf.h
===================================================================
RCS file: /cvs/glibc/libc/elf/elf.h,v
retrieving revision 1.170
diff -u -p -r1.170 elf.h
--- elf/elf.h 13 Mar 2009 23:51:35 -0000 1.170
+++ elf/elf.h 24 Mar 2009 04:32:52 -0000
@@ -2023,6 +2023,8 @@ typedef Elf32_Addr Elf32_Conflict;
#define R_PPC_GOT_DTPREL16_LO 92 /* half16* (sym+add)@got@dtprel@l */
#define R_PPC_GOT_DTPREL16_HI 93 /* half16* (sym+add)@got@dtprel@h */
#define R_PPC_GOT_DTPREL16_HA 94 /* half16* (sym+add)@got@dtprel@ha */
+#define R_PPC_TLSGD 95 /* none (sym+add)@tlsgd */
+#define R_PPC_TLSLD 96 /* none (sym+add)@tlsld */
/* Keep this the last entry. */
#define R_PPC_NUM 95
@@ -2066,7 +2068,8 @@ typedef Elf32_Addr Elf32_Conflict;
/* PowerPC specific values for the Dyn d_tag field. */
#define DT_PPC_GOT (DT_LOPROC + 0)
-#define DT_PPC_NUM 1
+#define DT_PPC_TLSOPT (DT_LOPROC + 1)
+#define DT_PPC_NUM 2
/* PowerPC64 relocations defined by the ABIs */
#define R_PPC64_NONE R_PPC_NONE
@@ -2179,6 +2182,8 @@ typedef Elf32_Addr Elf32_Conflict;
#define R_PPC64_DTPREL16_HIGHERA 104 /* half16 (sym+add)@dtprel@highera */
#define R_PPC64_DTPREL16_HIGHEST 105 /* half16 (sym+add)@dtprel@highest */
#define R_PPC64_DTPREL16_HIGHESTA 106 /* half16 (sym+add)@dtprel@highesta */
+#define R_PPC64_TLSGD 107 /* none (sym+add)@tlsgd */
+#define R_PPC64_TLSLD 108 /* none (sym+add)@tlsld */
/* Keep this the last entry. */
#define R_PPC64_NUM 107
@@ -2187,7 +2192,8 @@ typedef Elf32_Addr Elf32_Conflict;
#define DT_PPC64_GLINK (DT_LOPROC + 0)
#define DT_PPC64_OPD (DT_LOPROC + 1)
#define DT_PPC64_OPDSZ (DT_LOPROC + 2)
-#define DT_PPC64_NUM 3
+#define DT_PPC64_TLSOPT (DT_LOPROC + 3)
+#define DT_PPC64_NUM 4
/* ARM specific declarations */
Index: sysdeps/powerpc/powerpc32/dl-machine.h
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/powerpc/powerpc32/dl-machine.h,v
retrieving revision 1.15
diff -u -p -r1.15 dl-machine.h
--- sysdeps/powerpc/powerpc32/dl-machine.h 27 Oct 2006 23:11:46 -0000 1.15
+++ sysdeps/powerpc/powerpc32/dl-machine.h 24 Mar 2009 04:32:56 -0000
@@ -339,6 +339,31 @@ elf_machine_rela (struct link_map *map,
# endif
case R_PPC_DTPMOD32:
+ if (map->l_info[DT_PPC(TLSOPT)] != 0)
+ {
+ if (!NOT_BOOTSTRAP)
+ {
+ reloc_addr[0] = 0;
+ reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ + TLS_DTV_OFFSET);
+ break;
+ }
+ else if (sym_map != NULL)
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+ reloc_addr[0] = 0;
+ /* Set up for local dynamic. */
+ reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ + TLS_DTV_OFFSET);
+ break;
+ }
+ }
+ }
if (!NOT_BOOTSTRAP)
/* During startup the dynamic linker is always index 1. */
*reloc_addr = 1;
@@ -348,6 +373,26 @@ elf_machine_rela (struct link_map *map,
*reloc_addr = sym_map->l_tls_modid;
break;
case R_PPC_DTPREL32:
+ if (map->l_info[DT_PPC(TLSOPT)] != 0)
+ {
+ if (!NOT_BOOTSTRAP)
+ {
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ break;
+ }
+ else if (sym_map != NULL)
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ break;
+ }
+ }
+ }
/* During relocation all TLS symbols are defined and used.
Therefore the offset is already correct. */
if (NOT_BOOTSTRAP && sym_map != NULL)
Index: sysdeps/powerpc/powerpc64/dl-machine.h
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/powerpc/powerpc64/dl-machine.h,v
retrieving revision 1.27
diff -u -p -r1.27 dl-machine.h
--- sysdeps/powerpc/powerpc64/dl-machine.h 11 Apr 2008 00:02:44 -0000 1.27
+++ sysdeps/powerpc/powerpc64/dl-machine.h 24 Mar 2009 04:32:56 -0000
@@ -574,6 +574,31 @@ elf_machine_rela (struct link_map *map,
#if !defined RTLD_BOOTSTRAP || USE___THREAD
case R_PPC64_DTPMOD64:
+ if (map->l_info[DT_PPC64(TLSOPT)] != 0)
+ {
+# ifdef RTLD_BOOTSTRAP
+ reloc_addr[0] = 0;
+ reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ + TLS_DTV_OFFSET);
+ return;
+# else
+ if (sym_map != NULL)
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+ reloc_addr[0] = 0;
+ /* Set up for local dynamic. */
+ reloc_addr[1] = (sym_map->l_tls_offset - TLS_TP_OFFSET
+ + TLS_DTV_OFFSET);
+ return;
+ }
+ }
+# endif
+ }
# ifdef RTLD_BOOTSTRAP
/* During startup the dynamic linker is always index 1. */
*reloc_addr = 1;
@@ -586,6 +611,26 @@ elf_machine_rela (struct link_map *map,
return;
case R_PPC64_DTPREL64:
+ if (map->l_info[DT_PPC64(TLSOPT)] != 0)
+ {
+# ifdef RTLD_BOOTSTRAP
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ return;
+# else
+ if (sym_map != NULL)
+ {
+# ifndef SHARED
+ CHECK_STATIC_TLS (map, sym_map);
+# else
+ if (TRY_STATIC_TLS (map, sym_map))
+# endif
+ {
+ *reloc_addr = TLS_TPREL_VALUE (sym_map, sym, reloc);
+ return;
+ }
+ }
+# endif
+ }
/* During relocation all TLS symbols are defined and used.
Therefore the offset is already correct. */
# ifndef RTLD_BOOTSTRAP
--
Alan Modra
Australia Development Lab, IBM