This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 1/3] Refactor trampoline code.


Hi, I returned to storing floating point registers. The first step is to
refactor the code so that it can be factored out in the second patch, with the
SSE saving logic added in the third patch. The code is currently inconsistent
in several ways:

_dl_runtime_resolve does not save r10 and r11 but the others do.
_dl_runtime_resolve does not clobber rax but the others do. This could be a
problem with variadic calls. The relevant part of the ABI is:

"
Note that %r11 is neither required to be preserved, nor is it used to pass arguments.
Making this register available as scratch register means that code in the PLT
need not spill any registers when computing the address to which control needs to be transferred.
%rax is used to indicate the number of vector arguments passed to a function requiring a variable
number of arguments. %r10 is used for passing a function's static chain pointer
"

Could somebody clarify? How does the part about the %r10 static chain pointer apply?


The following passes the tests and should work if we assume that r10 and r11 are scratch registers.

	* sysdeps/x86_64/dl-tlsdesc.S: Refactor trampolines.
	* sysdeps/x86_64/dl-trampoline.S: Likewise.

---
 sysdeps/x86_64/dl-tlsdesc.S    |   53 ++++++++++++++++------------------------
 sysdeps/x86_64/dl-trampoline.S |    8 ++++--
 2 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S
index de5219a..c439c7e 100644
--- a/sysdeps/x86_64/dl-tlsdesc.S
+++ b/sysdeps/x86_64/dl-tlsdesc.S
@@ -98,43 +98,37 @@ _dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax"))
 _dl_tlsdesc_dynamic:
 	/* Preserve call-clobbered registers that we modify.
 	   We need two scratch regs anyway.  */
-	movq	%rsi, -16(%rsp)
-	movq	%fs:DTV_OFFSET, %rsi
-	movq	%rdi, -8(%rsp)
-	movq	TLSDESC_ARG(%rax), %rdi
-	movq	(%rsi), %rax
-	cmpq	%rax, TLSDESC_GEN_COUNT(%rdi)
+	movq	%fs:DTV_OFFSET, %r10
+	movq	TLSDESC_ARG(%rax), %r11
+	movq	(%r10), %rax
+	cmpq	%rax, TLSDESC_GEN_COUNT(%r11)
 	ja	.Lslow
-	movq	TLSDESC_MODID(%rdi), %rax
+	movq	TLSDESC_MODID(%r11), %rax
 	salq	$4, %rax
-	movq	(%rax,%rsi), %rax
+	movq	(%rax,%r10), %rax
 	cmpq	$-1, %rax
 	je	.Lslow
-	addq	TLSDESC_MODOFF(%rdi), %rax
+	addq	TLSDESC_MODOFF(%r11), %rax
 .Lret:
-	movq	-16(%rsp), %rsi
 	subq	%fs:0, %rax
-	movq	-8(%rsp), %rdi
 	ret
 .Lslow:
 	/* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9,
-	   r10 and r11.  Also, align the stack, that's off by 8 bytes.	*/
+	   Also, align the stack, that's off by 8 bytes.	*/
 	subq	$72, %rsp
 	cfi_adjust_cfa_offset (72)
 	movq	%rdx, 8(%rsp)
 	movq	%rcx, 16(%rsp)
 	movq	%r8, 24(%rsp)
 	movq	%r9, 32(%rsp)
-	movq	%r10, 40(%rsp)
-	movq	%r11, 48(%rsp)
-	/* %rdi already points to the tlsinfo data structure.  */
+
+	movq %r11, %rdi
 	call	__tls_get_addr@PLT
+
 	movq	8(%rsp), %rdx
 	movq	16(%rsp), %rcx
 	movq	24(%rsp), %r8
 	movq	32(%rsp), %r9
-	movq	40(%rsp), %r10
-	movq	48(%rsp), %r11
 	addq	$72, %rsp
 	cfi_adjust_cfa_offset (-72)
 	jmp	.Lret
@@ -164,27 +158,26 @@ _dl_tlsdesc_dynamic:
 _dl_tlsdesc_resolve_rela:
 	cfi_adjust_cfa_offset (8)
 	/* Save all call-clobbered registers.  */
+	movq (%rsp), %r11
 	subq	$72, %rsp
 	cfi_adjust_cfa_offset (72)
 	movq	%rax, (%rsp)
 	movq	%rdi, 8(%rsp)
-	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
 	movq	%rsi, 16(%rsp)
-	movq	72(%rsp), %rsi	/* Pass link_map* in %rsi.  */
 	movq	%r8, 24(%rsp)
 	movq	%r9, 32(%rsp)
-	movq	%r10, 40(%rsp)
-	movq	%r11, 48(%rsp)
 	movq	%rdx, 56(%rsp)
 	movq	%rcx, 64(%rsp)
+
+	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
+  movq  %r11, %rsi /* Pass link_map* in %rsi.  */
 	call	_dl_tlsdesc_resolve_rela_fixup
+
 	movq	(%rsp), %rax
 	movq	8(%rsp), %rdi
 	movq	16(%rsp), %rsi
 	movq	24(%rsp), %r8
 	movq	32(%rsp), %r9
-	movq	40(%rsp), %r10
-	movq	48(%rsp), %r11
 	movq	56(%rsp), %rdx
 	movq	64(%rsp), %rcx
 	addq	$80, %rsp
@@ -210,35 +203,31 @@ _dl_tlsdesc_resolve_rela:
 	cfi_startproc
 	.align 16
 _dl_tlsdesc_resolve_hold:
-0:
 	/* Save all call-clobbered registers.  */
 	subq	$72, %rsp
 	cfi_adjust_cfa_offset (72)
 	movq	%rax, (%rsp)
 	movq	%rdi, 8(%rsp)
-	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
 	movq	%rsi, 16(%rsp)
 	/* Pass _dl_tlsdesc_resolve_hold's address in %rsi.  */
-	leaq	. - _dl_tlsdesc_resolve_hold(%rip), %rsi
 	movq	%r8, 24(%rsp)
 	movq	%r9, 32(%rsp)
-	movq	%r10, 40(%rsp)
-	movq	%r11, 48(%rsp)
 	movq	%rdx, 56(%rsp)
 	movq	%rcx, 64(%rsp)
+
+	movq	%rax, %rdi	/* Pass tlsdesc* in %rdi.  */
+	leaq	. - _dl_tlsdesc_resolve_hold(%rip), %rsi
 	call	_dl_tlsdesc_resolve_hold_fixup
-1:
+
 	movq	(%rsp), %rax
 	movq	8(%rsp), %rdi
 	movq	16(%rsp), %rsi
 	movq	24(%rsp), %r8
 	movq	32(%rsp), %r9
-	movq	40(%rsp), %r10
-	movq	48(%rsp), %r11
 	movq	56(%rsp), %rdx
 	movq	64(%rsp), %rcx
 	addq	$72, %rsp
 	cfi_adjust_cfa_offset (-72)
-	jmp	*(%eax)
+	jmp	*(%rax)
 	cfi_endproc
 	.size	_dl_tlsdesc_resolve_hold, .-_dl_tlsdesc_resolve_hold
diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S
index a25e390..4212145 100644
--- a/sysdeps/x86_64/dl-trampoline.S
+++ b/sysdeps/x86_64/dl-trampoline.S
@@ -30,6 +30,8 @@
 	.align 16
 	cfi_startproc
 _dl_runtime_resolve:
+	movq (%rsp), %r10
+	movq 8(%rsp), %r11
 	cfi_adjust_cfa_offset(16) # Incorporate PLT
 	subq $56,%rsp
 	cfi_adjust_cfa_offset(56)
@@ -40,10 +42,12 @@ _dl_runtime_resolve:
 	movq %rdi, 32(%rsp)
 	movq %r8, 40(%rsp)
 	movq %r9, 48(%rsp)
-	movq 64(%rsp), %rsi	# Copy args pushed by PLT in register.
-	movq 56(%rsp), %rdi	# %rdi: link_map, %rsi: reloc_index
+
+	movq %r11, %rsi	# Copy args pushed by PLT in register.
+	movq %r10, %rdi	# %rdi: link_map, %rsi: reloc_index
 	call _dl_fixup		# Call resolver.
 	movq %rax, %r11		# Save return value
+
 	movq 48(%rsp), %r9	# Get register content back.
 	movq 40(%rsp), %r8
 	movq 32(%rsp), %rdi
-- 
1.7.10.4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]