This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.
Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
[PATCH] ia64 libgcc routines for binary compatibility

From: Jakub Jelinek <jakub at redhat dot com>
To: Ulrich Drepper <drepper at redhat dot com>
Cc: Glibc hackers <libc-hacker at sources dot redhat dot com>
Date: Thu, 2 May 2002 19:08:23 +0200
Subject: [PATCH] ia64 libgcc routines for binary compatibility
Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!

I just skimmed shared libs/binaries and the following libgcc functions are
both reexported from random shared libraries and undefined in some other
shared libs or binaries, meaning they take them from the shared libraries
which mistakenly used to export them (before .hidden was added in gcc 3.1
to all libgcc.a routines).
All of these are used internally by glibc anyway, so IMHO just exporting
them normally will make various binaries/libraries a tiny bit smaller,
but if you prefer to export them as @GLIBC_2.0 symbols only (i.e. nobody
will be able to link against them), so be it.

2002-05-02  Jakub Jelinek  <jakub@redhat.com>

	* sysdeps/ia64/Makefile: Add ia64libgcc in csu subdir.
	* sysdeps/ia64/Versions (__divtf3, __divdf3, __divsf3, __divdi3,
	__moddi3, __udivdi3, __umoddi3, __multi3): Export at GLIBC_2.0.
	* sysdeps/ia64/ia64libgcc.S: New file.

--- libc/sysdeps/ia64/Makefile.jj	Fri Feb  1 11:01:18 2002
+++ libc/sysdeps/ia64/Makefile	Thu May  2 17:50:59 2002
@@ -9,6 +9,12 @@ ifeq ($(subdir), csu)
 CPPFLAGS-start.S = -D__ASSEMBLY__
 sysdep_routines += hp-timing
 static-only-routines += hp-timing
+
+ifeq (yes,$(build-shared))
+# Compatibility: libgcc routines that older binaries expect libc to export.
+sysdep_routines += ia64libgcc
+shared-only-routines += ia64libgcc
+endif
 endif
 
 ifeq ($(subdir),elf)
--- libc/sysdeps/ia64/Versions.jj	Mon Feb  4 17:35:18 2002
+++ libc/sysdeps/ia64/Versions	Thu May  2 19:08:48 2002
@@ -5,3 +5,10 @@ ld {
     _dl_function_address;
   }
 }
+libc {
+  GLIBC_2.0 {
+    # Functions from libgcc, exported here for binary compatibility.
+    __divtf3; __divdf3; __divsf3; __divdi3; __moddi3; __udivdi3; __umoddi3;
+    __multi3;
+  }
+}
--- libc/sysdeps/ia64/ia64libgcc.S.jj	Thu May  2 17:51:11 2002
+++ libc/sysdeps/ia64/ia64libgcc.S	Thu May  2 19:03:01 2002
@@ -0,0 +1,336 @@
+/* From the Intel IA-64 Optimization Guide, choose the minimum latency
+   alternative.  */
+
+#include <sysdep.h>
+#undef ret
+
+/* __divtf3
+   Compute an 80-bit IEEE double-extended quotient, returned in fret0.
+   farg0 holds the dividend (a).  farg1 holds the divisor (b).  */
+
+ENTRY(__divtf3)
+	cmp.eq p7, p0 = r0, r0	/* p7 = 1 (r0 == r0 is always true) */
+	frcpa.s0 f10, p6 = farg0, farg1	/* y = f10 ~ 1/b; p6 gates the refinement below */
+	;;
+(p6)	cmp.ne p7, p0 = r0, r0	/* p6 set => p7 = 0, so p7 == !p6 */
+	.pred.rel.mutex p6, p7	/* p6 and p7 are never both set */
+(p6)	fnma.s1 f11 = farg1, f10, f1	/* e = 1 - b*y  (f1 reads as 1.0) */
+(p6)	fma.s1 f12 = farg0, f10, f0	/* q0 = a*y  (f0 reads as 0.0) */
+	;;
+(p6)	fma.s1 f13 = f11, f11, f0	/* f13 = e^2 */
+(p6)	fma.s1 f14 = f11, f11, f11	/* f14 = e + e^2 */
+	;;
+(p6)	fma.s1 f11 = f13, f13, f11	/* f11 = e + e^4 */
+(p6)	fma.s1 f13 = f14, f10, f10	/* y1 = y*(1 + e + e^2) */
+	;;
+(p6)	fma.s1 f10 = f13, f11, f10	/* y2 = y + y1*(e + e^4) */
+(p6)	fnma.s1 f11 = farg1, f12, farg0	/* r0 = a - b*q0 */
+	;;
+(p6)	fma.s1 f11 = f11, f10, f12	/* q1 = q0 + r0*y2 */
+(p6)	fnma.s1 f12 = farg1, f10, f1	/* e2 = 1 - b*y2 */
+	;;
+(p6)	fma.s1 f10 = f12, f10, f10	/* y3 = y2 + e2*y2 */
+(p6)	fnma.s1 f12 = farg1, f11, farg0	/* r1 = a - b*q1 */
+	;;
+(p6)	fma.s0 fret0 = f12, f10, f11	/* result = q1 + r1*y3, under status field s0 */
+(p7)	mov fret0 = f10	/* no refinement: return frcpa's f10 as is */
+	br.ret.sptk rp
+END(__divtf3)
+
+/* __divdf3
+   Compute a 64-bit IEEE double quotient, returned in fret0.
+   farg0 holds the dividend (a).  farg1 holds the divisor (b).  */
+
+ENTRY(__divdf3)
+	cmp.eq p7, p0 = r0, r0	/* p7 = 1 (r0 == r0 is always true) */
+	frcpa.s0 f10, p6 = farg0, farg1	/* y = f10 ~ 1/b; p6 gates the refinement below */
+	;;
+(p6)	cmp.ne p7, p0 = r0, r0	/* p6 set => p7 = 0, so p7 == !p6 */
+	.pred.rel.mutex p6, p7	/* p6 and p7 are never both set */
+(p6)	fmpy.s1 f11 = farg0, f10	/* q0 = a*y */
+(p6)	fnma.s1 f12 = farg1, f10, f1	/* e = 1 - b*y  (f1 reads as 1.0) */
+	;;
+(p6)	fma.s1 f11 = f12, f11, f11	/* q1 = q0 + e*q0 */
+(p6)	fmpy.s1 f13 = f12, f12	/* f13 = e^2 */
+	;;
+(p6)	fma.s1 f10 = f12, f10, f10	/* y1 = y + e*y */
+(p6)	fma.s1 f11 = f13, f11, f11	/* q2 = q1 + e^2*q1 */
+	;;
+(p6)	fmpy.s1 f12 = f13, f13	/* f12 = e^4 */
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+	;;
+(p6)	fma.d.s1 f11 = f12, f11, f11	/* q3 = q2 + e^4*q2, rounded to double (.d) */
+(p6)	fma.s1 f10 = f12, f10, f10	/* y3 = y2 + e^4*y2 */
+	;;
+(p6)	fnma.d.s1 f8 = farg1, f11, farg0	/* r = a - b*q3 */
+	;;
+(p6)	fma.d fret0 = f8, f10, f11	/* result = q3 + r*y3, rounded to double */
+(p7)	mov fret0 = f10	/* no refinement: return frcpa's f10 as is */
+	br.ret.sptk rp
+	;;
+END(__divdf3)
+
+/* __divsf3
+   Compute a 32-bit IEEE float quotient, returned in fret0.
+   farg0 holds the dividend (a).  farg1 holds the divisor (b).  */
+
+ENTRY(__divsf3)
+	cmp.eq p7, p0 = r0, r0	/* p7 = 1 (r0 == r0 is always true) */
+	frcpa.s0 f10, p6 = farg0, farg1	/* y = f10 ~ 1/b; p6 gates the refinement below */
+	;;
+(p6)	cmp.ne p7, p0 = r0, r0	/* p6 set => p7 = 0, so p7 == !p6 */
+	.pred.rel.mutex p6, p7	/* p6 and p7 are never both set */
+(p6)	fmpy.s1 f8 = farg0, f10	/* q0 = a*y */
+(p6)	fnma.s1 f9 = farg1, f10, f1	/* e = 1 - b*y  (f1 reads as 1.0) */
+	;;
+(p6)	fma.s1 f8 = f9, f8, f8	/* q1 = q0 + e*q0 */
+(p6)	fmpy.s1 f9 = f9, f9	/* f9 = e^2 */
+	;;
+(p6)	fma.s1 f8 = f9, f8, f8	/* q2 = q1 + e^2*q1 */
+(p6)	fmpy.s1 f9 = f9, f9	/* f9 = e^4 */
+	;;
+(p6)	fma.d.s1 f10 = f9, f8, f8	/* q3 = q2 + e^4*q2, rounded to double (.d) */
+	;;
+(p6)	fnorm.s.s0 fret0 = f10	/* result = q3 rounded to single (.s) */
+(p7)	mov fret0 = f10	/* no refinement: return frcpa's f10 as is */
+	br.ret.sptk rp
+	;;
+END(__divsf3)
+
+/* __divdi3
+   Compute a 64-bit signed integer quotient, returned in ret0.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(__divdi3)
+	.regstk 2,0,0,0	/* two input registers, no locals/outputs/rotating */
+	/* Transfer inputs to FP registers.  */
+	setf.sig f8 = in0
+	setf.sig f9 = in1
+	;;
+	/* Convert the inputs to FP, so that they won't be treated as
+	   unsigned.  */
+	fcvt.xf f8 = f8
+	fcvt.xf f9 = f9
+	;;
+	/* Compute the reciprocal approximation.  */
+	frcpa.s1 f10, p6 = f8, f9	/* y = f10 ~ 1/b; p6 gates the refinement below */
+	;;
+	/* 3 Newton-Raphson iterations.  */
+(p6)	fnma.s1 f11 = f9, f10, f1	/* e = 1 - b*y  (f1 reads as 1.0) */
+(p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y */
+	;;
+(p6)	fmpy.s1 f13 = f11, f11	/* f13 = e^2 */
+(p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e*q0 */
+	;;
+(p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y + e*y */
+(p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e^2*q1 */
+	;;
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+(p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2 */
+	;;
+(p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
+	;;
+	/* Round quotient to an integer.  */
+	fcvt.fx.trunc.s1 f10 = f10	/* unpredicated: also runs on frcpa's raw f10 when p6 is clear */
+	;;
+	/* Transfer result to GP registers.  */
+	getf.sig ret0 = f10	/* NOTE(review): no explicit b == 0 check is visible in this routine */
+	br.ret.sptk rp
+	;;
+END(__divdi3)
+
+/* __moddi3
+   Compute a 64-bit signed integer modulus a - q*b, returned in ret0.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(__moddi3)
+	.regstk 2,0,0,0	/* two input registers, no locals/outputs/rotating */
+	/* Transfer inputs to FP registers.  */
+	setf.sig f14 = in0	/* f14 keeps the integer a for the final xma.l */
+	setf.sig f9 = in1
+	;;
+	/* Convert the inputs to FP, so that they won't be treated as
+	   unsigned.  */
+	fcvt.xf f8 = f14
+	fcvt.xf f9 = f9
+	;;
+	/* Compute the reciprocal approximation.  */
+	frcpa.s1 f10, p6 = f8, f9	/* y = f10 ~ 1/b; p6 gates the refinement below */
+	;;
+	/* 3 Newton-Raphson iterations.  */
+(p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y */
+(p6)	fnma.s1 f11 = f9, f10, f1	/* e = 1 - b*y  (f1 reads as 1.0) */
+	;;
+(p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e*q0 */
+(p6)	fmpy.s1 f13 = f11, f11	/* f13 = e^2 */
+	;;
+(p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y + e*y */
+(p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e^2*q1 */
+	;;
+	sub in1 = r0, in1	/* in1 = -b (integer), for the final multiply-add */
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+(p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2; last read of the FP b in f9 */
+	;;
+	setf.sig f9 = in1	/* f9 now holds the integer -b */
+(p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
+	;;
+	fcvt.fx.trunc.s1 f10 = f10	/* q = trunc(q3) as a signed integer; unpredicated */
+	;;
+	/* r = q * (-b) + a  */
+	xma.l f10 = f10, f9, f14
+	;;
+	/* Transfer result to GP registers.  */
+	getf.sig ret0 = f10	/* NOTE(review): no explicit b == 0 check is visible in this routine */
+	br.ret.sptk rp
+	;;
+END(__moddi3)
+
+/* __udivdi3
+   Compute a 64-bit unsigned integer quotient, returned in ret0.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(__udivdi3)
+	.regstk 2,0,0,0	/* two input registers, no locals/outputs/rotating */
+	/* Transfer inputs to FP registers.  */
+	setf.sig f8 = in0
+	setf.sig f9 = in1
+	;;
+	/* Convert the inputs to FP, to avoid FP software-assist faults.  */
+	fcvt.xuf.s1 f8 = f8
+	fcvt.xuf.s1 f9 = f9
+	;;
+	/* Compute the reciprocal approximation.  */
+	frcpa.s1 f10, p6 = f8, f9	/* y = f10 ~ 1/b; p6 gates the refinement below */
+	;;
+	/* 3 Newton-Raphson iterations.  */
+(p6)	fnma.s1 f11 = f9, f10, f1	/* e = 1 - b*y  (f1 reads as 1.0) */
+(p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y */
+	;;
+(p6)	fmpy.s1 f13 = f11, f11	/* f13 = e^2 */
+(p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e*q0 */
+	;;
+(p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y + e*y */
+(p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e^2*q1 */
+	;;
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+(p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2 */
+	;;
+(p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
+	;;
+	/* Round quotient to an unsigned integer.  */
+	fcvt.fxu.trunc.s1 f10 = f10	/* unpredicated: also runs on frcpa's raw f10 when p6 is clear */
+	;;
+	/* Transfer result to GP registers.  */
+	getf.sig ret0 = f10	/* NOTE(review): no explicit b == 0 check is visible in this routine */
+	br.ret.sptk rp
+	;;
+END(__udivdi3)
+
+/* __umoddi3
+   Compute a 64-bit unsigned integer modulus a - q*b, returned in ret0.
+   in0 holds the dividend (a).  in1 holds the divisor (b).  */
+
+ENTRY(__umoddi3)
+	.regstk 2,0,0,0	/* two input registers, no locals/outputs/rotating */
+	/* Transfer inputs to FP registers.  */
+	setf.sig f14 = in0	/* f14 keeps the integer a for the final xma.l */
+	setf.sig f9 = in1
+	;;
+	/* Convert the inputs to FP, to avoid FP software assist faults.  */
+	fcvt.xuf.s1 f8 = f14
+	fcvt.xuf.s1 f9 = f9
+	;;
+	/* Compute the reciprocal approximation.  */
+	frcpa.s1 f10, p6 = f8, f9	/* y = f10 ~ 1/b; p6 gates the refinement below */
+	;;
+	/* 3 Newton-Raphson iterations.  */
+(p6)	fmpy.s1 f12 = f8, f10	/* q0 = a*y */
+(p6)	fnma.s1 f11 = f9, f10, f1	/* e = 1 - b*y  (f1 reads as 1.0) */
+	;;
+(p6)	fma.s1 f12 = f11, f12, f12	/* q1 = q0 + e*q0 */
+(p6)	fmpy.s1 f13 = f11, f11	/* f13 = e^2 */
+	;;
+(p6)	fma.s1 f10 = f11, f10, f10	/* y1 = y + e*y */
+(p6)	fma.s1 f11 = f13, f12, f12	/* q2 = q1 + e^2*q1 */
+	;;
+	sub in1 = r0, in1	/* in1 = -b (integer), for the final multiply-add */
+(p6)	fma.s1 f10 = f13, f10, f10	/* y2 = y1 + e^2*y1 */
+(p6)	fnma.s1 f12 = f9, f11, f8	/* r = a - b*q2; last read of the FP b in f9 */
+	;;
+	setf.sig f9 = in1	/* f9 now holds the integer -b */
+(p6)	fma.s1 f10 = f12, f10, f11	/* q3 = q2 + r*y2 */
+	;;
+	/* Round quotient to an unsigned integer.  */
+	fcvt.fxu.trunc.s1 f10 = f10	/* unpredicated: also runs on frcpa's raw f10 when p6 is clear */
+	;;
+	/* r = q * (-b) + a  */
+	xma.l f10 = f10, f9, f14
+	;;
+	/* Transfer result to GP registers.  */
+	getf.sig ret0 = f10	/* NOTE(review): no explicit b == 0 check is visible in this routine */
+	br.ret.sptk rp
+	;;
+END(__umoddi3)
+
+/* __multi3
+   Compute the low 128 bits of a 128-bit by 128-bit multiply (ret0 = low, ret1 = high).
+   in0/in1 hold the low/high half of a, in2/in3 the low/high half of b.  */
+
+ENTRY(__multi3)
+	.regstk 4,0,0,0	/* four input registers, no locals/outputs/rotating */
+	setf.sig f6 = in1	/* f6 = a_hi */
+	movl r19 = 0xffffffff	/* r19 = mask for the low 32 bits */
+	setf.sig f7 = in2	/* f7 = b_lo */
+	;;
+	and r14 = r19, in0	/* a0 = low 32 bits of a_lo */
+	;;
+	setf.sig f10 = r14	/* f10 = a0 */
+	and r14 = r19, in2	/* b0 = low 32 bits of b_lo */
+	xmpy.l f9 = f6, f7	/* f9 = low 64 bits of a_hi*b_lo */
+	;;
+	setf.sig f6 = r14	/* f6 = b0 */
+	shr.u r14 = in0, 32	/* a1 = high 32 bits of a_lo */
+	;;
+	setf.sig f7 = r14	/* f7 = a1 */
+	shr.u r14 = in2, 32	/* b1 = high 32 bits of b_lo */
+	;;
+	setf.sig f8 = r14	/* f8 = b1 */
+	xmpy.l f11 = f10, f6	/* f11 = a0*b0 */
+	xmpy.l f6 = f7, f6	/* f6 = a1*b0 */
+	;;
+	getf.sig r16 = f11	/* r16 = a0*b0 */
+	xmpy.l f7 = f7, f8	/* f7 = a1*b1 */
+	;;
+	shr.u r14 = r16, 32	/* r14 = (a0*b0) >> 32 */
+	and r16 = r19, r16	/* r16 = low 32 bits of a0*b0 = bits 0..31 of the result */
+	getf.sig r17 = f6	/* r17 = a1*b0 */
+	setf.sig f6 = in0	/* f6 = a_lo */
+	;;
+	setf.sig f11 = r14	/* f11 = (a0*b0) >> 32 */
+	getf.sig r21 = f7	/* r21 = a1*b1 */
+	setf.sig f7 = in3	/* f7 = b_hi */
+	;;
+	xma.l f11 = f10, f8, f11	/* f11 = a0*b1 + ((a0*b0) >> 32) */
+	xma.l f6 = f6, f7, f9	/* f6 = a_lo*b_hi + a_hi*b_lo (low 64 bits) */
+	;;
+	getf.sig r18 = f11
+	;;
+	add r18 = r18, r17	/* mid = a0*b1 + a1*b0 + ((a0*b0) >> 32), may wrap */
+	;;
+	and r15 = r19, r18	/* r15 = low 32 bits of mid */
+	cmp.ltu p7, p6 = r18, r17	/* p7 = the add above carried out of 64 bits */
+	;;
+	getf.sig r22 = f6	/* r22 = cross terms for the high word */
+(p7)	adds r14 = 1, r19	/* r14 = 1 << 32: weight of that carry in the high word */
+	;;
+(p7)	add r21 = r21, r14	/* fold the carry into a1*b1 (reads r14 before the shr.u below writes it) */
+	shr.u r14 = r18, 32	/* r14 = mid >> 32 */
+	shl r15 = r15, 32	/* r15 = bits 32..63 of the result */
+	;;
+	add r20 = r21, r14	/* r20 = high 64 bits of a_lo*b_lo */
+	;;
+	add ret0 = r15, r16	/* low 64 bits of the product */
+	add ret1 = r22, r20	/* high 64 bits: high(a_lo*b_lo) + cross terms */
+	br.ret.sptk rp
+	;;
+END(__multi3)

	Jakub
Follow-Ups:
- Re: [PATCH] ia64 libgcc routines for binary compatibility
  - From: Ulrich Drepper
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]