[PATCH] Fix powl on x86_64 [BZ #258]

Wed Jul 14 18:17:00 GMT 2004

Hi!

i386 will need similar treatment.
I am not sure whether fld*/f{,u}comi{,p}/j? is faster than
fcompl/fnstsw/testing %ah/j? or vice versa; the comparisons could perhaps
be coded more efficiently.

2004-07-14  Jakub Jelinek  <jakub@redhat.com>

	[BZ #258]
	* math/libm-test.inc (max_value, min_value): New variables.
	(initialize): Initialize them.
	(pow_test): Add a couple of new tests.
	* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Don't raise the
	invalid exception if |y| is >= 1L<<63.  If y*log2(x) overflows
	to +-inf, still return +inf/+0 instead of NaN.  Replace
	fldl MO(zero) with fldz and fldl MO(one) with fld1.

--- libc/math/libm-test.inc.jj	2004-02-13 12:28:07.000000000 +0100
+++ libc/math/libm-test.inc	2004-07-14 20:09:24.475429821 +0200
@@ -169,7 +169,7 @@ static int output_points;	/* Should the 
 static int ignore_max_ulp;	/* Should we ignore max_ulp?  */
 
 static FLOAT minus_zero, plus_zero;
-static FLOAT plus_infty, minus_infty, nan_value;
+static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value;
 
 static FLOAT max_error, real_max_error, imag_max_error;
 
@@ -3593,6 +3593,25 @@ pow_test (void)
   TEST_ff_f (pow, -1, plus_infty, 1);
   TEST_ff_f (pow, 1, minus_infty, 1);
   TEST_ff_f (pow, -1, minus_infty, 1);
+  TEST_ff_f (pow, 1, 1, 1);
+  TEST_ff_f (pow, 1, -1, 1);
+  TEST_ff_f (pow, 1, 1.25, 1);
+  TEST_ff_f (pow, 1, -1.25, 1);
+  TEST_ff_f (pow, 1, 0x1p72L, 1);
+
+  /* pow (x, +-0) == 1.  */
+  TEST_ff_f (pow, plus_infty, 0, 1);
+  TEST_ff_f (pow, plus_infty, minus_zero, 1);
+  TEST_ff_f (pow, minus_infty, 0, 1);
+  TEST_ff_f (pow, minus_infty, minus_zero, 1);
+  TEST_ff_f (pow, 32.75L, 0, 1);
+  TEST_ff_f (pow, 32.75L, minus_zero, 1);
+  TEST_ff_f (pow, -32.75L, 0, 1);
+  TEST_ff_f (pow, -32.75L, minus_zero, 1);
+  TEST_ff_f (pow, 0x1p72L, 0, 1);
+  TEST_ff_f (pow, 0x1p72L, minus_zero, 1);
+  TEST_ff_f (pow, 0x1p-72L, 0, 1);
+  TEST_ff_f (pow, 0x1p-72L, minus_zero, 1);
 
   TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION);
   TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
@@ -3609,6 +3628,10 @@ pow_test (void)
   TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
   TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
 
+  TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty);
+  TEST_ff_f (pow, 10, -0x1p72L, 0);
+  TEST_ff_f (pow, max_value, max_value, plus_infty);
+  TEST_ff_f (pow, 10, -max_value, 0);
 
   TEST_ff_f (pow, 0, 1, 0);
   TEST_ff_f (pow, 0, 11, 0);
@@ -3623,6 +3646,8 @@ pow_test (void)
 
   TEST_ff_f (pow, minus_zero, 2, 0);
   TEST_ff_f (pow, minus_zero, 11.1L, 0);
+  TEST_ff_f (pow, 0, plus_infty, 0);
+  TEST_ff_f (pow, minus_zero, plus_infty, 0);
 
 #ifndef TEST_INLINE
   /* pow (x, +inf) == +inf for |x| > 1.  */
@@ -3667,6 +3692,11 @@ pow_test (void)
   /* pow (-0, y) == +0 for y > 0 and not an odd integer.  */
   TEST_ff_f (pow, minus_zero, 4, 0.0);
 
+  TEST_ff_f (pow, 16, 0.25L, 2);
+  TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
+  TEST_ff_f (pow, 2, 4, 16);
+  TEST_ff_f (pow, 256, 8, 0x1p64L);
+
   TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L);
 
 #if defined TEST_DOUBLE || defined TEST_LDOUBLE
@@ -4312,12 +4342,18 @@ initialize (void)
 		       HUGE_VALL, HUGE_VAL, HUGE_VALF);
   minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF,
 			-HUGE_VALL, -HUGE_VAL, -HUGE_VALF);
+  max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX,
+		      LDBL_MAX, DBL_MAX, FLT_MAX);
+  min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN,
+		      LDBL_MIN, DBL_MIN, FLT_MIN);
 
   (void) &plus_zero;
   (void) &nan_value;
   (void) &minus_zero;
   (void) &plus_infty;
   (void) &minus_infty;
+  (void) &max_value;
+  (void) &min_value;
 
   /* Clear all exceptions.  From now on we must not get random exceptions.  */
   feclearexcept (FE_ALL_EXCEPT);
--- libc/sysdeps/x86_64/fpu/e_powl.S.jj	2001-09-19 12:24:08.000000000 +0200
+++ libc/sysdeps/x86_64/fpu/e_powl.S	2004-07-14 20:02:06.717899272 +0200
@@ -1,5 +1,5 @@
 /* ix87 specific implementation of pow function.
-   Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
 
@@ -48,6 +48,9 @@ one:	.double 1.0
 	ASM_TYPE_DIRECTIVE(limit,@object)
 limit:	.double 0.29
 	ASM_SIZE_DIRECTIVE(limit)
+	ASM_TYPE_DIRECTIVE(p63,@object)
+p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+	ASM_SIZE_DIRECTIVE(p63)
 
 #ifdef PIC
 #define MO(op) op##(%rip)
@@ -87,6 +90,14 @@ ENTRY(__ieee754_powl)
 
 	fxch			// y : x
 
+	/* fistpll raises invalid exception for |y| >= 1L<<63.  */
+	fldl	MO(p63)		// 1L<<63 : y : x
+	fld	%st(1)		// y : 1L<<63 : y : x
+	fabs			// |y| : 1L<<63 : y : x
+	fcomip	%st(1), %st	// 1L<<63 : y : x
+	fstp	%st(0)		// y : x
+	jnc	2f
+
 	/* First see whether `y' is a natural number.  In this case we
 	   can use a more precise algorithm.  */
 	fld	%st		// y : y : x
@@ -105,7 +116,7 @@ ENTRY(__ieee754_powl)
 	negl	%eax
 	adcl	$0, %edx
 	negl	%edx
-4:	fldl	MO(one)		// 1 : x
+4:	fld1			// 1 : x
 	fxch
 
 6:	shrdl	$1, %edx, %eax
@@ -123,7 +134,7 @@ ENTRY(__ieee754_powl)
 
 	/* y is ±NAN */
 30:	fldt	8(%rsp)		// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fucomip	%st(1),%st	// x : y
 	je	31f
 	fxch			// y : x
@@ -133,7 +144,7 @@ ENTRY(__ieee754_powl)
 	.align ALIGNARG(4)
 2:	/* y is a real number.  */
 	fxch			// x : y
-	fldl	MO(one)		// 1.0 : x : y
+	fld1			// 1.0 : x : y
 	fld	%st(1)		// x : 1.0 : x : y
 	fsub	%st(1)		// x-1 : 1.0 : x : y
 	fabs			// |x-1| : 1.0 : x : y
@@ -148,6 +159,11 @@ ENTRY(__ieee754_powl)
 
 7:	fyl2x			// log2(x) : y
 8:	fmul	%st(1)		// y*log2(x) : y
+	fxam
+	fnstsw
+	andb	$0x45, %ah
+	cmpb	$0x05, %ah      // is y*log2(x) == ±inf ?
+	je	28f
 	fst	%st(1)		// y*log2(x) : y*log2(x)
 	frndint			// int(y*log2(x)) : y*log2(x)
 	fsubr	%st, %st(1)	// int(y*log2(x)) : fract(y*log2(x))
@@ -158,11 +174,16 @@ ENTRY(__ieee754_powl)
 	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
 	ret
 
+28:	fstp	%st(1)		// y*log2(x)
+	fld1			// 1 : y*log2(x)
+	fscale			// 2^(y*log2(x)) : y*log2(x)
+	fstp	%st(1)		// 2^(y*log2(x))
+	ret
 
 	// pow(x,±0) = 1
 	.align ALIGNARG(4)
 11:	fstp	%st(0)		// pop y
-	fldl	MO(one)
+	fld1
 	ret
 
 	// y == ±inf
@@ -191,7 +212,7 @@ ENTRY(__ieee754_powl)
 	ret
 
 	.align ALIGNARG(4)
-14:	fldl	MO(one)
+14:	fld1
 	ret
 
 	.align ALIGNARG(4)
@@ -275,7 +296,7 @@ ENTRY(__ieee754_powl)
 	jz	27f		// jump if not odd
 	// It's an odd integer.
 	// Raise divide-by-zero exception and get minus infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	fchs
 	ret
@@ -283,7 +304,7 @@ ENTRY(__ieee754_powl)
 25:	fstp	%st(0)
 26:
 27:	// Raise divide-by-zero exception and get infinity value.
-	fldl	MO(one)
+	fld1
 	fdivl	MO(zero)
 	ret
 
@@ -310,7 +331,7 @@ ENTRY(__ieee754_powl)
 
 22:	fstp	%st(0)
 23:
-24:	fldl	MO(zero)
+24:	fldz
 	ret
 
 END(__ieee754_powl)

	Jakub