[PATCH] Fix powl on x86_64 [BZ #258]
Jakub Jelinek
jakub@redhat.com
Wed Jul 14 18:17:00 GMT 2004
Hi!
i386 will need similar treatment.
I'm not sure whether fld*/f{,u}comi{,p}/j? is faster than fcompl/fnstsw/testing %ah/j?
or vice versa; maybe the comparisons could be coded more efficiently.
2004-07-14 Jakub Jelinek <jakub@redhat.com>
[BZ #258]
* math/libm-test.inc (max_value, min_value): New variables.
(initialize): Initialize them.
(pow_test): Add a couple of new tests.
* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Don't generate
an invalid exception if |y| >= 1L<<63. If y*log2(x) overflows
to +-inf, still return +inf/+0 instead of NaN. Replace
fldl MO(zero) with fldz and fldl MO(one) with fld1.
--- libc/math/libm-test.inc.jj 2004-02-13 12:28:07.000000000 +0100
+++ libc/math/libm-test.inc 2004-07-14 20:09:24.475429821 +0200
@@ -169,7 +169,7 @@ static int output_points; /* Should the
static int ignore_max_ulp; /* Should we ignore max_ulp? */
static FLOAT minus_zero, plus_zero;
-static FLOAT plus_infty, minus_infty, nan_value;
+static FLOAT plus_infty, minus_infty, nan_value, max_value, min_value;
static FLOAT max_error, real_max_error, imag_max_error;
@@ -3593,6 +3593,25 @@ pow_test (void)
TEST_ff_f (pow, -1, plus_infty, 1);
TEST_ff_f (pow, 1, minus_infty, 1);
TEST_ff_f (pow, -1, minus_infty, 1);
+ TEST_ff_f (pow, 1, 1, 1);
+ TEST_ff_f (pow, 1, -1, 1);
+ TEST_ff_f (pow, 1, 1.25, 1);
+ TEST_ff_f (pow, 1, -1.25, 1);
+ TEST_ff_f (pow, 1, 0x1p72L, 1);
+
+ /* pow (x, +-0) == 1. */
+ TEST_ff_f (pow, plus_infty, 0, 1);
+ TEST_ff_f (pow, plus_infty, minus_zero, 1);
+ TEST_ff_f (pow, minus_infty, 0, 1);
+ TEST_ff_f (pow, minus_infty, minus_zero, 1);
+ TEST_ff_f (pow, 32.75L, 0, 1);
+ TEST_ff_f (pow, 32.75L, minus_zero, 1);
+ TEST_ff_f (pow, -32.75L, 0, 1);
+ TEST_ff_f (pow, -32.75L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p72L, 0, 1);
+ TEST_ff_f (pow, 0x1p72L, minus_zero, 1);
+ TEST_ff_f (pow, 0x1p-72L, 0, 1);
+ TEST_ff_f (pow, 0x1p-72L, minus_zero, 1);
TEST_ff_f (pow, -0.1L, 1.1L, nan_value, INVALID_EXCEPTION);
TEST_ff_f (pow, -0.1L, -1.1L, nan_value, INVALID_EXCEPTION);
@@ -3609,6 +3628,10 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, -2, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
TEST_ff_f (pow, minus_zero, -11.1L, plus_infty, DIVIDE_BY_ZERO_EXCEPTION);
+ TEST_ff_f (pow, 0x1p72L, 0x1p72L, plus_infty);
+ TEST_ff_f (pow, 10, -0x1p72L, 0);
+ TEST_ff_f (pow, max_value, max_value, plus_infty);
+ TEST_ff_f (pow, 10, -max_value, 0);
TEST_ff_f (pow, 0, 1, 0);
TEST_ff_f (pow, 0, 11, 0);
@@ -3623,6 +3646,8 @@ pow_test (void)
TEST_ff_f (pow, minus_zero, 2, 0);
TEST_ff_f (pow, minus_zero, 11.1L, 0);
+ TEST_ff_f (pow, 0, plus_infty, 0);
+ TEST_ff_f (pow, minus_zero, plus_infty, 0);
#ifndef TEST_INLINE
/* pow (x, +inf) == +inf for |x| > 1. */
@@ -3667,6 +3692,11 @@ pow_test (void)
/* pow (-0, y) == +0 for y > 0 and not an odd integer. */
TEST_ff_f (pow, minus_zero, 4, 0.0);
+ TEST_ff_f (pow, 16, 0.25L, 2);
+ TEST_ff_f (pow, 0x1p64L, 0.125L, 256);
+ TEST_ff_f (pow, 2, 4, 16);
+ TEST_ff_f (pow, 256, 8, 0x1p64L);
+
TEST_ff_f (pow, 0.75L, 1.25L, 0.697953644326574699205914060237425566L);
#if defined TEST_DOUBLE || defined TEST_LDOUBLE
@@ -4312,12 +4342,18 @@ initialize (void)
HUGE_VALL, HUGE_VAL, HUGE_VALF);
minus_infty = CHOOSE (-HUGE_VALL, -HUGE_VAL, -HUGE_VALF,
-HUGE_VALL, -HUGE_VAL, -HUGE_VALF);
+ max_value = CHOOSE (LDBL_MAX, DBL_MAX, FLT_MAX,
+ LDBL_MAX, DBL_MAX, FLT_MAX);
+ min_value = CHOOSE (LDBL_MIN, DBL_MIN, FLT_MIN,
+ LDBL_MIN, DBL_MIN, FLT_MIN);
(void) &plus_zero;
(void) &nan_value;
(void) &minus_zero;
(void) &plus_infty;
(void) &minus_infty;
+ (void) &max_value;
+ (void) &min_value;
/* Clear all exceptions. From now on we must not get random exceptions. */
feclearexcept (FE_ALL_EXCEPT);
--- libc/sysdeps/x86_64/fpu/e_powl.S.jj 2001-09-19 12:24:08.000000000 +0200
+++ libc/sysdeps/x86_64/fpu/e_powl.S 2004-07-14 20:02:06.717899272 +0200
@@ -1,5 +1,5 @@
/* ix87 specific implementation of pow function.
- Copyright (C) 1996, 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
+ Copyright (C) 1996, 1997, 1998, 1999, 2001, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -48,6 +48,9 @@ one: .double 1.0
ASM_TYPE_DIRECTIVE(limit,@object)
limit: .double 0.29
ASM_SIZE_DIRECTIVE(limit)
+ ASM_TYPE_DIRECTIVE(p63,@object)
+p63: .byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
+ ASM_SIZE_DIRECTIVE(p63)
#ifdef PIC
#define MO(op) op##(%rip)
@@ -87,6 +90,14 @@ ENTRY(__ieee754_powl)
fxch // y : x
+ /* fistpll raises invalid exception for |y| >= 1L<<63. */
+ fldl MO(p63) // 1L<<63 : y : x
+ fld %st(1) // y : 1L<<63 : y : x
+ fabs // |y| : 1L<<63 : y : x
+ fcomip %st(1), %st // 1L<<63 : y : x
+ fstp %st(0) // y : x
+ jnc 2f
+
/* First see whether `y' is a natural number. In this case we
can use a more precise algorithm. */
fld %st // y : y : x
@@ -105,7 +116,7 @@ ENTRY(__ieee754_powl)
negl %eax
adcl $0, %edx
negl %edx
-4: fldl MO(one) // 1 : x
+4: fld1 // 1 : x
fxch
6: shrdl $1, %edx, %eax
@@ -123,7 +134,7 @@ ENTRY(__ieee754_powl)
/* y is ±NAN */
30: fldt 8(%rsp) // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fucomip %st(1),%st // x : y
je 31f
fxch // y : x
@@ -133,7 +144,7 @@ ENTRY(__ieee754_powl)
.align ALIGNARG(4)
2: /* y is a real number. */
fxch // x : y
- fldl MO(one) // 1.0 : x : y
+ fld1 // 1.0 : x : y
fld %st(1) // x : 1.0 : x : y
fsub %st(1) // x-1 : 1.0 : x : y
fabs // |x-1| : 1.0 : x : y
@@ -148,6 +159,11 @@ ENTRY(__ieee754_powl)
7: fyl2x // log2(x) : y
8: fmul %st(1) // y*log2(x) : y
+ fxam
+ fnstsw
+ andb $0x45, %ah
+ cmpb $0x05, %ah // is y*log2(x) == ±inf ?
+ je 28f
fst %st(1) // y*log2(x) : y*log2(x)
frndint // int(y*log2(x)) : y*log2(x)
fsubr %st, %st(1) // int(y*log2(x)) : fract(y*log2(x))
@@ -158,11 +174,16 @@ ENTRY(__ieee754_powl)
fstp %st(1) // 2^fract(y*log2(x))*2^int(y*log2(x))
ret
+28: fstp %st(1) // y*log2(x)
+ fld1 // 1 : y*log2(x)
+ fscale // 2^(y*log2(x)) : y*log2(x)
+ fstp %st(1) // 2^(y*log2(x))
+ ret
// pow(x,±0) = 1
.align ALIGNARG(4)
11: fstp %st(0) // pop y
- fldl MO(one)
+ fld1
ret
// y == ±inf
@@ -191,7 +212,7 @@ ENTRY(__ieee754_powl)
ret
.align ALIGNARG(4)
-14: fldl MO(one)
+14: fld1
ret
.align ALIGNARG(4)
@@ -275,7 +296,7 @@ ENTRY(__ieee754_powl)
jz 27f // jump if not odd
// It's an odd integer.
// Raise divide-by-zero exception and get minus infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
fchs
ret
@@ -283,7 +304,7 @@ ENTRY(__ieee754_powl)
25: fstp %st(0)
26:
27: // Raise divide-by-zero exception and get infinity value.
- fldl MO(one)
+ fld1
fdivl MO(zero)
ret
@@ -310,7 +331,7 @@ ENTRY(__ieee754_powl)
22: fstp %st(0)
23:
-24: fldl MO(zero)
+24: fldz
ret
END(__ieee754_powl)
Jakub
More information about the Libc-hacker
mailing list