This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Fix x86 pow inaccuracy for large integer exponents (bug 706)


Bug 706 reports that the x86 pow function is inaccurate for large
integer exponents because it uses an iterative multiplication
algorithm that allows rounding errors to accumulate.

I propose this patch to fix the bug by using the log+exp real-exponent
algorithm for integer exponents with absolute value at least 1024.
The choice of that exponent is somewhat arbitrary.  Both algorithms
take advantage of having 11 bits excess precision, which is sufficient
for all inputs for the log+exp algorithm (you need about 53 fractional
bits in y*log2(x), and get them unless there is overflow or underflow
because without overflow or underflow there will be at most 10 bits in
the integer part of y*log2(x)) but not for exponents much bigger than
1024 with the multiplication algorithm.  In terms of accuracy there is
probably no real advantage in the multiplication algorithm once the
exponent is large enough that no cases other than powers of 2 are
exact (i.e. once power of 3 are no longer exactly representable), but
I don't know how the speed of the algorithms compares; choosing a
threshold as large as possible is conservative in that it avoids
changing the algorithm unnecessarily.

Given that change, the real-exponent code then needs to handle
negative x with odd integer exponent, which is done using code similar
to that used in powl which already needed to handle that issue.

This bug does not affect powf since 40 bits excess precision is enough
for any integer exponent that uses the multiplication algorithm in x86
powf.  It does affect powl, but for many inputs the real-exponent
algorithm is even worse there (see bug 13881) so this patch does not
change powl (but a fix for bug 13881 will need to stop the
integer-exponent algorithm being used for all but very small
exponents, as well as improving the real-exponent code).

Tested x86 and x86_64 and ulps updated accordingly (x86 didn't need
any new ulps, x86_64 did need ulps for some new tests for powf).

2012-04-08  Joseph Myers  <joseph@codesourcery.com>

	[BZ #706]
	* sysdeps/i386/fpu/e_pow.S (p10): New object.
	(__ieee754_pow): Use iterative multiplication algorithm only for
	integer exponents with absolute value below 1024.  Check for odd
	integer exponents when using algorithm for real exponents.
	* math/libm-test.inc (pow_test): Add more tests.
	* sysdeps/x86_64/fpu/libm-test-ulps: Update.

diff --git a/math/libm-test.inc b/math/libm-test.inc
index 32bce45..0820d6a 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -6071,6 +6071,32 @@ pow_test (void)
   /* Bug 13872: spurious OVERFLOW exception may be present.  */
   TEST_ff_f (pow, -min_value, max_value, plus_zero, OVERFLOW_EXCEPTION_OK);
 
+#ifndef TEST_LDOUBLE /* Bug 13881.  */
+  TEST_ff_f (pow, 0x0.ffffffp0, 10, 0.999999403953712118183885036774764444747L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 100, 0.999994039553108359406305079606228341585L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 1000, 0.9999403971297699052276650144650733772182L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 0x1p24, 0.3678794302077803437135155590023422899744L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 0x1p30, 1.603807831524924233828134753069728224044e-28L);
+  TEST_ff_f (pow, 0x0.ffffffp0, 0x1.234566p30, 2.374884712135295099971443365381007297732e-32L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -10, 1.000000596046643153205170848674671339688L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -100, 1.000005960482418779499387594989252621451L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -1000, 1.000059606422943986382898964231519867906L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -0x1p24, 2.7182819094701610539628664526874952929416L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -0x1p30, 6.2351609734265057988914412331288163636075e+27L);
+  TEST_ff_f (pow, 0x0.ffffffp0, -0x1.234566p30, 4.2107307141696353498921307077142537353515e+31L);
+  TEST_ff_f (pow, 0x1.000002p0, 0x1p24, 7.3890552180866447284268641248075832310141L);
+  TEST_ff_f (pow, 0x1.000002p0, 0x1.234566p29, 4.2107033006507495188536371520637025716256e+31L);
+  TEST_ff_f (pow, 0x1.000002p0, -0x1.234566p29, 2.3749001736727769098946062325205705312166e-32L);
+#endif
+
+  /* Bug 13881: powl inaccurate so these tests disabled for long double.  */
+#if !defined TEST_FLOAT && !defined TEST_LDOUBLE
+  TEST_ff_f (pow, 0x0.fffffffffffff8p0L, 0x1.23456789abcdfp62L, 1.0118762747827252817436395051178295138220e-253L);
+  TEST_ff_f (pow, 0x0.fffffffffffff8p0L, -0x1.23456789abcdfp62L, 9.8826311568054561811190162420900667121992e+252L);
+  TEST_ff_f (pow, 0x1.0000000000001p0L, 0x1.23456789abcdfp61L, 9.8826311568044974397135026217687399395481e+252L);
+  TEST_ff_f (pow, 0x1.0000000000001p0L, -0x1.23456789abcdfp61L, 1.0118762747828234466621210689458255908670e-253L);
+#endif
+
   END (pow);
 }
 
diff --git a/sysdeps/i386/fpu/e_pow.S b/sysdeps/i386/fpu/e_pow.S
index b61a946..73d2421 100644
--- a/sysdeps/i386/fpu/e_pow.S
+++ b/sysdeps/i386/fpu/e_pow.S
@@ -32,6 +32,9 @@ limit:	.double 0.29
 	ASM_TYPE_DIRECTIVE(p63,@object)
 p63:	.byte 0, 0, 0, 0, 0, 0, 0xe0, 0x43
 	ASM_SIZE_DIRECTIVE(p63)
+	ASM_TYPE_DIRECTIVE(p10,@object)
+p10:	.byte 0, 0, 0, 0, 0, 0, 0x90, 0x40
+	ASM_SIZE_DIRECTIVE(p10)
 
 	.section .rodata.cst16,"aM",@progbits,16
 
@@ -116,7 +119,15 @@ ENTRY(__ieee754_pow)
 	sahf
 	jne	3f
 
-	/* OK, we have an integer value for y.  */
+	/* OK, we have an integer value for y.  If large enough that
+	   errors may propagate out of the 11 bits excess precision, use
+	   the algorithm for real exponent instead.  */
+	fld	%st		// y : y : x
+	fabs			// |y| : y : x
+	fcompl	MO(p10)		// y : x
+	fnstsw
+	sahf
+	jnc	2f
 	popl	%eax
 	cfi_adjust_cfa_offset (-4)
 	popl	%edx
@@ -157,7 +168,9 @@ ENTRY(__ieee754_pow)
 
 	cfi_adjust_cfa_offset (8)
 	.align ALIGNARG(4)
-2:	/* y is a large integer (so even).  */
+2:	// y is a large integer (absolute value at least 1L<<10), but
+	// may be odd unless at least 1L<<64.  So it may be necessary
+	// to adjust the sign of a negative result afterwards.
 	fxch			// x : y
 	fabs			// |x| : y
 	fxch			// y : x
@@ -187,9 +200,41 @@ ENTRY(__ieee754_pow)
 	f2xm1			// 2^fract(y*log2(x))-1 : int(y*log2(x))
 	faddl	MO(one)		// 2^fract(y*log2(x)) : int(y*log2(x))
 	fscale			// 2^fract(y*log2(x))*2^int(y*log2(x)) : int(y*log2(x))
-	addl	$8, %esp
-	cfi_adjust_cfa_offset (-8)
 	fstp	%st(1)		// 2^fract(y*log2(x))*2^int(y*log2(x))
+	testb	$2, %dh
+	jz	292f
+	// x is negative.  If y is an odd integer, negate the result.
+	fldl	20(%esp)	// y : abs(result)
+	fld	%st		// y : y : abs(result)
+	fabs			// |y| : y : abs(result)
+	fcompl	MO(p63)		// y : abs(result)
+	fnstsw
+	sahf
+	jnc	291f
+
+	// We must find out whether y is an odd integer.
+	fld	%st		// y : y : abs(result)
+	fistpll	(%esp)		// y : abs(result)
+	fildll	(%esp)		// int(y) : y : abs(result)
+	fucompp			// abs(result)
+	fnstsw
+	sahf
+	jne	292f
+
+	// OK, the value is an integer, but is it odd?
+	popl	%eax
+	cfi_adjust_cfa_offset (-4)
+	popl	%edx
+	cfi_adjust_cfa_offset (-4)
+	andb	$1, %al
+	jz	290f		// jump if not odd
+	// It's an odd integer.
+	fchs
+290:	ret
+	cfi_adjust_cfa_offset (8)
+291:	fstp	%st(0)		// abs(result)
+292:	addl	$8, %esp
+	cfi_adjust_cfa_offset (-8)
 	ret
 
 
diff --git a/sysdeps/x86_64/fpu/libm-test-ulps b/sysdeps/x86_64/fpu/libm-test-ulps
index d43955a..dce60cf 100644
--- a/sysdeps/x86_64/fpu/libm-test-ulps
+++ b/sysdeps/x86_64/fpu/libm-test-ulps
@@ -1444,6 +1444,17 @@ Test "log1p (-0.25) == -0.287682072451780927439219005993827432":
 float: 1
 ifloat: 1
 
+# pow
+Test "pow (0x0.ffffffp0, -0x1p24) == 2.7182819094701610539628664526874952929416":
+float: 1
+ifloat: 1
+Test "pow (0x0.ffffffp0, 0x1p24) == 0.3678794302077803437135155590023422899744":
+float: 1
+ifloat: 1
+Test "pow (0x1.000002p0, 0x1p24) == 7.3890552180866447284268641248075832310141":
+float: 1
+ifloat: 1
+
 # pow_downward
 Test "pow_downward (1.0625, 1.125) == 1.070582293028761362162622578677070098674":
 ildouble: 1
@@ -2413,6 +2424,10 @@ Function: "log1p":
 float: 1
 ifloat: 1
 
+Function: "pow":
+float: 1
+ifloat: 1
+
 Function: "pow_downward":
 float: 1
 ifloat: 1

-- 
Joseph S. Myers
joseph@codesourcery.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]