This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Some more dbl-64/s_fma.c issues

From: Jakub Jelinek <jakub at redhat dot com>
To: Ulrich Drepper <drepper at gmail dot com>, "Joseph S. Myers" <joseph at codesourcery dot com>, Richard Henderson <rth at twiddle dot net>
Cc: libc-alpha at sources dot redhat dot com
Date: Thu, 14 Oct 2010 17:37:29 +0200
Subject: Some more dbl-64/s_fma.c issues
Reply-to: Jakub Jelinek <jakub at redhat dot com>

Hi!

We need to take care of underflows too in the dbl-64/s_fma.c version
unfortunately.  For x86-64, it is not a big deal, we could just use
ldbl-96/s_fma.c version there instead, but we have a few targets with
long double == double, and more importantly, if we manage to solve
this properly dbl-64/s_fma.c can be just used as ldbl-96/s_fmal.c
and ldbl-128/s_fmal.c only with very few changes.

This patch attempts to deal with underflow, unfortunately not 100%.
If underflow is possible, but x * y is not less than half of smallest
subnormal and z has roughly similar exponent as x * y, then the patch
chooses to do the x * y etc. computations on quantities scaled up by 0x1p106
(as otherwise Dekker's multiply is not exact, etc.).
The problem then is that we need to do
(a1 + u.d) * 0x1p-106 as an operation with just one rounding to nearest.
The patch handles it if either (a1 + u.d) * 0x1p-106 with round to zero
is not smaller than DBL_MIN (because then * 0x1p-106 doesn't actually do
any rounding), or if it is a smaller subnormal (I feel it is safe if
it is smaller than DBL_MIN / 4.0, IEEE 754 needs 2 guard bits and sticky
bit) - then the ORing in of the INEXACT exception bit from the round to zero
addition goes into a sticky or smaller bit and thus IMHO round to nearest
should handle it properly.  Strangely, the attached mpfr random tester
only shows issues when the subnormal is in the DBL_MIN / 2.0 to DBL_MIN -
DBL_DENORM_MIN range, though of course that's not proof.
When the result is subnormal with highest bit of mantissa set, the
v.ieee.mantissa1 |= j;
changes the high guard bit, so it is not surprising that it doesn't
round correctly in many cases.

Any ideas how to solve this (preferably without using soft-fp)?

FYI, the tester, run on 100 million pseudo-random fma calls, shows no errors
on the ldbl-96/s_fma.c implementation, 9647332 errors on the old
fma (x * y + z), 35543 errors with current git and 2842 with this
patch in.  Here are 3 of these 2842 errors in TEST_fff_f form:
  TEST_fff_f (fma, -0x1.19cab66d73e17p-959, 0x1.c7108a8c5ff51p-107, -0x0.80b0ad65d9b64p-1022, -0x0.80b0ad65d9d58p-1022)
  TEST_fff_f (fma, -0x1.d2eaed6e8e9d3p-979, -0x1.4e066c62ac9ddp-63, -0x0.9245e6b003454p-1022, -0x0.9245c09c5fb5ep-1022)
  TEST_fff_f (fma, 0x1.153d650bb9f06p-907, 0x1.2d01230d48407p-125, -0x0.b278d5acfc3cp-1022, -0x0.b22757123bbe8p-1022)
(last argument from mpfr_fma+mpfr_subnormalize).

2010-10-14  Jakub Jelinek  <jakub@redhat.com>

	[BZ #3268]
	* math/libm-test.inc (fma_test): Add some more tests.
	* sysdeps/ieee754/dbl-64/s_fma.c (__fma): Attempt to handle
	underflows.

--- libc/math/libm-test.inc.jj	2010-10-14 09:08:58.000000000 +0200
+++ libc/math/libm-test.inc	2010-10-14 15:41:35.000000000 +0200
@@ -2808,6 +2808,13 @@ fma_test (void)
   TEST_fff_f (fma, 0x1.fffffffffffffp+1023, 0x1.001p+0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1011);
   TEST_fff_f (fma, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+0, 0x1.fffffffffffffp+1023, -0x1.ffffffffffffdp+1023);
   TEST_fff_f (fma, 0x1.fffffffffffffp+1023, 2.0, -0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+1023);
+  TEST_fff_f (fma, 0x1.6a09e667f3bccp-538, 0x1.6a09e667f3bccp-538, 0.0, 0.0);
+  TEST_fff_f (fma, 0x1.deadbeef2feedp-495, 0x1.deadbeef2feedp-495, -0x1.bf86a5786a574p-989, 0x0.0000042625a1fp-1022);
+  TEST_fff_f (fma, 0x1.deadbeef2feedp-503, 0x1.deadbeef2feedp-503, -0x1.bf86a5786a574p-1005, 0x0.0000000004262p-1022);
+  TEST_fff_f (fma, 0x1p-537, 0x1p-538, 0x1p-1074, 0x0.0000000000002p-1022);
+  TEST_fff_f (fma, 0x1.7fffff8p-968, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000001p-1022);
+  TEST_fff_f (fma, 0x1.4000004p-967, 0x1p-106, 0x0.000001p-1022, 0x0.0000010000003p-1022);
+  TEST_fff_f (fma, 0x1.4p-967, -0x1p-106, -0x0.000001p-1022, -0x0.0000010000002p-1022);
 #endif
 
   END (fma);
--- libc/sysdeps/ieee754/dbl-64/s_fma.c.jj	2010-10-14 09:08:58.000000000 +0200
+++ libc/sysdeps/ieee754/dbl-64/s_fma.c	2010-10-14 17:03:00.000000000 +0200
@@ -39,15 +39,20 @@ __fma (double x, double y, double z)
 			>= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG, 0)
       || __builtin_expect (u.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
       || __builtin_expect (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
-      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0))
+      || __builtin_expect (w.ieee.exponent >= 0x7ff - DBL_MANT_DIG, 0)
+      || __builtin_expect (u.ieee.exponent + v.ieee.exponent
+			   <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG, 0))
     {
-      /* If x or y or z is Inf/NaN or if fma will certainly overflow,
+      /* If x or y or z is Inf/NaN, or if fma will certainly overflow,
+	 or if x * y is less than half of DBL_DENORM_MIN,
 	 compute as x * y + z.  */
       if (u.ieee.exponent == 0x7ff
 	  || v.ieee.exponent == 0x7ff
 	  || w.ieee.exponent == 0x7ff
 	  || u.ieee.exponent + v.ieee.exponent
-	     > 0x7ff + IEEE754_DOUBLE_BIAS)
+	     > 0x7ff + IEEE754_DOUBLE_BIAS
+	  || u.ieee.exponent + v.ieee.exponent
+	     < IEEE754_DOUBLE_BIAS - DBL_MANT_DIG - 2)
 	return x * y + z;
       if (u.ieee.exponent + v.ieee.exponent
 	  >= 0x7ff + IEEE754_DOUBLE_BIAS - DBL_MANT_DIG)
@@ -87,7 +92,7 @@ __fma (double x, double y, double z)
 	  else
 	    v.d *= 0x1p53;
 	}
-      else
+      else if (v.ieee.exponent >= 0x7ff - DBL_MANT_DIG)
 	{
 	  v.ieee.exponent -= DBL_MANT_DIG;
 	  if (u.ieee.exponent)
@@ -95,6 +100,24 @@ __fma (double x, double y, double z)
 	  else
 	    u.d *= 0x1p53;
 	}
+      else /* if (u.ieee.exponent + v.ieee.exponent
+		  <= IEEE754_DOUBLE_BIAS + DBL_MANT_DIG) */
+	{
+	  if (u.ieee.exponent > v.ieee.exponent)
+	    u.ieee.exponent += 2 * DBL_MANT_DIG;
+	  else
+	    v.ieee.exponent += 2 * DBL_MANT_DIG;
+	  if (w.ieee.exponent <= 4 * DBL_MANT_DIG + 4)
+	    {
+	      if (w.ieee.exponent)
+		w.ieee.exponent += 2 * DBL_MANT_DIG;
+	      else
+		w.d *= 0x1p106;
+	      adjust = -1;
+	    }
+	  /* Otherwise x * y should just affect inexact
+	     and nothing else.  */
+	}
       x = u.d;
       y = v.d;
       z = w.d;
@@ -123,18 +146,45 @@ __fma (double x, double y, double z)
   fesetround (FE_TOWARDZERO);
   /* Perform m2 + a2 addition with round to odd.  */
   u.d = a2 + m2;
-  if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
-    u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
-  feupdateenv (&env);
-
-  /* Add that to a1.  */
-  a1 = a1 + u.d;
 
-  /* And adjust exponent if needed.  */
-  if (__builtin_expect (adjust, 0))
-    a1 *= 0x1p53;
-
-  return a1;
+  if (__builtin_expect (adjust == 0, 1))
+    {
+      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+      feupdateenv (&env);
+      /* Result is a1 + u.d.  */
+      return a1 + u.d;
+    }
+  else if (__builtin_expect (adjust > 0, 1))
+    {
+      if ((u.ieee.mantissa1 & 1) == 0 && u.ieee.exponent != 0x7ff)
+	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+      feupdateenv (&env);
+      /* Result is a1 + u.d, scaled up.  */
+      return (a1 + u.d) * 0x1p53;
+    }
+  else
+    {
+      if ((u.ieee.mantissa1 & 1) == 0)
+	u.ieee.mantissa1 |= fetestexcept (FE_INEXACT) != 0;
+      v.d = a1 + u.d;
+      int j = fetestexcept (FE_INEXACT) != 0;
+      w.d = v.d * 0x1p-106;
+      feupdateenv (&env);
+      /* Ensure the following computations are performed in default rounding
+	 mode instead of just reusing the round to zero computation.  */
+      asm volatile ("" : "=m" (u), "=m" (v) : "m" (u), "m" (v));
+      /* If a1 + u.d is exact, the only rounding happens during
+	 scaling down.  */
+      if (j == 0)
+	return v.d * 0x1p-106;
+      /* If result rounded to zero is not subnormal, no double
+	 rounding will occur.  */
+      if (w.ieee.exponent)
+	return (a1 + u.d) * 0x1p-106;
+      v.ieee.mantissa1 |= j;
+      return v.d * 0x1p-106;
+    }
 }
 #ifndef __fma
 weak_alias (__fma, fma)


	Jakub

Attachment: testfma.c
Description: Text document

Follow-Ups:
- [PATCH] Fix some more dbl-64/s_fma.c issues
  - From: Jakub Jelinek

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]