This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Use integer constants in mpa.c


Hi,

As a follow-up to the integral mantissa patch, this patch uses
integral constants throughout mpa.c to prevent the compiler from
unnecessarily generating fp conversion and operations code for it.
Additionally, there were a few places where the compiler predicted the
quick exit branch as favourable, resulting in a further disadvantage
to the slower path.  I have added branch prediction hints in these
cases to favour the slower path, thus evening out the performance
characteristics.

I verified the results of this change on x86_64, arm and on powerpc.
The only change in code to powerpc is due to the change in branch
prediction, which does not have a noticeable impact.  On x86_64 and
arm however, there is a noticeable impact.

x86_64 without patch:

exp: ITERS:5e+08: TOTAL:11.6349s, MAX:138.288ns, MIN:10.684ns, 4.29741e+07 iter/s
pow: ITERS:2e+08: TOTAL:12.3583s, MAX:189.945ns, MIN:58.128ns, 1.61835e+07 iter/s
slowexp: ITERS:5e+06: TOTAL:9.13459s, MAX:5052.29ns, MIN:1719.01ns, 547370 iter/s
slowpow: ITERS:100000: TOTAL:8.30797s, MAX:97886.4ns, MIN:80192.2ns, 12036.6 iter/s


x86_64 with patch:

exp: ITERS:5e+08: TOTAL:11.312s, MAX:93.428ns, MIN:20.402ns, 4.4201e+07 iter/s
pow: ITERS:2e+08: TOTAL:12.1221s, MAX:193.53ns, MIN:58.075ns, 1.64988e+07 iter/s
slowexp: ITERS:5e+06: TOTAL:6.55884s, MAX:2519.28ns, MIN:1260.72ns, 762329 iter/s
slowpow: ITERS:100000: TOTAL:7.19873s, MAX:75651.4ns, MIN:71147.3ns, 13891.3 iter/s

arm without patch:

This is without the long mantissa patch as well, since that resulted
in a performance regression on arm.  Posting that here will only bloat
the performance figures.

exp: ITERS:100000: TOTAL:100.091s, MAX:1.0022e+06ns, MIN:1.00042e+06ns, 999.096 iter/s
pow: ITERS:100000: TOTAL:259.985s, MAX:2.6017e+06ns, MIN:2.59909e+06ns, 384.638 iter/s

arm with patch:

exp: ITERS:100000: TOTAL:71.0013s, MAX:711433ns, MIN:709795ns, 1408.42 iter/s
pow: ITERS:100000: TOTAL:183.513s, MAX:1.8366e+06ns, MIN:1.8347e+06ns, 544.92 iter/s

That's approx 13% improvement on x86_64 and 30% on arm.  No
regressions due to this on any of the architectures.

Siddhesh

	* sysdeps/ieee754/dbl-64/mpa.c (__acr): Use integral
	constants.  Add branch hint to favour the slower path.
	(denorm): Likewise.
	(__mp_dbl): Likewise.
	(__dbl_mp): Likewise.
	(add_magnitudes): Likewise.
	(sub_magnitudes): Likewise.
	(__add): Likewise.
	(__sub): Likewise.
	(__mul): Likewise.
	(__sqr): Likewise.
	(__dvd): Likewise.


diff --git a/sysdeps/ieee754/dbl-64/mpa.c b/sysdeps/ieee754/dbl-64/mpa.c
index f1ca8ce..bad02b0 100644
--- a/sysdeps/ieee754/dbl-64/mpa.c
+++ b/sysdeps/ieee754/dbl-64/mpa.c
@@ -80,14 +80,14 @@ __acr (const mp_no *x, const mp_no *y, int p)
 {
   long i;
 
-  if (X[0] == ZERO)
+  if (__glibc_unlikely (X[0] == 0))
     {
-      if (Y[0] == ZERO)
+      if (Y[0] == 0)
 	i = 0;
       else
 	i = -1;
     }
-  else if (Y[0] == ZERO)
+  else if (__glibc_unlikely (Y[0] == 0))
     i = 1;
   else
     {
@@ -112,7 +112,7 @@ __cpy (const mp_no *x, mp_no *y, int p)
   long i;
 
   EY = EX;
-  for (i = 0; i <= p; i++)
+  for (i = 0; __glibc_likely (i <= p); i++)
     Y[i] = X[i];
 }
 #endif
@@ -140,10 +140,10 @@ norm (const mp_no *x, double *y, int p)
     }
   else
     {
-      for (a = ONE, z[1] = X[1]; z[1] < TWO23;)
+      for (a = 1, z[1] = X[1]; z[1] < TWO23;)
 	{
-	  a *= TWO;
-	  z[1] *= TWO;
+	  a *= 2;
+	  z[1] *= 2;
 	}
 
       for (i = 2; i < 5; i++)
@@ -160,21 +160,21 @@ norm (const mp_no *x, double *y, int p)
 
       if (v == TWO18)
 	{
-	  if (z[4] == ZERO)
+	  if (z[4] == 0)
 	    {
 	      for (i = 5; i <= p; i++)
 		{
-		  if (X[i] == ZERO)
+		  if (X[i] == 0)
 		    continue;
 		  else
 		    {
-		      z[3] += ONE;
+		      z[3] += 1;
 		      break;
 		    }
 		}
 	    }
 	  else
-	    z[3] += ONE;
+	    z[3] += 1;
 	}
 
       c = (z[1] + R * (z[2] + R * (double) z[3])) / a;
@@ -202,43 +202,43 @@ denorm (const mp_no *x, double *y, int p)
   mantissa_t u, z[5];
 
 #define R RADIXI
-  if (EX < -44 || (EX == -44 && X[1] < TWO5))
+  if (__glibc_unlikely (EX < -44 || (EX == -44 && X[1] < TWO5)))
     {
       *y = ZERO;
       return;
     }
 
-  if (p2 == 1)
+  if (__glibc_unlikely (p2 == 1))
     {
       if (EX == -42)
 	{
 	  z[1] = X[1] + TWO10;
-	  z[2] = ZERO;
-	  z[3] = ZERO;
+	  z[2] = 0;
+	  z[3] = 0;
 	  k = 3;
 	}
       else if (EX == -43)
 	{
 	  z[1] = TWO10;
 	  z[2] = X[1];
-	  z[3] = ZERO;
+	  z[3] = 0;
 	  k = 2;
 	}
       else
 	{
 	  z[1] = TWO10;
-	  z[2] = ZERO;
+	  z[2] = 0;
 	  z[3] = X[1];
 	  k = 1;
 	}
     }
-  else if (p2 == 2)
+  else if (__glibc_unlikely (p2 == 2))
     {
       if (EX == -42)
 	{
 	  z[1] = X[1] + TWO10;
 	  z[2] = X[2];
-	  z[3] = ZERO;
+	  z[3] = 0;
 	  k = 3;
 	}
       else if (EX == -43)
@@ -251,7 +251,7 @@ denorm (const mp_no *x, double *y, int p)
       else
 	{
 	  z[1] = TWO10;
-	  z[2] = ZERO;
+	  z[2] = 0;
 	  z[3] = X[1];
 	  k = 1;
 	}
@@ -273,7 +273,7 @@ denorm (const mp_no *x, double *y, int p)
       else
 	{
 	  z[1] = TWO10;
-	  z[2] = ZERO;
+	  z[2] = 0;
 	  k = 1;
 	}
       z[3] = X[k];
@@ -285,11 +285,11 @@ denorm (const mp_no *x, double *y, int p)
     {
       for (i = k + 1; i <= p2; i++)
 	{
-	  if (X[i] == ZERO)
+	  if (X[i] == 0)
 	    continue;
 	  else
 	    {
-	      z[3] += ONE;
+	      z[3] += 1;
 	      break;
 	    }
 	}
@@ -306,7 +306,7 @@ denorm (const mp_no *x, double *y, int p)
 void
 __mp_dbl (const mp_no *x, double *y, int p)
 {
-  if (X[0] == ZERO)
+  if (X[0] == 0)
     {
       *y = ZERO;
       return;
@@ -329,23 +329,23 @@ __dbl_mp (double x, mp_no *y, int p)
   long p2 = p;
 
   /* Sign.  */
-  if (x == ZERO)
+  if (__glibc_unlikely (x == ZERO))
     {
-      Y[0] = ZERO;
+      Y[0] = 0;
       return;
     }
   else if (x > ZERO)
-    Y[0] = ONE;
+    Y[0] = 1;
   else
     {
-      Y[0] = MONE;
+      Y[0] = -1;
       x = -x;
     }
 
   /* Exponent.  */
-  for (EY = ONE; x >= RADIX; EY += ONE)
+  for (EY = 1; x >= RADIX; EY += 1)
     x *= RADIXI;
-  for (; x < ONE; EY -= ONE)
+  for (; x < ONE; EY -= 1)
     x *= RADIX;
 
   /* Digits.  */
@@ -356,7 +356,7 @@ __dbl_mp (double x, mp_no *y, int p)
       x *= RADIX;
     }
   for (; i <= p2; i++)
-    Y[i] = ZERO;
+    Y[i] = 0;
 }
 
 /* Add magnitudes of *X and *Y assuming that abs (*X) >= abs (*Y) > 0.  The
@@ -383,39 +383,39 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
       return;
     }
 
-  zk = ZERO;
+  zk = 0;
 
-  for (; j > 0; i--, j--)
+  for (; __glibc_likely (j > 0); i--, j--)
     {
       zk += X[i] + Y[j];
       if (zk >= RADIX)
 	{
 	  Z[k--] = zk - RADIX;
-	  zk = ONE;
+	  zk = 1;
 	}
       else
         {
 	  Z[k--] = zk;
-	  zk = ZERO;
+	  zk = 0;
 	}
     }
 
-  for (; i > 0; i--)
+  for (; __glibc_likely (i > 0); i--)
     {
       zk += X[i];
       if (zk >= RADIX)
 	{
 	  Z[k--] = zk - RADIX;
-	  zk = ONE;
+	  zk = 1;
 	}
       else
         {
 	  Z[k--] = zk;
-	  zk = ZERO;
+	  zk = 0;
 	}
     }
 
-  if (zk == ZERO)
+  if (zk == 0)
     {
       for (i = 1; i <= p2; i++)
 	Z[i] = Z[i + 1];
@@ -423,7 +423,7 @@ add_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
   else
     {
       Z[1] = zk;
-      EZ += ONE;
+      EZ += 1;
     }
 }
 
@@ -453,27 +453,27 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
 
   /* The relevant least significant digit in Y is non-zero, so we factor it in
      to enhance accuracy.  */
-  if (j < p2 && Y[j + 1] > ZERO)
+  if (j < p2 && Y[j + 1] > 0)
     {
       Z[k + 1] = RADIX - Y[j + 1];
-      zk = MONE;
+      zk = -1;
     }
   else
-    zk = Z[k + 1] = ZERO;
+    zk = Z[k + 1] = 0;
 
   /* Subtract and borrow.  */
   for (; j > 0; i--, j--)
     {
       zk += (X[i] - Y[j]);
-      if (zk < ZERO)
+      if (zk < 0)
 	{
 	  Z[k--] = zk + RADIX;
-	  zk = MONE;
+	  zk = -1;
 	}
       else
         {
 	  Z[k--] = zk;
-	  zk = ZERO;
+	  zk = 0;
 	}
     }
 
@@ -481,25 +481,25 @@ sub_magnitudes (const mp_no *x, const mp_no *y, mp_no *z, int p)
   for (; i > 0; i--)
     {
       zk += X[i];
-      if (zk < ZERO)
+      if (zk < 0)
 	{
 	  Z[k--] = zk + RADIX;
-	  zk = MONE;
+	  zk = -1;
 	}
       else
         {
 	  Z[k--] = zk;
-	  zk = ZERO;
+	  zk = 0;
 	}
     }
 
   /* Normalize.  */
-  for (i = 1; Z[i] == ZERO; i++);
+  for (i = 1; Z[i] == 0; i++);
   EZ = EZ - i + 1;
   for (k = 1; i <= p2 + 1;)
     Z[k++] = Z[i++];
   for (; k <= p2;)
-    Z[k++] = ZERO;
+    Z[k++] = 0;
 }
 
 /* Add *X and *Y and store the result in *Z.  X and Y may overlap, but not X
@@ -511,12 +511,12 @@ __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   int n;
 
-  if (X[0] == ZERO)
+  if (X[0] == 0)
     {
       __cpy (y, z, p);
       return;
     }
-  else if (Y[0] == ZERO)
+  else if (Y[0] == 0)
     {
       __cpy (x, z, p);
       return;
@@ -548,7 +548,7 @@ __add (const mp_no *x, const mp_no *y, mp_no *z, int p)
 	  Z[0] = Y[0];
 	}
       else
-	Z[0] = ZERO;
+	Z[0] = 0;
     }
 }
 
@@ -561,13 +561,13 @@ __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   int n;
 
-  if (X[0] == ZERO)
+  if (X[0] == 0)
     {
       __cpy (y, z, p);
       Z[0] = -Z[0];
       return;
     }
-  else if (Y[0] == ZERO)
+  else if (Y[0] == 0)
     {
       __cpy (x, z, p);
       return;
@@ -599,7 +599,7 @@ __sub (const mp_no *x, const mp_no *y, mp_no *z, int p)
 	  Z[0] = -Y[0];
 	}
       else
-	Z[0] = ZERO;
+	Z[0] = 0;
     }
 }
 
@@ -618,23 +618,23 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
   mantissa_store_t *diag;
 
   /* Is z=0?  */
-  if (__glibc_unlikely (X[0] * Y[0] == ZERO))
+  if (__glibc_unlikely (X[0] * Y[0] == 0))
     {
-      Z[0] = ZERO;
+      Z[0] = 0;
       return;
     }
 
   /* We need not iterate through all X's and Y's since it's pointless to
      multiply zeroes.  Here, both are zero...  */
   for (ip2 = p2; ip2 > 0; ip2--)
-    if (X[ip2] != ZERO || Y[ip2] != ZERO)
+    if (X[ip2] != 0 || Y[ip2] != 0)
       break;
 
-  a = X[ip2] != ZERO ? y : x;
+  a = X[ip2] != 0 ? y : x;
 
   /* ... and here, at least one of them is still zero.  */
   for (ip = ip2; ip > 0; ip--)
-    if (a->d[ip] != ZERO)
+    if (a->d[ip] != 0)
       break;
 
   /* The product looks like this for p = 3 (as an example):
@@ -665,15 +665,15 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
 
   k = (__glibc_unlikely (p2 < 3)) ? p2 + p2 : p2 + 3;
 
-  while (k > ip + ip2 + 1)
-    Z[k--] = ZERO;
+  while (__glibc_unlikely (k > ip + ip2 + 1))
+    Z[k--] = 0;
 
-  zk = ZERO;
+  zk = 0;
 
   /* Precompute sums of diagonal elements so that we can directly use them
      later.  See the next comment to know we why need them.  */
   diag = alloca (k * sizeof (mantissa_store_t));
-  mantissa_store_t d = ZERO;
+  mantissa_store_t d = 0;
   for (i = 1; i <= ip; i++)
     {
       d += X[i] * (mantissa_store_t) Y[i];
@@ -738,7 +738,7 @@ __mul (const mp_no *x, const mp_no *y, mp_no *z, int p)
   int e = EX + EY;
 
   /* Is there a carry beyond the most significant digit?  */
-  if (__glibc_unlikely (Z[1] == ZERO))
+  if (__glibc_likely (Z[1] == 0))
     {
       for (i = 1; i <= p2; i++)
 	Z[i] = Z[i + 1];
@@ -763,24 +763,24 @@ __sqr (const mp_no *x, mp_no *y, int p)
   mantissa_store_t yk;
 
   /* Is z=0?  */
-  if (__glibc_unlikely (X[0] == ZERO))
+  if (__glibc_unlikely (X[0] == 0))
     {
-      Y[0] = ZERO;
+      Y[0] = 0;
       return;
     }
 
   /* We need not iterate through all X's since it's pointless to
      multiply zeroes.  */
   for (ip = p; ip > 0; ip--)
-    if (X[ip] != ZERO)
+    if (X[ip] != 0)
       break;
 
   k = (__glibc_unlikely (p < 3)) ? p + p : p + 3;
 
   while (k > 2 * ip + 1)
-    Y[k--] = ZERO;
+    Y[k--] = 0;
 
-  yk = ZERO;
+  yk = 0;
 
   while (k > p)
     {
@@ -802,7 +802,7 @@ __sqr (const mp_no *x, mp_no *y, int p)
       for (i = k - p, j = p; i < j; i++, j--)
 	yk2 += X[i] * (mantissa_store_t) X[j];
 
-      yk += 2.0 * yk2;
+      yk += 2 * yk2;
 
       DIV_RADIX (yk, Y[k]);
       k--;
@@ -820,7 +820,7 @@ __sqr (const mp_no *x, mp_no *y, int p)
       for (i = 1, j = k - 1; i < j; i++, j--)
 	yk2 += X[i] * (mantissa_store_t) X[j];
 
-      yk += 2.0 * yk2;
+      yk += 2 * yk2;
 
       DIV_RADIX (yk, Y[k]);
       k--;
@@ -828,7 +828,7 @@ __sqr (const mp_no *x, mp_no *y, int p)
   Y[k] = yk;
 
   /* Squares are always positive.  */
-  Y[0] = 1.0;
+  Y[0] = 1;
 
   /* Get the exponent sum into an intermediate variable.  This is a subtle
      optimization, where given enough registers, all operations on the exponent
@@ -836,7 +836,7 @@ __sqr (const mp_no *x, mp_no *y, int p)
   int e = EX * 2;
 
   /* Is there a carry beyond the most significant digit?  */
-  if (__glibc_unlikely (Y[1] == ZERO))
+  if (__glibc_unlikely (Y[1] == 0))
     {
       for (i = 1; i <= p; i++)
 	Y[i] = Y[i + 1];
@@ -894,8 +894,8 @@ __dvd (const mp_no *x, const mp_no *y, mp_no *z, int p)
 {
   mp_no w;
 
-  if (X[0] == ZERO)
-    Z[0] = ZERO;
+  if (__glibc_unlikely (X[0] == 0))
+    Z[0] = 0;
   else
     {
       __inv (y, &w, p);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]