This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[PATCH] PowerPC - logb/ilogb optimization for POWER7
- From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
- To: "GNU C. Library" <libc-alpha at sourceware dot org>
- Date: Tue, 10 Apr 2012 12:45:02 -0300
- Subject: [PATCH] PowerPC - logb/ilogb optimization for POWER7
This patch provides optimized logb/ilogb functions for POWER7. For PPC32
ilogb shows 30% and logb 120% more throughput, while for PPC64 ilogb shows
10% and logb 200%. The optimization is done by avoiding floating-point to
integer conversion and by using VSX floating-point bitwise instructions.
---
2012-04-10 Adhemerval Zanella <azanella@linux.vnet.ibm.com>
* sysdeps/powerpc/powerpc32/power7/fpu/s_ilogb.c: New file: optimized
ilogb for POWER7.
* sysdeps/powerpc/powerpc64/power7/fpu/s_ilogb.c: New file: wrapper
to include the PPC32 version.
* sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c: New file: optimized
logb for POWER7.
* sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c: New file: wrapper
to include the PPC32 version.
* sysdeps/powerpc/fpu/math_private.h: Add optimized POWER7 macro to
cast a double to a word.
* math/libm-test.inc: Add extra tests for ilogb.
diff --git a/math/libm-test.inc b/math/libm-test.inc
index 32bce45..cb952b8 100644
--- a/math/libm-test.inc
+++ b/math/libm-test.inc
@@ -3742,6 +3742,12 @@ ilogb_test (void)
TEST_f_i (ilogb, M_El, 1);
TEST_f_i (ilogb, 1024, 10);
TEST_f_i (ilogb, -2000, 10);
+#if defined TEST_FLOAT
+ TEST_f_i (ilogb, 1.701412e+38, 127);
+#endif
+#if defined TEST_DOUBLE
+ TEST_f_i (ilogb, 8.988466e+307, 1023);
+#endif
/* XXX We have a problem here: the standard does not tell us whether
exceptions are allowed/required. ignore them for now. */
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index a916be3..88aa0fa 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -25,6 +25,29 @@
#include <dl-procinfo.h>
#include_next <math_private.h>
+
+#if defined(_ARCH_PWR7)
+/* DOUBLE_TO_WORDS (d, i): POWER7-only definition; the #undef below discards any earlier one.  */
+/* Converts the double D to a 32-bit integer with fctiwz (round toward zero) and assigns it to I.  */
+#undef DOUBLE_TO_WORDS
+#define DOUBLE_TO_WORDS(d, i) \
+ do { \
+ double d__ = d; /* Local copy of D; the asm operates on this.  */ \
+ int32_t i__; /* Receives the word loaded back by lwz.  */ \
+ ieee_double_shape_type iw_u; /* Scratch memory for the FPR -> GPR transfer.  */ \
+ __asm ( \
+ "fctiwz %1,%1\n" /* Convert d__ in place to an integer word.  */ \
+ "stfd %1,%2\n" /* Spill the FPR to iw_u.value.  */ \
+ "ori 2,2,0\n" /* No-op; presumably ends the dispatch group before the reload -- TODO confirm.  */ \
+ "lwz %0,%3\n" /* Reload one word via iw_u.word; NOTE(review): confirm it is the half holding the result.  */ \
+ : "=r" (i__) \
+ : "f" (d__), "m" (iw_u.value), "m" (iw_u.word)); \
+ i = i__; \
+ } while (0)
+/* NOTE(review): the asm overwrites %1 and writes iw_u.value although both are input-only operands; GCC expects "+f"/"=m" here.  */
+#endif /* _ARCH_PWR7 */
+
+
# if __WORDSIZE == 64 || defined _ARCH_PWR4
# define __CPU_HAS_FSQRT 1
# else
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_ilogb.c b/sysdeps/powerpc/powerpc32/power7/fpu/s_ilogb.c
new file mode 100644
index 0000000..d7595ab
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_ilogb.c
@@ -0,0 +1,78 @@
+/* ilogb(). PowerPC64/POWER7 version.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <fenv.h>
+#include <math.h>
+#include <math_private.h>
+
+
+/* This implementation avoids FP to INT conversions by using VSX bitwise
+ * instructions over FP values. */
+
+static const double two1div52 = 2.220446049250313e-16; /* 1/2**52: scales the masked exponent bits down by 52 places.  */
+static const double two10m1 = -1023.0; /* -(2**10 - 1): removes the exponent bias.  */
+/* Bit masks, initialised as integers and read back through D as doubles for use as asm FP operands.  */
+static const union {
+ unsigned long long mask; /* Initialised member: the raw 64-bit pattern.  */
+ double d; /* The same bits viewed as a double.  */
+}
+exp_mask = { 0x7ff0000000000000ULL }, /* Selects the exponent field.  */
+sig_mask = { 0x000FFFFFFFFFFFFFULL }; /* Selects the significand field.  */
+/* Return the unbiased binary exponent of X; for zero, NaN and infinity raise FE_INVALID and return FP_ILOGB0, FP_ILOGBNAN and INT_MAX respectively.  */
+int
+__ilogb (double x)
+{
+ double dret; /* Exponent of X, kept as a double until the final conversion.  */
+ int ret;
+
+ if (__builtin_expect (x == 0.0, 0)) /* Zero of either sign; hinted as unlikely.  */
+ {
+ feraiseexcept (FE_INVALID);
+ return FP_ILOGB0;
+ }
+ /* dret = x & 0x7ff0000000000000, then fcfid converts that 64-bit integer pattern to a double.  */
+ asm (
+ "xxland %x0,%x1,%x2\n"
+ "fcfid %0,%0"
+ : "=f" (dret)
+ : "f" (x), "f" (exp_mask.d));
+ /* dret = (x >> 52) - 1023.0; the multiply by 2**-52 stands in for the shift.  */
+ dret = (dret * two1div52) + two10m1;
+ if (__builtin_expect (dret > -two10m1, 0)) /* Above 1023: all-ones exponent field, i.e. infinity or NaN.  */
+ {
+ double sig; /* Significand bits of X viewed as a double.  */
+ /* sig = x & 0x000FFFFFFFFFFFFF; */
+ asm (
+ "xxland %x0,%x1,%x2\n"
+ : "=f" (sig)
+ : "f" (x), "f" (sig_mask.d));
+ feraiseexcept (FE_INVALID);
+ /* A NaN differs from an infinity by having some significand bits set.  */
+ if (sig > 0.0)
+ return FP_ILOGBNAN;
+ return INT_MAX; /* NOTE(review): INT_MAX needs <limits.h>, which this file does not include directly.  */
+ }
+ DOUBLE_TO_WORDS (dret, ret); /* NOTE(review): a subnormal X has a zero exponent field, so this yields -1023 whatever its significand -- confirm intended.  */
+ return ret;
+}
+/* Alias __ilogb to ilogb, and to the long double names when NO_LONG_DOUBLE is defined.  */
+weak_alias (__ilogb, ilogb)
+#ifdef NO_LONG_DOUBLE
+strong_alias (__ilogb, __ilogbl) weak_alias (__ilogb, ilogbl)
+#endif
diff --git a/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c b/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c
new file mode 100644
index 0000000..13fdf35
--- /dev/null
+++ b/sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c
@@ -0,0 +1,58 @@
+/* logb(). PowerPC64/POWER7 version.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+ Contributed by Adhemerval Zanella Netto <azanella@br.ibm.com>.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <errno.h>
+#include <math.h>
+#include <math_private.h>
+
+/* This implementation avoids FP to INT conversions by using VSX bitwise
+ * instructions over FP values. */
+
+static const double two1div52 = 2.220446049250313e-16; /* 1/2**52: scales the masked exponent bits down by 52 places.  */
+static const double two10m1 = -1023.0; /* -(2**10 - 1): removes the exponent bias.  */
+
+/* FP mask to extract the exponent: the integer pattern is read back through D as a double for use as an asm operand.  */
+static const union {
+ unsigned long long mask;
+ double d;
+} mask = { 0x7ff0000000000000ULL };
+/* Return the unbiased binary exponent of X as a double; zero gives -1.0 / fabs (x), and an all-ones exponent field (infinity or NaN) gives x * x.  */
+double
+__logb (double x)
+{
+ double ret;
+ if (x == 0.0)
+ return -1.0 / fabs (x); /* Negative one divided by +0.0: -infinity.  */
+ /* ret = x & 0x7ff0000000000000, then fcfid converts that 64-bit integer pattern to a double.  */
+ asm (
+ "xxland %x0,%x1,%x2\n"
+ "fcfid %0,%0"
+ : "=f" (ret)
+ : "f" (x), "f" (mask.d));
+ /* ret = (ret >> 52) - 1023.0; the multiply by 2**-52 stands in for the shift.  */
+ ret = (ret * two1div52) + two10m1;
+ if (__builtin_expect (ret > -two10m1, 0)) /* Above 1023: all-ones exponent field, i.e. infinity or NaN.  */
+ return (x * x); /* +infinity for an infinity, NaN for a NaN.  */
+ return ret; /* NOTE(review): a subnormal X has a zero exponent field, so this is -1023.0 whatever its significand -- confirm intended.  */
+}
+/* Alias __logb to logb, and to the long double names when NO_LONG_DOUBLE is defined.  */
+weak_alias (__logb, logb)
+#ifdef NO_LONG_DOUBLE
+strong_alias (__logb, __logbl) weak_alias (__logb, logbl)
+#endif
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_ilogb.c b/sysdeps/powerpc/powerpc64/power7/fpu/s_ilogb.c
new file mode 100644
index 0000000..5756ecb
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_ilogb.c
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/powerpc32/power7/fpu/s_ilogb.c>
diff --git a/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c b/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c
new file mode 100644
index 0000000..ff3a9e0
--- /dev/null
+++ b/sysdeps/powerpc/powerpc64/power7/fpu/s_logb.c
@@ -0,0 +1 @@
+#include <sysdeps/powerpc/powerpc32/power7/fpu/s_logb.c>
--
1.6.0.2
--
Adhemerval Zanella Netto
Software Engineer
Linux Technology Center Brazil
Toolchain / GLIBC on Power Architecture
azanella@linux.vnet.ibm.com / azanella@br.ibm.com
+55 61 8642-9890