This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] PowerPC: Set/restore rounding mode only when needed


Thanks for the review Siddhesh, below I'm sending a version 2 of my patch.
I have corrected all the suggestion you have made and I also included a fix
to work correctly with my patch 'PowerPC: Fix __fe_mask_env export'. I also
added some ldbl-128 fixes that I forgot to add on previous patch (change the
fegetround to __fegetround to avoid PLT calls since fegetround if not an
inline version anymore).

---

2013-11-20  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

	* sysdeps/ieee754/ldbl-128ibm/e_expl.c (__ieee754_expl): Use
	SET_RESTORE_ROUND instead of feholdexcept/fesetround/fesetenv.
	* sysdeps/powerpc/fpu/fenv_libc.h (__fegetround): Remove define.
	(__fesetround): Remove define.
	* sysdeps/powerpc/fpu/fenv_private.h: New file: Inline floating point
	rounding and exceptions handling.
	* sysdeps/powerpc/fpu/math_private.h: Include fenv_private.h.
	* sysdeps/powerpc/fpu/fenv_libc.h (__fe_mask_env): Define as hidden.
	(__fe_nomask_env): Likewise.
	* sysdeps/ieee754/ldbl-128ibm/s_llrintl.c (__llrintl): Use 
	__fegetround instead of fegetround.
	* sysdeps/ieee754/ldbl-128ibm/s_lrintl.c (__lrintl): Likewise.
	* sysdeps/ieee754/ldbl-128ibm/s_rintl.c (__rintl): Likewise.

--

diff --git a/sysdeps/ieee754/ldbl-128ibm/e_expl.c b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
index f7c50bf..65ef185 100644
--- a/sysdeps/ieee754/ldbl-128ibm/e_expl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/e_expl.c
@@ -134,18 +134,17 @@ static const long double C[] = {
 long double
 __ieee754_expl (long double x)
 {
+  long double result, x22;
+  union ibm_extended_long_double ex2_u, scale_u;
+  int unsafe;
+
   /* Check for usual case.  */
   if (isless (x, himark) && isgreater (x, lomark))
     {
-      int tval1, tval2, unsafe, n_i, exponent2;
-      long double x22, n, result, xl;
-      union ibm_extended_long_double ex2_u, scale_u;
-      fenv_t oldenv;
-
-      feholdexcept (&oldenv);
-#ifdef FE_TONEAREST
-      fesetround (FE_TONEAREST);
-#endif
+      int tval1, tval2, n_i, exponent2;
+      long double n, xl;
+
+      SET_RESTORE_ROUND (FE_TONEAREST);
 
       n = __roundl (x*M_1_LN2);
       x = x-n*M_LN2_0;
@@ -201,11 +200,6 @@ __ieee754_expl (long double x)
 	 less than 4.8e-39.  */
       x22 = x + x*x*(P1+x*(P2+x*(P3+x*(P4+x*(P5+x*P6)))));
 
-      /* Return result.  */
-      fesetenv (&oldenv);
-
-      result = x22 * ex2_u.ld + ex2_u.ld;
-
       /* Now we can test whether the result is ultimate or if we are unsure.
 	 In the later case we should probably call a mpn based routine to give
 	 the ultimate result.
@@ -235,10 +229,6 @@ __ieee754_expl (long double x)
 	    return __ieee754_expl_proc2 (origx);
 	  }
        */
-      if (!unsafe)
-	return result;
-      else
-	return result * scale_u.ld;
     }
   /* Exceptional cases:  */
   else if (isless (x, himark))
@@ -253,5 +243,10 @@ __ieee754_expl (long double x)
   else
     /* Return x, if x is a NaN or Inf; or overflow, otherwise.  */
     return TWO1023*x;
+
+  result = x22 * ex2_u.ld + ex2_u.ld;
+  if (!unsafe)
+    return result;
+  return result * scale_u.ld;
 }
 strong_alias (__ieee754_expl, __expl_finite)
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
index 8560349..3503973 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_llrintl.c
@@ -43,7 +43,7 @@ __llrintl (long double x)
 #endif
     )
     {
-      save_round = fegetround ();
+      save_round = __fegetround ();
 
       if (__builtin_expect ((xh == -(double) (-__LONG_LONG_MAX__ - 1)), 0))
 	{
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
index 588098d..49dbd42 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_lrintl.c
@@ -49,7 +49,7 @@ __lrintl (long double x)
 #endif
     )
     {
-      save_round = fegetround ();
+      save_round = __fegetround ();
 
 #if __LONG_MAX__ == 2147483647
       long long llhi = (long long) xh;
diff --git a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
index 48dbe85..5fd6bb8 100644
--- a/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
+++ b/sysdeps/ieee754/ldbl-128ibm/s_rintl.c
@@ -40,7 +40,7 @@ __rintl (long double x)
 					     __builtin_inf ()), 1))
     {
       double orig_xh;
-      int save_round = fegetround ();
+      int save_round = __fegetround ();
 
       /* Long double arithmetic, including the canonicalisation below,
 	 only works in round-to-nearest mode.  */
diff --git a/sysdeps/powerpc/fpu/fenv_libc.h b/sysdeps/powerpc/fpu/fenv_libc.h
index cb15c1c..ecd6b91 100644
--- a/sysdeps/powerpc/fpu/fenv_libc.h
+++ b/sysdeps/powerpc/fpu/fenv_libc.h
@@ -23,9 +23,9 @@
 #include <ldsodefs.h>
 #include <sysdep.h>
 
-extern const fenv_t *__fe_nomask_env (void);
+extern const fenv_t *__fe_nomask_env (void) attribute_hidden;
 
-extern const fenv_t *__fe_mask_env (void);
+extern const fenv_t *__fe_mask_env (void) attribute_hidden;
 
 /* The sticky bits in the FPSCR indicating exceptions have occurred.  */
 #define FPSCR_STICKY_BITS ((FE_ALL_EXCEPT | FE_ALL_INVALID) & ~FE_INVALID)
@@ -83,7 +83,6 @@ __fegetround (void)
 		"mfcr  %0" : "=r"(result) : : "cr7");
   return result & 3;
 }
-#define fegetround() __fegetround()
 
 static inline int
 __fesetround (int round)
@@ -107,7 +106,6 @@ __fesetround (int round)
 
   return 0;
 }
-#define fesetround(mode) __fesetround(mode)
 
 /* Definitions of all the FPSCR bit numbers */
 enum {
diff --git a/sysdeps/powerpc/fpu/fenv_private.h b/sysdeps/powerpc/fpu/fenv_private.h
new file mode 100644
index 0000000..52db35a
--- /dev/null
+++ b/sysdeps/powerpc/fpu/fenv_private.h
@@ -0,0 +1,274 @@
+/* Private floating point rounding and exceptions handling. PowerPC version.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef FENV_PRIVATE_H
+#define FENV_PRIVATE_H 1
+
+#include <fenv.h>
+#include <fenv_libc.h>
+#include <fpu_control.h>
+
+#define _FPU_MASK_ALL (_FPU_MASK_ZM | _FPU_MASK_OM | _FPU_MASK_UM \
+                      | _FPU_MASK_XM | _FPU_MASK_IM)
+
+/* Mask everything but the rounding moded and non-IEEE arithmetic flags.  */
+#define _FPU_MASK_ROUNDING 0xffffffff00000007LL
+
+/* Mask restore rounding mode and exception enabled.  */
+#define _FPU_MASK_EXCEPT_ROUND 0xffffffff1fffff00LL
+
+/* Mask exception enable but fraction rounded/inexact and FP result/CC
+   bits.  */
+#define _FPU_MASK_FRAC_INEX_RET_CC 0x1ff80fff
+
+static __always_inline void
+libc_feholdexcept_ppc (fenv_t *envp)
+{
+  fenv_union_t old, new;
+
+  old.fenv = *envp = fegetenv_register ();
+
+  new.l = old.l & _FPU_MASK_ROUNDING;
+
+  /* If the old env had any enabled exceptions, then mask SIGFPE in the
+     MSR FE0/FE1 bits.  This may allow the FPU to run faster because it
+     always takes the default action and can not generate SIGFPE.  */
+  if ((old.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_mask_env ();
+
+  fesetenv_register (new.fenv);
+}
+
+static __always_inline void
+libc_fesetround_ppc (int r)
+{
+  fesetround (r);
+}
+
+static __always_inline void
+libc_feholdexcept_setround_ppc (fenv_t *envp, int r)
+{
+  fenv_union_t old, new;
+
+  old.fenv = *envp = fegetenv_register ();
+
+  new.l = (old.l & _FPU_MASK_ROUNDING) | r;
+
+  if ((old.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_mask_env ();
+
+  fesetenv_register (new.fenv);
+}
+
+static __always_inline int
+libc_fetestexcept_ppc (int e)
+{
+  fenv_union_t u;
+  u.fenv = fegetenv_register ();
+  return u.l & e;
+}
+
+static __always_inline void
+libc_fesetenv_ppc (const fenv_t *envp)
+{
+  fenv_union_t old, new;
+
+  new.fenv = *envp;
+  old.fenv = fegetenv_register ();
+
+  /* If the old env has no enabled exceptions and the new env has any enabled
+     exceptions, then unmask SIGFPE in the MSR FE0/FE1 bits.  This will put the
+     hardware into "precise mode" and may cause the FPU to run slower on some
+     hardware.  */
+  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_nomask_env ();
+
+  /* If the old env had any enabled exceptions and the new env has no enabled
+     exceptions, then mask SIGFPE in the MSR FE0/FE1 bits.  This may allow the
+     FPU to run faster because it always takes the default action and can not
+     generate SIGFPE.  */
+  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+    (void) __fe_mask_env ();
+
+  fesetenv_register (*envp);
+}
+
+static __always_inline int
+libc_feupdateenv_test_ppc (fenv_t *envp, int ex)
+{
+  fenv_union_t old, new;
+
+  new.fenv = *envp;
+  old.fenv = fegetenv_register ();
+
+  /* Restore rounding mode and exception enable from *envp and merge
+     exceptions.  Leave fraction rounded/inexact and FP result/CC bits
+     unchanged.  */
+  new.l = (old.l & _FPU_MASK_EXCEPT_ROUND) 
+          | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
+
+  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_nomask_env ();
+
+  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+    (void) __fe_mask_env ();
+
+  fesetenv_register (new.fenv);
+
+  return old.l & ex;
+}
+
+static __always_inline void
+libc_feupdateenv_ppc (fenv_t *e)
+{
+  libc_feupdateenv_test_ppc (e, 0);
+}
+
+static __always_inline void
+libc_feholdsetround_ppc (fenv_t *e, int r)
+{
+  fenv_union_t old, new;
+
+  old.fenv = fegetenv_register ();
+  /* Clear current precision and set newer one.  */
+  new.l = (old.l & ~0x3) | r;
+  *e = old.fenv;
+
+  if ((old.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_mask_env ();
+  fesetenv_register (new.fenv);
+}
+
+static __always_inline void
+libc_feresetround_ppc (fenv_t *envp)
+{
+  fenv_union_t old, new;
+
+  new.fenv = *envp;
+  old.fenv = fegetenv_register ();
+
+  /* Restore rounding mode and exception enable from *envp and merge
+     exceptions.  Leave fraction rounded/inexact and FP result/CC bits
+     unchanged.  */
+  new.l = (old.l & _FPU_MASK_EXCEPT_ROUND)
+          | (new.l & _FPU_MASK_FRAC_INEX_RET_CC);
+
+  if ((old.l & _FPU_MASK_ALL) == 0 && (new.l & _FPU_MASK_ALL) != 0)
+    (void) __fe_nomask_env ();
+
+  if ((old.l & _FPU_MASK_ALL) != 0 && (new.l & _FPU_MASK_ALL) == 0)
+    (void) __fe_mask_env ();
+
+  /* Atomically enable and raise (if appropriate) exceptions set in `new'.  */
+  fesetenv_register (new.fenv);
+}
+
+#define libc_feholdexceptf           libc_feholdexcept_ppc
+#define libc_feholdexcept            libc_feholdexcept_ppc
+#define libc_feholdexcept_setroundf  libc_feholdexcept_setround_ppc
+#define libc_feholdexcept_setround   libc_feholdexcept_setround_ppc
+#define libc_fetestexceptf           libc_fetestexcept_ppc
+#define libc_fetestexcept            libc_fetestexcept_ppc
+#define libc_fesetroundf             libc_fesetround_ppc
+#define libc_fesetround              libc_fesetround_ppc
+#define libc_fesetenvf               libc_fesetenv_ppc
+#define libc_fesetenv                libc_fesetenv_ppc
+#define libc_feupdateenv_testf       libc_feupdateenv_test_ppc
+#define libc_feupdateenv_test        libc_feupdateenv_test_ppc
+#define libc_feupdateenvf            libc_feupdateenv_ppc
+#define libc_feupdateenv             libc_feupdateenv_ppc
+#define libc_feholdsetroundf         libc_feholdsetround_ppc
+#define libc_feholdsetround          libc_feholdsetround_ppc
+#define libc_feresetroundf           libc_feresetround_ppc
+#define libc_feresetround            libc_feresetround_ppc
+
+
+/* We have support for rounding mode context.  */
+#define HAVE_RM_CTX 1
+
+static __always_inline void
+libc_feholdexcept_setround_ppc_ctx (struct rm_ctx *ctx, int r)
+{
+  fenv_union_t old, new;
+
+  old.fenv = fegetenv_register ();
+
+  new.l = (old.l & _FPU_MASK_ROUNDING) | r;
+  ctx->env = old.fenv;
+  if (__glibc_unlikely (new.l != old.l))
+    {
+      if ((old.l & _FPU_MASK_ALL) != 0)
+	(void) __fe_mask_env ();
+      fesetenv_register (new.fenv);
+      ctx->updated_status = true;
+    }
+  else
+    ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_fesetenv_ppc_ctx (struct rm_ctx *ctx)
+{
+  libc_fesetenv_ppc (&ctx->env);
+}
+
+static __always_inline void
+libc_feupdateenv_ppc_ctx (struct rm_ctx *ctx)
+{
+  if (__glibc_unlikely (ctx->updated_status))
+    libc_feupdateenv_test_ppc (&ctx->env, 0);
+}
+
+static __always_inline void
+libc_feholdsetround_ppc_ctx (struct rm_ctx *ctx, int r)
+{
+  fenv_union_t old, new;
+
+  old.fenv = fegetenv_register ();
+  new.l = (old.l & ~0x3) | r;
+  ctx->env = old.fenv;
+  if (__glibc_unlikely (new.l != old.l))
+    {
+      if ((old.l & _FPU_MASK_ALL) != 0)
+	(void) __fe_mask_env ();
+      fesetenv_register (new.fenv);
+      ctx->updated_status = true;
+    }
+  else
+    ctx->updated_status = false;
+}
+
+static __always_inline void
+libc_feresetround_ppc_ctx (struct rm_ctx *ctx)
+{
+  if (__glibc_unlikely (ctx->updated_status))
+    libc_feresetround_ppc (&ctx->env);
+}
+
+#define libc_feholdexcept_setroundf_ctx  libc_feholdexcept_setround_ppc_ctx
+#define libc_feholdexcept_setround_ctx   libc_feholdexcept_setround_ppc_ctx
+#define libc_fesetenv_ctx                libc_fesetenv_ppc_ctx
+#define libc_fesetenvf_ctx               libc_fesetenv_ppc_ctx
+#define libc_feholdsetround_ctx          libc_feholdsetround_ppc_ctx
+#define libc_feholdsetroundf_ctx         libc_feholdsetround_ppc_ctx
+#define libc_feresetround_ctx            libc_feresetround_ppc_ctx
+#define libc_feresetroundf_ctx           libc_feresetround_ppc_ctx
+#define libc_feupdateenvf_ctx            libc_feupdateenv_ppc_ctx
+#define libc_feupdateenv_ctx             libc_feupdateenv_ppc_ctx
+
+#endif
diff --git a/sysdeps/powerpc/fpu/math_private.h b/sysdeps/powerpc/fpu/math_private.h
index 6c00785..c8833d6 100644
--- a/sysdeps/powerpc/fpu/math_private.h
+++ b/sysdeps/powerpc/fpu/math_private.h
@@ -22,6 +22,7 @@
 #include <sysdep.h>
 #include <ldsodefs.h>
 #include <dl-procinfo.h>
+#include <fenv_private.h>
 #include_next <math_private.h>
 
 # if __WORDSIZE == 64 || defined _ARCH_PWR4


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]