This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]
GNU C Library master sources branch, master, updated. glibc-2.14-409-ga1a8716

From: drepper at sourceware dot org
To: glibc-cvs at sourceware dot org
Date: 22 Oct 2011 23:03:14 -0000
Subject: GNU C Library master sources branch, master, updated. glibc-2.14-409-ga1a8716
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  a1a8716924f31400e81319c9124e1182fd9e8e83 (commit)
      from  dbc36f153e7f35d8c890871528438bb8ade99886 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=a1a8716924f31400e81319c9124e1182fd9e8e83

commit a1a8716924f31400e81319c9124e1182fd9e8e83
Author: Ulrich Drepper <drepper@gmail.com>
Date:   Sat Oct 22 19:02:20 2011 -0400

    Start using fma in the libm implementation

diff --git a/ChangeLog b/ChangeLog
index d36e848..5329c14 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,19 @@
+2011-10-22  Ulrich Drepper  <drepper@gmail.com>
+
+	* sysdeps/ieee754/dbl-64/dla.h: When compiling with FMA4 support
+	define DLA_FMA.
+	[DLA_FMA] (EMULV): Use DLA_FMA.
+	[DLA_FMA] (MUL12): Use EMULV.
+	* sysdeps/ieee754/dbl-64/doasin.c [DLA_FMA]: Don't define variables
+	that are not needed.
+	* sysdeps/ieee754/dbl-64/dosincos.c: Likewise.
+	* sysdeps/ieee754/dbl-64/e_atan2.c: Likewise.
+	* sysdeps/ieee754/dbl-64/e_log.c: Likewise.
+	* sysdeps/ieee754/dbl-64/e_pow.c: Likewise.
+	* sysdeps/ieee754/dbl-64/halfulp.c: Likewise.
+	* sysdeps/ieee754/dbl-64/s_atan.c: Likewise.
+	* sysdeps/ieee754/dbl-64/s_tan.c: Likewise.
+
 2011-10-22  Andreas Schwab  <schwab@linux-m68k.org>
 
 	* math/s_nan.c: Undef __nan.
diff --git a/sysdeps/ieee754/dbl-64/dla.h b/sysdeps/ieee754/dbl-64/dla.h
index bf73fa9..9f095f9 100644
--- a/sysdeps/ieee754/dbl-64/dla.h
+++ b/sysdeps/ieee754/dbl-64/dla.h
@@ -1,7 +1,7 @@
 /*
  * IBM Accurate Mathematical Library
  * Written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation, Inc.
+ * Copyright (C) 2001, 2011 Free Software Foundation, Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -35,6 +35,18 @@
 /* IEEE double.                                                        */
 /***********************************************************************/
 
+/* We can use fma instructions if available.  */
+#if defined __x86_64__ || (defined __i386__ && defined __SSE2_MATH__)
+# ifdef __FMA4__
+#  define DLA_FMA(x,y,z) \
+	   ({ double __zz; \
+	      asm ("vfmaddsd %3, %2, %1, %0"				      \
+		   : "=x" (__zz) : "x" (x), "xm" (y), "x" (-z));	      \
+	      __zz; })
+# endif
+#endif
+
+
 /* CN = 1+2**27 = '41a0000002000000' IEEE double format */
 #define  CN   134217729.0
 
@@ -44,7 +56,7 @@
 /* z+zz = x+y exactly.                                                 */
 
 #define  EADD(x,y,z,zz)  \
-           z=(x)+(y);  zz=(ABS(x)>ABS(y)) ? (((x)-(z))+(y)) : (((y)-(z))+(x));
+	   z=(x)+(y);  zz=(ABS(x)>ABS(y)) ? (((x)-(z))+(y)) : (((y)-(z))+(x));
 
 
 /* Exact subtraction of two single-length floating point numbers, Dekker. */
@@ -52,7 +64,7 @@
 /* z+zz = x-y exactly.                                                    */
 
 #define  ESUB(x,y,z,zz)  \
-           z=(x)-(y);  zz=(ABS(x)>ABS(y)) ? (((x)-(z))-(y)) : ((x)-((y)+(z)));
+	   z=(x)-(y);  zz=(ABS(x)>ABS(y)) ? (((x)-(z))-(y)) : ((x)-((y)+(z)));
 
 
 /* Exact multiplication of two single-length floating point numbers,   */
@@ -60,10 +72,15 @@
 /* satisfies z+zz = x*y exactly. p,hx,tx,hy,ty are temporary           */
 /* storage variables of type double.                                   */
 
-#define  EMULV(x,y,z,zz,p,hx,tx,hy,ty)          \
-           p=CN*(x);  hx=((x)-p)+p;  tx=(x)-hx; \
-           p=CN*(y);  hy=((y)-p)+p;  ty=(y)-hy; \
-           z=(x)*(y); zz=(((hx*hy-z)+hx*ty)+tx*hy)+tx*ty;
+#ifdef DLA_FMA
+# define  EMULV(x,y,z,zz,p,hx,tx,hy,ty)          \
+	   z=x*y; zz=DLA_FMA(x,y,z);
+#else
+# define  EMULV(x,y,z,zz,p,hx,tx,hy,ty)          \
+	   p=CN*(x);  hx=((x)-p)+p;  tx=(x)-hx; \
+	   p=CN*(y);  hy=((y)-p)+p;  ty=(y)-hy; \
+	   z=(x)*(y); zz=(((hx*hy-z)+hx*ty)+tx*hy)+tx*ty;
+#endif
 
 
 /* Exact multiplication of two single-length floating point numbers, Dekker. */
@@ -71,10 +88,15 @@
 /* that satisfies z+zz = x*y exactly. p,hx,tx,hy,ty,q are temporary          */
 /* storage variables of type double.                                         */
 
-#define  MUL12(x,y,z,zz,p,hx,tx,hy,ty,q)        \
-           p=CN*(x);  hx=((x)-p)+p;  tx=(x)-hx; \
-           p=CN*(y);  hy=((y)-p)+p;  ty=(y)-hy; \
-           p=hx*hy;  q=hx*ty+tx*hy; z=p+q;  zz=((p-z)+q)+tx*ty;
+#ifdef DLA_FMA
+# define  MUL12(x,y,z,zz,p,hx,tx,hy,ty,q)        \
+	   EMULV(x,y,z,zz,p,hx,tx,hy,ty)
+#else
+# define  MUL12(x,y,z,zz,p,hx,tx,hy,ty,q)        \
+	   p=CN*(x);  hx=((x)-p)+p;  tx=(x)-hx; \
+	   p=CN*(y);  hy=((y)-p)+p;  ty=(y)-hy; \
+	   p=hx*hy;  q=hx*ty+tx*hy; z=p+q;  zz=((p-z)+q)+tx*ty;
+#endif
 
 
 /* Double-length addition, Dekker. The macro produces a double-length   */
@@ -84,10 +106,10 @@
 /* storage variables of type double.                                    */
 
 #define  ADD2(x,xx,y,yy,z,zz,r,s)                    \
-           r=(x)+(y);  s=(ABS(x)>ABS(y)) ?           \
-                       (((((x)-r)+(y))+(yy))+(xx)) : \
-                       (((((y)-r)+(x))+(xx))+(yy));  \
-           z=r+s;  zz=(r-z)+s;
+	   r=(x)+(y);  s=(ABS(x)>ABS(y)) ?           \
+		       (((((x)-r)+(y))+(yy))+(xx)) : \
+		       (((((y)-r)+(x))+(xx))+(yy));  \
+	   z=r+s;  zz=(r-z)+s;
 
 
 /* Double-length subtraction, Dekker. The macro produces a double-length  */
@@ -97,10 +119,10 @@
 /* storage variables of type double.                                      */
 
 #define  SUB2(x,xx,y,yy,z,zz,r,s)                    \
-           r=(x)-(y);  s=(ABS(x)>ABS(y)) ?           \
-                       (((((x)-r)-(y))-(yy))+(xx)) : \
-                       ((((x)-((y)+r))+(xx))-(yy));  \
-           z=r+s;  zz=(r-z)+s;
+	   r=(x)-(y);  s=(ABS(x)>ABS(y)) ?           \
+		       (((((x)-r)-(y))-(yy))+(xx)) : \
+		       ((((x)-((y)+r))+(xx))-(yy));  \
+	   z=r+s;  zz=(r-z)+s;
 
 
 /* Double-length multiplication, Dekker. The macro produces a double-length  */
@@ -110,8 +132,8 @@
 /* temporary storage variables of type double.                               */
 
 #define  MUL2(x,xx,y,yy,z,zz,p,hx,tx,hy,ty,q,c,cc)  \
-           MUL12(x,y,c,cc,p,hx,tx,hy,ty,q)          \
-           cc=((x)*(yy)+(xx)*(y))+cc;   z=c+cc;   zz=(c-z)+cc;
+	   MUL12(x,y,c,cc,p,hx,tx,hy,ty,q)          \
+	   cc=((x)*(yy)+(xx)*(y))+cc;   z=c+cc;   zz=(c-z)+cc;
 
 
 /* Double-length division, Dekker. The macro produces a double-length        */
@@ -121,8 +143,8 @@
 /* are temporary storage variables of type double.                           */
 
 #define  DIV2(x,xx,y,yy,z,zz,p,hx,tx,hy,ty,q,c,cc,u,uu)  \
-           c=(x)/(y);   MUL12(c,y,u,uu,p,hx,tx,hy,ty,q)  \
-           cc=(((((x)-u)-uu)+(xx))-c*(yy))/(y);   z=c+cc;   zz=(c-z)+cc;
+	   c=(x)/(y);   MUL12(c,y,u,uu,p,hx,tx,hy,ty,q)  \
+	   cc=(((((x)-u)-uu)+(xx))-c*(yy))/(y);   z=c+cc;   zz=(c-z)+cc;
 
 
 /* Double-length addition, slower but more accurate than ADD2.               */
@@ -133,17 +155,17 @@
 /* are temporary storage variables of type double.                           */
 
 #define  ADD2A(x,xx,y,yy,z,zz,r,rr,s,ss,u,uu,w)                        \
-           r=(x)+(y);                                                  \
-           if (ABS(x)>ABS(y)) { rr=((x)-r)+(y);  s=(rr+(yy))+(xx); }   \
-           else               { rr=((y)-r)+(x);  s=(rr+(xx))+(yy); }   \
-           if (rr!=0.0) {                                              \
-             z=r+s;  zz=(r-z)+s; }                                     \
-           else {                                                      \
-             ss=(ABS(xx)>ABS(yy)) ? (((xx)-s)+(yy)) : (((yy)-s)+(xx)); \
-             u=r+s;                                                    \
-             uu=(ABS(r)>ABS(s))   ? ((r-u)+s)   : ((s-u)+r)  ;         \
-             w=uu+ss;  z=u+w;                                          \
-             zz=(ABS(u)>ABS(w))   ? ((u-z)+w)   : ((w-z)+u)  ; }
+	   r=(x)+(y);                                                  \
+	   if (ABS(x)>ABS(y)) { rr=((x)-r)+(y);  s=(rr+(yy))+(xx); }   \
+	   else               { rr=((y)-r)+(x);  s=(rr+(xx))+(yy); }   \
+	   if (rr!=0.0) {                                              \
+	     z=r+s;  zz=(r-z)+s; }                                     \
+	   else {                                                      \
+	     ss=(ABS(xx)>ABS(yy)) ? (((xx)-s)+(yy)) : (((yy)-s)+(xx)); \
+	     u=r+s;                                                    \
+	     uu=(ABS(r)>ABS(s))   ? ((r-u)+s)   : ((s-u)+r)  ;         \
+	     w=uu+ss;  z=u+w;                                          \
+	     zz=(ABS(u)>ABS(w))   ? ((u-z)+w)   : ((w-z)+u)  ; }
 
 
 /* Double-length subtraction, slower but more accurate than SUB2.            */
@@ -154,21 +176,14 @@
 /* are temporary storage variables of type double.                           */
 
 #define  SUB2A(x,xx,y,yy,z,zz,r,rr,s,ss,u,uu,w)                        \
-           r=(x)-(y);                                                  \
-           if (ABS(x)>ABS(y)) { rr=((x)-r)-(y);  s=(rr-(yy))+(xx); }   \
-           else               { rr=(x)-((y)+r);  s=(rr+(xx))-(yy); }   \
-           if (rr!=0.0) {                                              \
-             z=r+s;  zz=(r-z)+s; }                                     \
-           else {                                                      \
-             ss=(ABS(xx)>ABS(yy)) ? (((xx)-s)-(yy)) : ((xx)-((yy)+s)); \
-             u=r+s;                                                    \
-             uu=(ABS(r)>ABS(s))   ? ((r-u)+s)   : ((s-u)+r)  ;         \
-             w=uu+ss;  z=u+w;                                          \
-             zz=(ABS(u)>ABS(w))   ? ((u-z)+w)   : ((w-z)+u)  ; }
-
-
-
-
-
-
-
+	   r=(x)-(y);                                                  \
+	   if (ABS(x)>ABS(y)) { rr=((x)-r)-(y);  s=(rr-(yy))+(xx); }   \
+	   else               { rr=(x)-((y)+r);  s=(rr+(xx))-(yy); }   \
+	   if (rr!=0.0) {                                              \
+	     z=r+s;  zz=(r-z)+s; }                                     \
+	   else {                                                      \
+	     ss=(ABS(xx)>ABS(yy)) ? (((xx)-s)-(yy)) : ((xx)-((yy)+s)); \
+	     u=r+s;                                                    \
+	     uu=(ABS(r)>ABS(s))   ? ((r-u)+s)   : ((s-u)+r)  ;         \
+	     w=uu+ss;  z=u+w;                                          \
+	     zz=(ABS(u)>ABS(w))   ? ((u-z)+w)   : ((w-z)+u)  ; }
diff --git a/sysdeps/ieee754/dbl-64/doasin.c b/sysdeps/ieee754/dbl-64/doasin.c
index 79f344a..9ed7609 100644
--- a/sysdeps/ieee754/dbl-64/doasin.c
+++ b/sysdeps/ieee754/dbl-64/doasin.c
@@ -1,7 +1,7 @@
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation
+ * Copyright (C) 2001, 2011 Free Software Foundation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -52,7 +52,10 @@ void __doasin(double x, double dx, double v[]) {
     d11 = 0.79470250400727425881446981833568758E-02;
 
   double xx,p,pp,u,uu,r,s;
-  double hx,tx,hy,ty,tp,tq,tc,tcc;
+  double tc,tcc;
+#ifndef DLA_FMA
+  double hx,tx,hy,ty,tp,tq;
+#endif
 
 
 /* Taylor series for arcsin for Double-Length numbers         */
diff --git a/sysdeps/ieee754/dbl-64/dosincos.c b/sysdeps/ieee754/dbl-64/dosincos.c
index 1d347a4..654f342 100644
--- a/sysdeps/ieee754/dbl-64/dosincos.c
+++ b/sysdeps/ieee754/dbl-64/dosincos.c
@@ -1,7 +1,7 @@
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation
+ * Copyright (C) 2001, 2011 Free Software Foundation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -48,8 +48,11 @@
 /***********************************************************************/
 
 void __dubsin(double x, double dx, double v[]) {
-  double r,s,p,hx,tx,hy,ty,q,c,cc,d,dd,d2,dd2,e,ee,
+  double r,s,c,cc,d,dd,d2,dd2,e,ee,
     sn,ssn,cs,ccs,ds,dss,dc,dcc;
+#ifndef DLA_FMA
+  double p,hx,tx,hy,ty,q;
+#endif
 #if 0
   double xx,y,yy,z,zz;
 #endif
@@ -61,7 +64,7 @@ void __dubsin(double x, double dx, double v[]) {
   x=x-(u.x-big.x);
   d=x+dx;
   dd=(x-d)+dx;
-         /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
+	 /* sin(x+dx)=sin(Xi+t)=sin(Xi)*cos(t) + cos(Xi)sin(t) where t ->0 */
   MUL2(d,dd,d,dd,d2,dd2,p,hx,tx,hy,ty,q,c,cc);
   sn=sincos.x[k];     /*                                  */
   ssn=sincos.x[k+1];  /*      sin(Xi) and cos(Xi)         */
@@ -99,8 +102,11 @@ void __dubsin(double x, double dx, double v[]) {
 /**********************************************************************/
 
 void __dubcos(double x, double dx, double v[]) {
-  double r,s,p,hx,tx,hy,ty,q,c,cc,d,dd,d2,dd2,e,ee,
+  double r,s,c,cc,d,dd,d2,dd2,e,ee,
     sn,ssn,cs,ccs,ds,dss,dc,dcc;
+#ifndef DLA_FMA
+  double p,hx,tx,hy,ty,q;
+#endif
 #if 0
   double xx,y,yy,z,zz;
 #endif
@@ -166,15 +172,15 @@ void __docos(double x, double dx, double v[]) {
   if (x>0) {y=x; yy=dx;}
      else {y=-x; yy=-dx;}
   if (y<0.5*hp0.x)                                 /*  y< PI/4    */
-           {__dubcos(y,yy,w); v[0]=w[0]; v[1]=w[1];}
+	   {__dubcos(y,yy,w); v[0]=w[0]; v[1]=w[1];}
      else if (y<1.5*hp0.x) {                       /* y< 3/4 * PI */
        p=hp0.x-y;  /* p = PI/2 - y */
        yy=hp1.x-yy;
        y=p+yy;
        yy=(p-y)+yy;
        if (y>0) {__dubsin(y,yy,w); v[0]=w[0]; v[1]=w[1];}
-                                       /* cos(x) = sin ( 90 -  x ) */
-         else {__dubsin(-y,-yy,w); v[0]=-w[0]; v[1]=-w[1];
+				       /* cos(x) = sin ( 90 -  x ) */
+	 else {__dubsin(-y,-yy,w); v[0]=-w[0]; v[1]=-w[1];
 	 }
      }
   else { /* y>= 3/4 * PI */
diff --git a/sysdeps/ieee754/dbl-64/e_atan2.c b/sysdeps/ieee754/dbl-64/e_atan2.c
index 784fc5a..4d8c23a 100644
--- a/sysdeps/ieee754/dbl-64/e_atan2.c
+++ b/sysdeps/ieee754/dbl-64/e_atan2.c
@@ -1,7 +1,7 @@
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation
+ * Copyright (C) 2001, 2011 Free Software Foundation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -62,8 +62,11 @@ double __ieee754_atan2(double y,double x) {
   int p;
 #endif
   static const int pr[MM]={6,8,10,20,32};
-  double ax,ay,u,du,u9,ua,v,vv,dv,t1,t2,t3,t4,t5,t6,t7,t8,
+  double ax,ay,u,du,u9,ua,v,vv,dv,t1,t2,t3,t7,t8,
 	 z,zz,cor,s1,ss1,s2,ss2;
+#ifndef DLA_FMA
+  double t4,t5,t6;
+#endif
 #if 0
   double z1,z2;
 #endif
diff --git a/sysdeps/ieee754/dbl-64/e_log.c b/sysdeps/ieee754/dbl-64/e_log.c
index 0fc6f92..c158c8b 100644
--- a/sysdeps/ieee754/dbl-64/e_log.c
+++ b/sysdeps/ieee754/dbl-64/e_log.c
@@ -56,8 +56,11 @@ double __ieee754_log(double x) {
 #endif
   double dbl_n,u,p0,q,r0,w,nln2a,luai,lubi,lvaj,lvbj,
 	 sij,ssij,ttij,A,B,B0,y,y1,y2,polI,polII,sa,sb,
-	 t1,t2,t3,t4,t5,t6,t7,t8,t,ra,rb,ww,
+	 t1,t2,t7,t8,t,ra,rb,ww,
 	 a0,aa0,s1,s2,ss2,s3,ss3,a1,aa1,a,aa,b,bb,c;
+#ifndef DLA_FMA
+  double t3,t4,t5,t6;
+#endif
   number num;
   mp_no mpx,mpy,mpy1,mpy2,mperr;
 
diff --git a/sysdeps/ieee754/dbl-64/e_pow.c b/sysdeps/ieee754/dbl-64/e_pow.c
index 83a5eff..643e1cb 100644
--- a/sysdeps/ieee754/dbl-64/e_pow.c
+++ b/sysdeps/ieee754/dbl-64/e_pow.c
@@ -283,7 +283,10 @@ static double my_log2(double x, double *delta, double *error) {
   double cor;
 #endif
   double ou1,ou2,lu1,lu2,ov,lv1,lv2,a,a1,a2;
-  double y,yy,z,zz,j1,j2,j3,j4,j5,j6,j7,j8;
+  double y,yy,z,zz,j1,j2,j7,j8;
+#ifndef DLA_FMA
+  double j3,j4,j5,j6;
+#endif
   mynumber u,v;
 #ifdef BIG_ENDI
   mynumber
diff --git a/sysdeps/ieee754/dbl-64/halfulp.c b/sysdeps/ieee754/dbl-64/halfulp.c
index 42b21fb..5d27334 100644
--- a/sysdeps/ieee754/dbl-64/halfulp.c
+++ b/sysdeps/ieee754/dbl-64/halfulp.c
@@ -50,7 +50,10 @@ static const int4 tab54[32] = {
 double __halfulp(double x, double y)
 {
   mynumber v;
-  double z,u,uu,j1,j2,j3,j4,j5;
+  double z,u,uu;
+#ifndef DLA_FMA
+  double j1,j2,j3,j4,j5;
+#endif
   int4 k,l,m,n;
   if (y <= 0) {               /*if power is negative or zero */
     v.x = y;
diff --git a/sysdeps/ieee754/dbl-64/s_atan.c b/sysdeps/ieee754/dbl-64/s_atan.c
index 556f5b2..b948f50 100644
--- a/sysdeps/ieee754/dbl-64/s_atan.c
+++ b/sysdeps/ieee754/dbl-64/s_atan.c
@@ -1,7 +1,7 @@
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
- * Copyright (C) 2001 Free Software Foundation
+ * Copyright (C) 2001, 2011 Free Software Foundation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -52,8 +52,11 @@ double __signArctan(double,double);
 double atan(double x) {
 
 
-  double cor,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,u,u2,u3,
-         v,vv,w,ww,y,yy,z,zz;
+  double cor,s1,ss1,s2,ss2,t1,t2,t3,t7,t8,t9,t10,u,u2,u3,
+	 v,vv,w,ww,y,yy,z,zz;
+#ifndef DLA_FMA
+  double t4,t5,t6;
+#endif
 #if 0
   double y1,y2;
 #endif
@@ -78,44 +81,44 @@ double atan(double x) {
   if (u<C) {
     if (u<B) {
       if (u<A) {                                           /* u < A */
-         return x; }
+	 return x; }
       else {                                               /* A <= u < B */
-        v=x*x;  yy=x*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
-        if ((y=x+(yy-U1*x)) == x+(yy+U1*x))  return y;
-
-        EMULV(x,x,v,vv,t1,t2,t3,t4,t5)                       /* v+vv=x^2 */
-        s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
-        ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
-        MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-        ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
-        MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-        ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
-        MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-        ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
-        MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-        MUL2(x,ZERO,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
-        ADD2(x,ZERO,s2,ss2,s1,ss1,t1,t2)
-        if ((y=s1+(ss1-U5*s1)) == s1+(ss1+U5*s1))  return y;
-
-        return atanMp(x,pr);
+	v=x*x;  yy=x*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
+	if ((y=x+(yy-U1*x)) == x+(yy+U1*x))  return y;
+
+	EMULV(x,x,v,vv,t1,t2,t3,t4,t5)                       /* v+vv=x^2 */
+	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
+	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
+	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
+	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
+	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
+	ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
+	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
+	ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
+	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
+	MUL2(x,ZERO,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
+	ADD2(x,ZERO,s2,ss2,s1,ss1,t1,t2)
+	if ((y=s1+(ss1-U5*s1)) == s1+(ss1+U5*s1))  return y;
+
+	return atanMp(x,pr);
       } }
     else {  /* B <= u < C */
       i=(TWO52+TWO8*u)-TWO52;  i-=16;
       z=u-cij[i][0].d;
       yy=z*(cij[i][2].d+z*(cij[i][3].d+z*(cij[i][4].d+
-                        z*(cij[i][5].d+z* cij[i][6].d))));
+			z*(cij[i][5].d+z* cij[i][6].d))));
       t1=cij[i][1].d;
       if (i<112) {
-        if (i<48)  u2=U21;    /* u < 1/4        */
-        else       u2=U22; }  /* 1/4 <= u < 1/2 */
+	if (i<48)  u2=U21;    /* u < 1/4        */
+	else       u2=U22; }  /* 1/4 <= u < 1/2 */
       else {
-        if (i<176) u2=U23;    /* 1/2 <= u < 3/4 */
-        else       u2=U24; }  /* 3/4 <= u <= 1  */
+	if (i<176) u2=U23;    /* 1/2 <= u < 3/4 */
+	else       u2=U24; }  /* 3/4 <= u <= 1  */
       if ((y=t1+(yy-u2*t1)) == t1+(yy+u2*t1))  return __signArctan(x,y);
 
       z=u-hij[i][0].d;
       s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+
-         z*(hij[i][14].d+z* hij[i][15].d))));
+	 z*(hij[i][14].d+z* hij[i][15].d))));
       ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
       MUL2(z,ZERO,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
       ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
@@ -138,7 +141,7 @@ double atan(double x) {
       i=(TWO52+TWO8*w)-TWO52;  i-=16;
       z=(w-cij[i][0].d)+ww;
       yy=HPI1-z*(cij[i][2].d+z*(cij[i][3].d+z*(cij[i][4].d+
-                             z*(cij[i][5].d+z* cij[i][6].d))));
+			     z*(cij[i][5].d+z* cij[i][6].d))));
       t1=HPI-cij[i][1].d;
       if (i<112)  u3=U31;  /* w <  1/2 */
       else        u3=U32;  /* w >= 1/2 */
@@ -148,7 +151,7 @@ double atan(double x) {
       t1=w-hij[i][0].d;
       EADD(t1,ww,z,zz)
       s1=z*(hij[i][11].d+z*(hij[i][12].d+z*(hij[i][13].d+
-         z*(hij[i][14].d+z* hij[i][15].d))));
+	 z*(hij[i][14].d+z* hij[i][15].d))));
       ADD2(hij[i][9].d,hij[i][10].d,s1,ZERO,s2,ss2,t1,t2)
       MUL2(z,zz,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
       ADD2(hij[i][7].d,hij[i][8].d,s1,ss1,s2,ss2,t1,t2)
@@ -165,36 +168,36 @@ double atan(double x) {
     }
     else {
       if (u<E) { /* D <= u < E */
-        w=ONE/u;   v=w*w;
-        EMULV(w,u,t1,t2,t3,t4,t5,t6,t7)
-        yy=w*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
-        ww=w*((ONE-t1)-t2);
-        ESUB(HPI,w,t3,cor)
-        yy=((HPI1+cor)-ww)-yy;
-        if ((y=t3+(yy-U4)) == t3+(yy+U4))  return __signArctan(x,y);
-
-        DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
-        MUL2(w,ww,w,ww,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
-        s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
-        ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
-        MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-        ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
-        MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-        ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
-        MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-        ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
-        MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
-        MUL2(w,ww,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
-        ADD2(w,ww,s2,ss2,s1,ss1,t1,t2)
-        SUB2(HPI,HPI1,s1,ss1,s2,ss2,t1,t2)
-        if ((y=s2+(ss2-U8)) == s2+(ss2+U8))  return __signArctan(x,y);
+	w=ONE/u;   v=w*w;
+	EMULV(w,u,t1,t2,t3,t4,t5,t6,t7)
+	yy=w*v*(d3.d+v*(d5.d+v*(d7.d+v*(d9.d+v*(d11.d+v*d13.d)))));
+	ww=w*((ONE-t1)-t2);
+	ESUB(HPI,w,t3,cor)
+	yy=((HPI1+cor)-ww)-yy;
+	if ((y=t3+(yy-U4)) == t3+(yy+U4))  return __signArctan(x,y);
+
+	DIV2(ONE,ZERO,u,ZERO,w,ww,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
+	MUL2(w,ww,w,ww,v,vv,t1,t2,t3,t4,t5,t6,t7,t8)
+	s1=v*(f11.d+v*(f13.d+v*(f15.d+v*(f17.d+v*f19.d))));
+	ADD2(f9.d,ff9.d,s1,ZERO,s2,ss2,t1,t2)
+	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
+	ADD2(f7.d,ff7.d,s1,ss1,s2,ss2,t1,t2)
+	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
+	ADD2(f5.d,ff5.d,s1,ss1,s2,ss2,t1,t2)
+	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
+	ADD2(f3.d,ff3.d,s1,ss1,s2,ss2,t1,t2)
+	MUL2(v,vv,s2,ss2,s1,ss1,t1,t2,t3,t4,t5,t6,t7,t8)
+	MUL2(w,ww,s1,ss1,s2,ss2,t1,t2,t3,t4,t5,t6,t7,t8)
+	ADD2(w,ww,s2,ss2,s1,ss1,t1,t2)
+	SUB2(HPI,HPI1,s1,ss1,s2,ss2,t1,t2)
+	if ((y=s2+(ss2-U8)) == s2+(ss2+U8))  return __signArctan(x,y);
 
       return atanMp(x,pr);
       }
       else {
-        /* u >= E */
-        if (x>0) return  HPI;
-        else     return MHPI; }
+	/* u >= E */
+	if (x>0) return  HPI;
+	else     return MHPI; }
     }
   }
 
diff --git a/sysdeps/ieee754/dbl-64/s_tan.c b/sysdeps/ieee754/dbl-64/s_tan.c
index 4e26d90..015b027 100644
--- a/sysdeps/ieee754/dbl-64/s_tan.c
+++ b/sysdeps/ieee754/dbl-64/s_tan.c
@@ -1,7 +1,7 @@
 /*
  * IBM Accurate Mathematical Library
  * written by International Business Machines Corp.
- * Copyright (C) 2001, 2009 Free Software Foundation
+ * Copyright (C) 2001, 2009, 2011 Free Software Foundation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License as published by
@@ -50,7 +50,10 @@ double tan(double x) {
 
   int ux,i,n;
   double a,da,a2,b,db,c,dc,c1,cc1,c2,cc2,c3,cc3,fi,ffi,gi,pz,s,sy,
-  t,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10,w,x2,xn,xx2,y,ya,yya,z0,z,zz,z2,zz2;
+  t,t1,t2,t3,t4,t7,t8,t9,t10,w,x2,xn,xx2,y,ya,yya,z0,z,zz,z2,zz2;
+#ifndef DLA_FMA
+  double t5,t6;
+#endif
   int p;
   number num,v;
   mp_no mpa,mpt1,mpt2;
@@ -84,7 +87,7 @@ double tan(double x) {
 
     /* Second stage */
     c1 = x2*(a15.d+x2*(a17.d+x2*(a19.d+x2*(a21.d+x2*(a23.d+x2*(a25.d+
-         x2*a27.d))))));
+	 x2*a27.d))))));
     EMULV(x,x,x2,xx2,t1,t2,t3,t4,t5)
     ADD2(a13.d,aa13.d,c1,zero.d,c2,cc2,t1,t2)
     MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8)
@@ -159,13 +162,13 @@ double tan(double x) {
       a2 = a*a;
       t2 = da+a*a2*(d3.d+a2*(d5.d+a2*(d7.d+a2*(d9.d+a2*d11.d))));
       if (n) {
-        /* First stage -cot */
-        EADD(a,t2,b,db)
-        DIV2(one.d,zero.d,b,db,c,dc,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
-        if ((y=c+(dc-u6.d*c))==c+(dc+u6.d*c))  return (-y); }
+	/* First stage -cot */
+	EADD(a,t2,b,db)
+	DIV2(one.d,zero.d,b,db,c,dc,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
+	if ((y=c+(dc-u6.d*c))==c+(dc+u6.d*c))  return (-y); }
       else {
-        /* First stage tan */
-        if ((y=a+(t2-u5.d*a))==a+(t2+u5.d*a))  return y; }
+	/* First stage tan */
+	if ((y=a+(t2-u5.d*a))==a+(t2+u5.d*a))  return y; }
       /* Second stage */
       /* Range reduction by algorithm ii */
       t = (x*hpinv.d + toint.d);
@@ -184,7 +187,7 @@ double tan(double x) {
       EADD(a,da,t1,t2)   a=t1;  da=t2;
       MUL2(a,da,a,da,x2,xx2,t1,t2,t3,t4,t5,t6,t7,t8)
       c1 = x2*(a15.d+x2*(a17.d+x2*(a19.d+x2*(a21.d+x2*(a23.d+x2*(a25.d+
-           x2*a27.d))))));
+	   x2*a27.d))))));
       ADD2(a13.d,aa13.d,c1,zero.d,c2,cc2,t1,t2)
       MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8)
       ADD2(a11.d,aa11.d,c1,cc1,c2,cc2,t1,t2)
@@ -201,12 +204,12 @@ double tan(double x) {
       ADD2(a  ,da  ,c2,cc2,c1,cc1,t1,t2)
 
       if (n) {
-        /* Second stage -cot */
-        DIV2(one.d,zero.d,c1,cc1,c2,cc2,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
-        if ((y=c2+(cc2-u8.d*c2)) == c2+(cc2+u8.d*c2))  return (-y); }
+	/* Second stage -cot */
+	DIV2(one.d,zero.d,c1,cc1,c2,cc2,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
+	if ((y=c2+(cc2-u8.d*c2)) == c2+(cc2+u8.d*c2))  return (-y); }
       else {
-        /* Second stage tan */
-        if ((y=c1+(cc1-u7.d*c1)) == c1+(cc1+u7.d*c1))  return y; }
+	/* Second stage tan */
+	if ((y=c1+(cc1-u7.d*c1)) == c1+(cc1+u7.d*c1))  return y; }
       return tanMp(x);
     }
 
@@ -287,18 +290,18 @@ double tan(double x) {
       a2 = a*a;
       t2 = da+a*a2*(d3.d+a2*(d5.d+a2*(d7.d+a2*(d9.d+a2*d11.d))));
       if (n) {
-        /* First stage -cot */
-        EADD(a,t2,b,db)
-        DIV2(one.d,zero.d,b,db,c,dc,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
-        if ((y=c+(dc-u14.d*c))==c+(dc+u14.d*c))  return (-y); }
+	/* First stage -cot */
+	EADD(a,t2,b,db)
+	DIV2(one.d,zero.d,b,db,c,dc,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
+	if ((y=c+(dc-u14.d*c))==c+(dc+u14.d*c))  return (-y); }
       else {
-        /* First stage tan */
-        if ((y=a+(t2-u13.d*a))==a+(t2+u13.d*a))  return y; }
+	/* First stage tan */
+	if ((y=a+(t2-u13.d*a))==a+(t2+u13.d*a))  return y; }
 
       /* Second stage */
       MUL2(a,da,a,da,x2,xx2,t1,t2,t3,t4,t5,t6,t7,t8)
       c1 = x2*(a15.d+x2*(a17.d+x2*(a19.d+x2*(a21.d+x2*(a23.d+x2*(a25.d+
-           x2*a27.d))))));
+	   x2*a27.d))))));
       ADD2(a13.d,aa13.d,c1,zero.d,c2,cc2,t1,t2)
       MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8)
       ADD2(a11.d,aa11.d,c1,cc1,c2,cc2,t1,t2)
@@ -315,12 +318,12 @@ double tan(double x) {
       ADD2(a  ,da  ,c2,cc2,c1,cc1,t1,t2)
 
       if (n) {
-        /* Second stage -cot */
-        DIV2(one.d,zero.d,c1,cc1,c2,cc2,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
-        if ((y=c2+(cc2-u16.d*c2)) == c2+(cc2+u16.d*c2))  return (-y); }
+	/* Second stage -cot */
+	DIV2(one.d,zero.d,c1,cc1,c2,cc2,t1,t2,t3,t4,t5,t6,t7,t8,t9,t10)
+	if ((y=c2+(cc2-u16.d*c2)) == c2+(cc2+u16.d*c2))  return (-y); }
       else {
-        /* Second stage tan */
-        if ((y=c1+(cc1-u15.d*c1)) == c1+(cc1+u15.d*c1))  return (y); }
+	/* Second stage tan */
+	if ((y=c1+(cc1-u15.d*c1)) == c1+(cc1+u15.d*c1))  return (y); }
       return tanMp(x);
     }
 
@@ -404,7 +407,7 @@ double tan(double x) {
 
     MUL2(a,da,a,da,x2,xx2,t1,t2,t3,t4,t5,t6,t7,t8)
     c1 = x2*(a15.d+x2*(a17.d+x2*(a19.d+x2*(a21.d+x2*(a23.d+x2*(a25.d+
-         x2*a27.d))))));
+	 x2*a27.d))))));
     ADD2(a13.d,aa13.d,c1,zero.d,c2,cc2,t1,t2)
     MUL2(x2,xx2,c2,cc2,c1,cc1,t1,t2,t3,t4,t5,t6,t7,t8)
     ADD2(a11.d,aa11.d,c1,cc1,c2,cc2,t1,t2)

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                         |   16 +++++
 sysdeps/ieee754/dbl-64/dla.h      |  119 +++++++++++++++++++++----------------
 sysdeps/ieee754/dbl-64/doasin.c   |    7 ++-
 sysdeps/ieee754/dbl-64/dosincos.c |   20 ++++--
 sysdeps/ieee754/dbl-64/e_atan2.c  |    7 ++-
 sysdeps/ieee754/dbl-64/e_log.c    |    5 +-
 sysdeps/ieee754/dbl-64/e_pow.c    |    5 +-
 sysdeps/ieee754/dbl-64/halfulp.c  |    5 +-
 sysdeps/ieee754/dbl-64/s_atan.c   |  115 ++++++++++++++++++-----------------
 sysdeps/ieee754/dbl-64/s_tan.c    |   59 ++++++++++---------
 10 files changed, 208 insertions(+), 150 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources
Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]