This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 5/5] [Powerpc] tune/optimize memmove/wordcopy. preload srcp values


[Powerpc] tune/optimize memmove/wordcopy. preload srcp values

Rework the do/while loops to preload the srcp values for the next
iteration ahead of time, but only while it is safe to do so (len != 0).

2012-03-12  Will Schmidt <will_schmidt@vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/power7/wordcopy.c: Load srcp values
	ahead of time if safe.
	* sysdeps/powerpc/powerpc32/power7/wordcopy.c: Likewise.
---
 sysdeps/powerpc/powerpc32/power7/wordcopy.c |   40 ++++++++++++-----
 sysdeps/powerpc/powerpc64/power7/wordcopy.c |   65 +++++++++++++++++----------
 2 files changed, 68 insertions(+), 37 deletions(-)

diff --git a/sysdeps/powerpc/powerpc32/power7/wordcopy.c b/sysdeps/powerpc/powerpc32/power7/wordcopy.c
index 00bd444..7040b5c 100644
--- a/sysdeps/powerpc/powerpc32/power7/wordcopy.c
+++ b/sysdeps/powerpc/powerpc32/power7/wordcopy.c
@@ -67,18 +67,25 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
    *not* be aligned.  */
 
 #define fwd_align_merge(align)						\
+  {									\
+  a1 = ((op_t *) srcp)[1];						\
+  a2 = ((op_t *) srcp)[2];						\
   do									\
     {									\
-      a1 = ((op_t *) srcp)[1];						\
-      a2 = ((op_t *) srcp)[2];						\
       ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8));	\
       ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8));	\
-      a0 = a2;								\
-      srcp += 2 * OPSIZ;						\
-      dstp += 2 * OPSIZ;						\
       len -= 2;								\
+      if (len)								\
+        {								\
+          srcp += 2 * OPSIZ;						\
+          dstp += 2 * OPSIZ;						\
+          a0 = a2;							\
+          a1 = ((op_t *) srcp)[1];					\
+          a2 = ((op_t *) srcp)[2];					\
+        }								\
     }									\
-  while (len != 0);
+  while (len != 0);							\
+  }
 
 void
 _wordcopy_fwd_dest_aligned (dstp, srcp, len)
@@ -168,18 +175,27 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
 }
 
 #define bwd_align_merge(align)						\
+  {									\
+  srcp -= 2 * OPSIZ;							\
+  dstp -= 2 * OPSIZ;							\
+  a1 = ((op_t *) srcp)[1];						\
+  a0 = ((op_t *) srcp)[0];						\
   do									\
     {									\
-      srcp -= 2 * OPSIZ;						\
-      dstp -= 2 * OPSIZ;						\
-      a1 = ((op_t *) srcp)[1];						\
-      a0 = ((op_t *) srcp)[0];						\
       ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8));	\
       ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8));	\
-      a2 = a0;								\
       len -= 2;								\
+      if (len)								\
+        {								\
+          srcp -= 2 * OPSIZ;						\
+          dstp -= 2 * OPSIZ;						\
+          a2 = a0;							\
+          a1 = ((op_t *) srcp)[1];					\
+          a0 = ((op_t *) srcp)[0];					\
+        }								\
     }									\
-  while (len != 0);
+  while (len != 0);							\
+  }
 
 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
    before SRCP to block finishing right before DSTP with LEN `op_t'
diff --git a/sysdeps/powerpc/powerpc64/power7/wordcopy.c b/sysdeps/powerpc/powerpc64/power7/wordcopy.c
index 1c7b99a..6c5bdc7 100644
--- a/sysdeps/powerpc/powerpc64/power7/wordcopy.c
+++ b/sysdeps/powerpc/powerpc64/power7/wordcopy.c
@@ -61,19 +61,25 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
 }
 
 #define fwd_align_merge(align)						\
-  do									\
-    {									\
-      a1 = ((op_t *) srcp)[1];						\
-      a2 = ((op_t *) srcp)[2];						\
-      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8));	\
-      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8));	\
-      a0 = a2;								\
-      srcp += 2 * OPSIZ;						\
-      dstp += 2 * OPSIZ;						\
-      len -= 2;								\
-    }									\
-  while (len != 0);
-
+  {									\
+    a1 = ((op_t *) srcp)[1];						\
+    a2 = ((op_t *) srcp)[2];						\
+    do									\
+      {									\
+        ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8));	\
+        ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8));	\
+        len -= 2;							\
+        if (len)							\
+          {								\
+            srcp += 2 * OPSIZ;						\
+            dstp += 2 * OPSIZ;						\
+            a0 = a2;							\
+            a1 = ((op_t *) srcp)[1];					\
+            a2 = ((op_t *) srcp)[2];					\
+          }								\
+      }									\
+    while (len != 0);							\
+  }
 
 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
    block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
@@ -188,18 +194,27 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
 }
 
 #define bwd_align_merge(align)						\
-  do									\
-    {									\
-      srcp -= 2 * OPSIZ;						\
-      dstp -= 2 * OPSIZ;						\
-      a1 = ((op_t *) srcp)[1];						\
-      a0 = ((op_t *) srcp)[0];						\
-      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8));	\
-      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8));	\
-      a2 = a0;								\
-      len -= 2;								\
-    }									\
-  while (len != 0);
+  {									\
+    srcp -= 2 * OPSIZ;							\
+    dstp -= 2 * OPSIZ;							\
+    a1 = ((op_t *) srcp)[1];						\
+    a0 = ((op_t *) srcp)[0];						\
+    do									\
+      {									\
+        ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8));	\
+        ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8));	\
+        len -= 2;							\
+        if (len)							\
+          {								\
+            srcp -= 2 * OPSIZ;						\
+            dstp -= 2 * OPSIZ;						\
+            a2 = a0;							\
+            a1 = ((op_t *) srcp)[1];					\
+            a0 = ((op_t *) srcp)[0];					\
+          }								\
+      }									\
+    while (len != 0);							\
+  }
 
 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
    before SRCP to block finishing right before DSTP with LEN `op_t'


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]