This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH 1/4] [Powerpc] tune/optimize memmove/wordcopy. Add helper macro to call MERGE


[Powerpc] tune/optimize memmove/wordcopy. Add helper macro to call MERGE

Add helper macros to handle calling the MERGE function with the proper
alignments.  This significantly shortens the code.

2012-04-03  Will Schmidt <will_schmidt@vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/power6/wordcopy.c: Add fwd_align_merge and
	bwd_align_merge macros.
	* sysdeps/powerpc/powerpc32/power6/wordcopy.c: Likewise.
---
 sysdeps/powerpc/powerpc32/power6/wordcopy.c |  117 ++++----------
 sysdeps/powerpc/powerpc64/power6/wordcopy.c |  234 +++++----------------------
 2 files changed, 79 insertions(+), 272 deletions(-)

diff --git a/sysdeps/powerpc/powerpc32/power6/wordcopy.c b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
index ddf2865..33858e2 100644
--- a/sysdeps/powerpc/powerpc32/power6/wordcopy.c
+++ b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
@@ -1,5 +1,5 @@
 /* _memcopy.c -- subroutines for memory copy functions.
-   Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1991-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Torbjorn Granlund (tege@sics.se).
    Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com).
@@ -66,6 +66,20 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
    DSTP should be aligned for memory operations on `op_t's, but SRCP must
    *not* be aligned.  */
 
+#define fwd_align_merge(align)						\
+  do									\
+    {									\
+      a1 = ((op_t *) srcp)[1];						\
+      a2 = ((op_t *) srcp)[2];						\
+      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8));	\
+      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8));	\
+      a0 = a2;								\
+      srcp += 2 * OPSIZ;						\
+      dstp += 2 * OPSIZ;						\
+      len -= 2;								\
+    }									\
+  while (len != 0)
+
 void
 _wordcopy_fwd_dest_aligned (dstp, srcp, len)
      long int dstp;
@@ -105,49 +119,13 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
   switch (align)
     {
     case 1:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
-          ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (1);
       break;
     case 2:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
-          ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (2);
       break;
     case 3:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
-          ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (3);
       break;
     }
 
@@ -192,6 +170,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
   while (len != 0);
 }
 
+#define bwd_align_merge(align)						\
+  do									\
+    {									\
+      srcp -= 2 * OPSIZ;						\
+      dstp -= 2 * OPSIZ;						\
+      a1 = ((op_t *) srcp)[1];						\
+      a0 = ((op_t *) srcp)[0];						\
+      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8));	\
+      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8));	\
+      a2 = a0;								\
+      len -= 2;								\
+    }									\
+  while (len != 0)
+
 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
    before SRCP to block finishing right before DSTP with LEN `op_t'
    words (not LEN bytes!).  DSTP should be aligned for memory
@@ -236,52 +228,13 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
   switch (align)
     {
     case 1:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
-          ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (1);
       break;
     case 2:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
-          ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (2);
       break;
     case 3:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
-          ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (3);
       break;
     }
 }
diff --git a/sysdeps/powerpc/powerpc64/power6/wordcopy.c b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
index faddd94..bbe55ee 100644
--- a/sysdeps/powerpc/powerpc64/power6/wordcopy.c
+++ b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
@@ -1,5 +1,5 @@
 /* _memcopy.c -- subroutines for memory copy functions.
-   Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+   Copyright (C) 1991-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Torbjorn Granlund (tege@sics.se).
 
@@ -60,6 +60,21 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
   while (len != 0);
 }
 
+#define fwd_align_merge(align)						\
+  do									\
+    {									\
+      a1 = ((op_t *) srcp)[1];						\
+      a2 = ((op_t *) srcp)[2];						\
+      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8));	\
+      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8));	\
+      a0 = a2;								\
+      srcp += 2 * OPSIZ;						\
+      dstp += 2 * OPSIZ;						\
+      len -= 2;								\
+    }									\
+  while (len != 0)
+
+
 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
    block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
    DSTP should be aligned for memory operations on `op_t's, but SRCP must
@@ -104,109 +119,25 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
   switch (align)
     {
     case 1:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
-          ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (1);
       break;
     case 2:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
-          ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (2);
       break;
     case 3:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
-          ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (3);
       break;
     case 4:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
-          ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (4);
       break;
     case 5:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
-          ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (5);
       break;
     case 6:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
-          ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (6);
       break;
     case 7:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
-          ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (7);
       break;
     }
 
@@ -251,6 +182,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
   while (len != 0);
 }
 
+#define bwd_align_merge(align)						\
+  do									\
+    {									\
+      srcp -= 2 * OPSIZ;						\
+      dstp -= 2 * OPSIZ;						\
+      a1 = ((op_t *) srcp)[1];						\
+      a0 = ((op_t *) srcp)[0];						\
+      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8));	\
+      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8));	\
+      a2 = a0;								\
+      len -= 2;								\
+    }									\
+  while (len != 0)
+
 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
    before SRCP to block finishing right before DSTP with LEN `op_t'
    words (not LEN bytes!).  DSTP should be aligned for memory
@@ -295,116 +240,25 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
   switch (align)
     {
     case 1:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
-          ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (1);
       break;
     case 2:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
-          ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (2);
       break;
     case 3:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
-          ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (3);
       break;
     case 4:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
-          ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (4);
       break;
     case 5:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
-          ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (5);
       break;
     case 6:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
-          ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (6);
       break;
     case 7:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
-          ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (7);
       break;
     }
 }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]