This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

PowerPC: Align power7 memcpy using VSX to quadword

From: Adhemerval Zanella <azanella at linux dot vnet dot ibm dot com>
To: "GNU C. Library" <libc-alpha at sourceware dot org>
Cc: Benjamin Herrenschmidt <benh at kernel dot crashing dot org>
Date: Fri, 13 Jun 2014 12:27:36 -0300
Subject: PowerPC: Align power7 memcpy using VSX to quadword
Authentication-results: sourceware.org; auth=none

This patch changes power7 memcpy to use VSX instructions only when
memory is aligned to a quadword (16 bytes).  This avoids kernel alignment
traps on non-cacheable memory (for instance, memory-mapped I/O).

Checked on ppc64be and ppc32be.

--

2014-06-13  Adhemerval Zanella  <azanella@linux.vnet.ibm.com>

	* sysdeps/powerpc/powerpc64/power7/memcpy.S: Align VSX copies to 16B
	to avoid alignment traps in non-cacheable memory.
	* sysdeps/powerpc/powerpc32/power7/memcpy.S: Likewise.

---

diff --git a/sysdeps/powerpc/powerpc32/power7/memcpy.S b/sysdeps/powerpc/powerpc32/power7/memcpy.S
index 52c2a6b..e540fea 100644
--- a/sysdeps/powerpc/powerpc32/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc32/power7/memcpy.S
@@ -38,8 +38,8 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-	andi.   11,3,7	      /* Check alignment of DST.  */
-	clrlwi  10,4,29	      /* Check alignment of SRC.  */
+	andi.   11,3,15	      /* Check alignment of DST.  */
+	clrlwi  10,4,28	      /* Check alignment of SRC.  */
 	cmplw   cr6,10,11     /* SRC and DST alignments match?  */
 	mr	12,4
 	mr	31,5
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
index bbfd381..18467f6 100644
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
@@ -36,16 +36,11 @@ EALIGN (memcpy, 5, 0)
 	ble	cr1, L(copy_LT_32)  /* If move < 32 bytes use short move
 				    code.  */
 
-#ifdef __LITTLE_ENDIAN__
-/* In little-endian mode, power7 takes an alignment trap on any lxvd2x
-   or stxvd2x crossing a 32-byte boundary, so ensure the aligned_copy
-   loop is only used for quadword aligned copies.  */
+/* Align copies using VSX instructions to quadword.  This avoids alignment
+   traps when memcpy is used on non-cacheable memory (for instance,
+   memory-mapped I/O).  */
 	andi.	10,3,15
 	clrldi	11,4,60
-#else
-	andi.	10,3,7		/* Check alignment of DST.  */
-	clrldi	11,4,61		/* Check alignment of SRC.  */
-#endif
 	cmpld	cr6,10,11	/* SRC and DST alignments match?  */
 
 	mr	dst,3
@@ -53,11 +48,7 @@ EALIGN (memcpy, 5, 0)
 	beq	L(aligned_copy)
 
 	mtocrf	0x01,0
-#ifdef __LITTLE_ENDIAN__
 	clrldi	0,0,60
-#else
-	clrldi	0,0,61
-#endif
 
 /* Get the DST and SRC aligned to 8 bytes (16 for little-endian).  */
 1:
@@ -79,14 +70,12 @@ EALIGN (memcpy, 5, 0)
 	stw	6,0(dst)
 	addi	dst,dst,4
 8:
-#ifdef __LITTLE_ENDIAN__
 	bf	28,16f
 	ld	6,0(src)
 	addi	src,src,8
 	std	6,0(dst)
 	addi	dst,dst,8
 16:
-#endif
 	subf	cnt,0,cnt
 
 /* Main aligned copy loop. Copies 128 bytes at a time. */
@@ -298,9 +287,7 @@ L(copy_LE_8):
 	.align	4
 L(copy_GE_32_unaligned):
 	clrldi	0,0,60	      /* Number of bytes until the 1st dst quadword.  */
-#ifndef __LITTLE_ENDIAN__
 	andi.	10,3,15	      /* Check alignment of DST (against quadwords).  */
-#endif
 	srdi	9,cnt,4	      /* Number of full quadwords remaining.  */
 
 	beq	L(copy_GE_32_unaligned_cont)

Follow-Ups:
- Re: PowerPC: Align power7 memcpy using VSX to quadword
  - From: Adhemerval Zanella

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]