This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: 64-bit mips versions of memcpy and memset


On Mar 21, 2003, Andreas Jaeger <aj at suse dot de> wrote:

> Ok,

It turned out there was a bug in memcpy (and probably in memset as
well), which the patch below fixes.  I'm going ahead and checking it
in now, so that things don't stay broken for too long.  The patch also
applies to the mips32 versions the same formatting fixes (mostly line
breaks) that I had made to the mips64 versions while porting them.

Index: ChangeLog
from  Alexandre Oliva  <aoliva at redhat dot com>
	* sysdeps/mips/mips64/memcpy.S: Fix porting bug that broke
	unaligned copying of 8-15 bytes.  From Chris Demetriou
	<cgd at broadcom dot com>.  Fix label names.
	* sysdeps/mips/mips64/memset.S: Fix label names.  Make a change
	similar to the one in memcpy.S.
	* sysdeps/mips/memcpy.S: Formatting changes.
	* sysdeps/mips/memset.S: Likewise.

Index: sysdeps/mips/memcpy.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/memcpy.S,v
retrieving revision 1.4
diff -u -p -r1.4 memcpy.S
--- sysdeps/mips/memcpy.S 21 Mar 2003 19:44:24 -0000 1.4
+++ sysdeps/mips/memcpy.S 21 Mar 2003 21:31:57 -0000
@@ -54,12 +54,14 @@ ENTRY (memcpy)
 	SWHI	t0, 0(a0)
 	addu	a0, t1
 
-L(chk8w):	andi	t0, a2, 0x1f		# 32 or more bytes left?
+L(chk8w):	
+	andi	t0, a2, 0x1f		# 32 or more bytes left?
 	beq	t0, a2, L(chk1w)
 	subu	a3, a2, t0		# Yes
 	addu	a3, a1			# a3 = end address of loop
 	move	a2, t0			# a2 = what will be left after loop
-L(lop8w):	lw	t0,  0(a1)		# Loop taking 8 words at a time
+L(lop8w):	
+	lw	t0,  0(a1)		# Loop taking 8 words at a time
 	lw	t1,  4(a1)
 	lw	t2,  8(a1)
 	lw	t3, 12(a1)
@@ -79,28 +81,34 @@ L(lop8w):	lw	t0,  0(a1)		# Loop taking 8
 	bne	a1, a3, L(lop8w)
 	sw	t7,  -4(a0)
 
-L(chk1w):	andi	t0, a2, 0x3		# 4 or more bytes left?
+L(chk1w):	
+	andi	t0, a2, 0x3		# 4 or more bytes left?
 	beq	t0, a2, L(last8)
 	subu	a3, a2, t0		# Yes, handle them one word at a time
 	addu	a3, a1			# a3 again end address
 	move	a2, t0
-L(lop1w):	lw	t0, 0(a1)
+L(lop1w):	
+	lw	t0, 0(a1)
 	addiu	a0, 4
 	addiu	a1, 4
 	bne	a1, a3, L(lop1w)
 	sw	t0, -4(a0)
 
-L(last8):	blez	a2, L(lst8e)		# Handle last 8 bytes, one at a time
+L(last8):	
+	blez	a2, L(lst8e)		# Handle last 8 bytes, one at a time
 	addu	a3, a2, a1
-L(lst8l):	lb	t0, 0(a1)
+L(lst8l):	
+	lb	t0, 0(a1)
 	addiu	a0, 1
 	addiu	a1, 1
 	bne	a1, a3, L(lst8l)
 	sb	t0, -1(a0)
-L(lst8e):	jr	ra			# Bye, bye
+L(lst8e):	
+	jr	ra			# Bye, bye
 	nop
 
-L(shift):	subu	a3, zero, a0		# Src and Dest unaligned 
+L(shift):	
+	subu	a3, zero, a0		# Src and Dest unaligned 
 	andi	a3, 0x3			#  (unoptimized case...)
 	beq	a3, zero, L(shft1)
 	subu	a2, a3			# a2 = bytes left
@@ -109,16 +117,18 @@ L(shift):	subu	a3, zero, a0		# Src and D
 	addu	a1, a3
 	SWHI	t0, 0(a0)
 	addu	a0, a3
-L(shft1):	andi	t0, a2, 0x3
+L(shft1):	
+	andi	t0, a2, 0x3
 	subu	a3, a2, t0
 	addu	a3, a1
-L(shfth):	LWHI	t1, 0(a1)		# Limp through, word by word
+L(shfth):	
+	LWHI	t1, 0(a1)		# Limp through, word by word
 	LWLO	t1, 3(a1)
 	addiu	a0, 4
 	addiu	a1, 4
 	bne	a1, a3, L(shfth)
 	sw	t1, -4(a0)
-	b	L(last8)			# Handle anything which may be left
+	b	L(last8)		# Handle anything which may be left
 	move	a2, t0
 
 	.set	reorder
Index: sysdeps/mips/memset.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/memset.S,v
retrieving revision 1.4
diff -u -p -r1.4 memset.S
--- sysdeps/mips/memset.S 21 Mar 2003 19:44:24 -0000 1.4
+++ sysdeps/mips/memset.S 21 Mar 2003 21:31:57 -0000
@@ -43,35 +43,42 @@ ENTRY (memset)
 	sll	t0, a1, 16
 	or	a1, t0			# a1 is now pattern in full word
 
-L(ueven):	subu	t0, zero, a0		# Unaligned address?
+L(ueven):	
+	subu	t0, zero, a0		# Unaligned address?
 	andi	t0, 0x3
 	beq	t0, zero, L(chkw)
 	subu	a2, t0
 	SWHI	a1, 0(a0)		# Yes, handle first unaligned part
 	addu	a0, t0			# Now both a0 and a2 are updated
 
-L(chkw):	andi	t0, a2, 0x7		# Enough left for one loop iteration?
+L(chkw):	
+	andi	t0, a2, 0x7		# Enough left for one loop iteration?
 	beq	t0, a2, L(chkl)
 	subu	a3, a2, t0
 	addu	a3, a0			# a3 is last loop address +1
 	move	a2, t0			# a2 is now # of bytes left after loop
-L(loopw):	addiu	a0, 8			# Handle 2 words pr. iteration
+L(loopw):	
+	addiu	a0, 8			# Handle 2 words pr. iteration
 	sw	a1, -8(a0)
 	bne	a0, a3, L(loopw)
 	sw	a1, -4(a0)
 
-L(chkl):	andi	t0, a2, 0x4		# Check if there is at least a full
+L(chkl):	
+	andi	t0, a2, 0x4		# Check if there is at least a full
 	beq	t0, zero, L(last8)	#  word remaining after the loop
 	subu	a2, t0
 	sw	a1, 0(a0)		# Yes...
 	addiu	a0, 4
 
-L(last8):	blez	a2, L(exit)		# Handle last 8 bytes (if cnt>0)
+L(last8):	
+	blez	a2, L(exit)		# Handle last 8 bytes (if cnt>0)
 	addu	a3, a2, a0		# a3 is last address +1
-L(lst8l):	addiu	a0, 1
+L(lst8l):	
+	addiu	a0, 1
 	bne	a0, a3, L(lst8l)
 	sb	a1, -1(a0)
-L(exit):	j	ra			# Bye, bye
+L(exit):	
+	j	ra			# Bye, bye
 	nop
 
 	.set	reorder
Index: sysdeps/mips/mips64/memcpy.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/mips64/memcpy.S,v
retrieving revision 1.1
diff -u -p -r1.1 memcpy.S
--- sysdeps/mips/mips64/memcpy.S 21 Mar 2003 19:44:24 -0000 1.1
+++ sysdeps/mips/mips64/memcpy.S 21 Mar 2003 21:31:57 -0000
@@ -42,8 +42,8 @@
 ENTRY (memcpy)
 	.set	noreorder
 
-	slti	a4, a2, 8		# Less than 8?
-	bne	a4, zero, L(last8)
+	slti	a4, a2, 16		# Less than 16?
+	bne	a4, zero, L(last16)
 	move	v0, a0			# Setup exit value before too late
 
 	xor	a4, a1, a0		# Find a0/a1 displacement
@@ -86,53 +86,53 @@ L(lop8w):	
 	sd	t7,  -8(a0)
 
 L(chk1w):
-	andi	a4, a2, 0x7		# 4 or more bytes left?
-	beq	a4, a2, L(last8)
-	PTR_SUBU a3, a2, a4		# Yes, handle them one word at a time
+	andi	a4, a2, 0x7		# 8 or more bytes left?
+	beq	a4, a2, L(last16)
+	PTR_SUBU a3, a2, a4		# Yes, handle them one dword at a time
 	PTR_ADDU a3, a1			# a3 again end address
 	move	a2, a4
-L(lop1w):	
+L(lop1w):
 	ld	a4, 0(a1)
 	PTR_ADDIU a0, 8
 	PTR_ADDIU a1, 8
 	bne	a1, a3, L(lop1w)
 	sd	a4, -8(a0)
 
-L(last8):
-	blez	a2, L(lst8e)		# Handle last 8 bytes, one at a time
+L(last16):
+	blez	a2, L(lst16e)		# Handle last 16 bytes, one at a time
 	PTR_ADDU a3, a2, a1
-L(lst8l):
+L(lst16l):
 	lb	a4, 0(a1)
 	PTR_ADDIU a0, 1
 	PTR_ADDIU a1, 1
-	bne	a1, a3, L(lst8l)
+	bne	a1, a3, L(lst16l)
 	sb	a4, -1(a0)
-L(lst8e):
+L(lst16e):
 	jr	ra			# Bye, bye
 	nop
 
 L(shift):
 	PTR_SUBU a3, zero, a0		# Src and Dest unaligned 
 	andi	a3, 0x7			#  (unoptimized case...)
-	beq	a3, zero, L(shfa5)
+	beq	a3, zero, L(shft1)
 	PTR_SUBU a2, a3			# a2 = bytes left
 	LDHI	a4, 0(a1)		# Take care of first odd part
 	LDLO	a4, 7(a1)
 	PTR_ADDU a1, a3
 	SDHI	a4, 0(a0)
 	PTR_ADDU a0, a3
-L(shfa5):
+L(shft1):
 	andi	a4, a2, 0x7
 	PTR_SUBU a3, a2, a4
 	PTR_ADDU a3, a1
 L(shfth):
-	LDHI	a5, 0(a1)		# Limp through, word by word
+	LDHI	a5, 0(a1)		# Limp through, dword by dword
 	LDLO	a5, 7(a1)
 	PTR_ADDIU a0, 8
 	PTR_ADDIU a1, 8
 	bne	a1, a3, L(shfth)
 	sd	a5, -8(a0)
-	b	L(last8)		# Handle anything which may be left
+	b	L(last16)		# Handle anything which may be left
 	move	a2, a4
 
 	.set	reorder
Index: sysdeps/mips/mips64/memset.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/mips64/memset.S,v
retrieving revision 1.1
diff -u -p -r1.1 memset.S
--- sysdeps/mips/mips64/memset.S 21 Mar 2003 19:44:24 -0000 1.1
+++ sysdeps/mips/mips64/memset.S 21 Mar 2003 21:31:57 -0000
@@ -36,8 +36,8 @@
 ENTRY (memset)
 	.set	noreorder
 
-	slti	t5, a2, 8		# Less than 8?
-	bne	t5, zero, L(last8)
+	slti	t5, a2, 16		# Less than 16?
+	bne	t5, zero, L(last16)
 	move	v0, a0			# Setup exit value before too late
 
 	beq	a1, zero, L(ueven)	# If zero pattern, no need to extend
@@ -64,24 +64,24 @@ L(chkw):
 	PTR_ADDU a3, a0			# a3 is last loop address +1
 	move	a2, t4			# a2 is now # of bytes left after loop
 L(loopw):
-	PTR_ADDIU a0, 16		# Handle 2 words pr. iteration
+	PTR_ADDIU a0, 16		# Handle 2 dwords pr. iteration
 	sd	a1, -16(a0)
 	bne	a0, a3, L(loopw)
 	sd	a1,  -8(a0)
 
 L(chkl):
 	andi	t4, a2, 0x8		# Check if there is at least a double
-	beq	t4, zero, L(last8)	#  word remaining after the loop
+	beq	t4, zero, L(last16)	#  word remaining after the loop
 	PTR_SUBU a2, t4
 	sd	a1, 0(a0)		# Yes...
 	PTR_ADDIU a0, 8
 
-L(last8):
-	blez	a2, L(exit)		# Handle last 8 bytes (if cnt>0)
+L(last16):
+	blez	a2, L(exit)		# Handle last 16 bytes (if cnt>0)
 	PTR_ADDU a3, a2, a0		# a3 is last address +1
-L(lst8l):
+L(lst16l):
 	PTR_ADDIU a0, 1
-	bne	a0, a3, L(lst8l)
+	bne	a0, a3, L(lst16l)
 	sb	a1, -1(a0)
 L(exit):
 	j	ra			# Bye, bye
-- 
Alexandre Oliva   Enjoy Guarana', see http://www.ic.unicamp.br/~oliva/
Red Hat GCC Developer                 aoliva at {redhat dot com, gcc.gnu.org}
CS PhD student at IC-Unicamp        oliva at {lsd dot ic dot unicamp dot br, gnu.org}
Free Software Evangelist                Professional serial bug killer

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]