This is the mail archive of the
libc-alpha@sources.redhat.com
mailing list for the glibc project.
Re: 64-bit mips versions of memcpy and memset
On Mar 21, 2003, Andreas Jaeger <aj at suse dot de> wrote:
> Ok,
It turned out there was a bug in memcpy (and probably in memset as well).
The patch below fixes it, and I'm going ahead and putting it in right away,
just to avoid having things broken for too long. The patch also applies to
the mips32 versions the same formatting fixes (mostly line breaks) that I
applied to the mips64 versions while porting them.
Index: ChangeLog
from Alexandre Oliva <aoliva at redhat dot com>
* sysdeps/mips/mips64/memcpy.S: Fix porting bug that broke
unaligned copying of 8-15 bytes. From Chris Demetriou
<cgd at broadcom dot com>. Fix label names.
* sysdeps/mips/mips64/memset.S: Fix label names. Make a change
similar to the one made to memcpy.S.
* sysdeps/mips/memcpy.S: Formatting changes.
* sysdeps/mips/memset.S: Likewise.
Index: sysdeps/mips/memcpy.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/memcpy.S,v
retrieving revision 1.4
diff -u -p -r1.4 memcpy.S
--- sysdeps/mips/memcpy.S 21 Mar 2003 19:44:24 -0000 1.4
+++ sysdeps/mips/memcpy.S 21 Mar 2003 21:31:57 -0000
@@ -54,12 +54,14 @@ ENTRY (memcpy)
SWHI t0, 0(a0)
addu a0, t1
-L(chk8w): andi t0, a2, 0x1f # 32 or more bytes left?
+L(chk8w):
+ andi t0, a2, 0x1f # 32 or more bytes left?
beq t0, a2, L(chk1w)
subu a3, a2, t0 # Yes
addu a3, a1 # a3 = end address of loop
move a2, t0 # a2 = what will be left after loop
-L(lop8w): lw t0, 0(a1) # Loop taking 8 words at a time
+L(lop8w):
+ lw t0, 0(a1) # Loop taking 8 words at a time
lw t1, 4(a1)
lw t2, 8(a1)
lw t3, 12(a1)
@@ -79,28 +81,34 @@ L(lop8w): lw t0, 0(a1) # Loop taking 8
bne a1, a3, L(lop8w)
sw t7, -4(a0)
-L(chk1w): andi t0, a2, 0x3 # 4 or more bytes left?
+L(chk1w):
+ andi t0, a2, 0x3 # 4 or more bytes left?
beq t0, a2, L(last8)
subu a3, a2, t0 # Yes, handle them one word at a time
addu a3, a1 # a3 again end address
move a2, t0
-L(lop1w): lw t0, 0(a1)
+L(lop1w):
+ lw t0, 0(a1)
addiu a0, 4
addiu a1, 4
bne a1, a3, L(lop1w)
sw t0, -4(a0)
-L(last8): blez a2, L(lst8e) # Handle last 8 bytes, one at a time
+L(last8):
+ blez a2, L(lst8e) # Handle last 8 bytes, one at a time
addu a3, a2, a1
-L(lst8l): lb t0, 0(a1)
+L(lst8l):
+ lb t0, 0(a1)
addiu a0, 1
addiu a1, 1
bne a1, a3, L(lst8l)
sb t0, -1(a0)
-L(lst8e): jr ra # Bye, bye
+L(lst8e):
+ jr ra # Bye, bye
nop
-L(shift): subu a3, zero, a0 # Src and Dest unaligned
+L(shift):
+ subu a3, zero, a0 # Src and Dest unaligned
andi a3, 0x3 # (unoptimized case...)
beq a3, zero, L(shft1)
subu a2, a3 # a2 = bytes left
@@ -109,16 +117,18 @@ L(shift): subu a3, zero, a0 # Src and D
addu a1, a3
SWHI t0, 0(a0)
addu a0, a3
-L(shft1): andi t0, a2, 0x3
+L(shft1):
+ andi t0, a2, 0x3
subu a3, a2, t0
addu a3, a1
-L(shfth): LWHI t1, 0(a1) # Limp through, word by word
+L(shfth):
+ LWHI t1, 0(a1) # Limp through, word by word
LWLO t1, 3(a1)
addiu a0, 4
addiu a1, 4
bne a1, a3, L(shfth)
sw t1, -4(a0)
- b L(last8) # Handle anything which may be left
+ b L(last8) # Handle anything which may be left
move a2, t0
.set reorder
Index: sysdeps/mips/memset.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/memset.S,v
retrieving revision 1.4
diff -u -p -r1.4 memset.S
--- sysdeps/mips/memset.S 21 Mar 2003 19:44:24 -0000 1.4
+++ sysdeps/mips/memset.S 21 Mar 2003 21:31:57 -0000
@@ -43,35 +43,42 @@ ENTRY (memset)
sll t0, a1, 16
or a1, t0 # a1 is now pattern in full word
-L(ueven): subu t0, zero, a0 # Unaligned address?
+L(ueven):
+ subu t0, zero, a0 # Unaligned address?
andi t0, 0x3
beq t0, zero, L(chkw)
subu a2, t0
SWHI a1, 0(a0) # Yes, handle first unaligned part
addu a0, t0 # Now both a0 and a2 are updated
-L(chkw): andi t0, a2, 0x7 # Enough left for one loop iteration?
+L(chkw):
+ andi t0, a2, 0x7 # Enough left for one loop iteration?
beq t0, a2, L(chkl)
subu a3, a2, t0
addu a3, a0 # a3 is last loop address +1
move a2, t0 # a2 is now # of bytes left after loop
-L(loopw): addiu a0, 8 # Handle 2 words pr. iteration
+L(loopw):
+ addiu a0, 8 # Handle 2 words pr. iteration
sw a1, -8(a0)
bne a0, a3, L(loopw)
sw a1, -4(a0)
-L(chkl): andi t0, a2, 0x4 # Check if there is at least a full
+L(chkl):
+ andi t0, a2, 0x4 # Check if there is at least a full
beq t0, zero, L(last8) # word remaining after the loop
subu a2, t0
sw a1, 0(a0) # Yes...
addiu a0, 4
-L(last8): blez a2, L(exit) # Handle last 8 bytes (if cnt>0)
+L(last8):
+ blez a2, L(exit) # Handle last 8 bytes (if cnt>0)
addu a3, a2, a0 # a3 is last address +1
-L(lst8l): addiu a0, 1
+L(lst8l):
+ addiu a0, 1
bne a0, a3, L(lst8l)
sb a1, -1(a0)
-L(exit): j ra # Bye, bye
+L(exit):
+ j ra # Bye, bye
nop
.set reorder
Index: sysdeps/mips/mips64/memcpy.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/mips64/memcpy.S,v
retrieving revision 1.1
diff -u -p -r1.1 memcpy.S
--- sysdeps/mips/mips64/memcpy.S 21 Mar 2003 19:44:24 -0000 1.1
+++ sysdeps/mips/mips64/memcpy.S 21 Mar 2003 21:31:57 -0000
@@ -42,8 +42,8 @@
ENTRY (memcpy)
.set noreorder
- slti a4, a2, 8 # Less than 8?
- bne a4, zero, L(last8)
+ slti a4, a2, 16 # Less than 16?
+ bne a4, zero, L(last16)
move v0, a0 # Setup exit value before too late
xor a4, a1, a0 # Find a0/a1 displacement
@@ -86,53 +86,53 @@ L(lop8w):
sd t7, -8(a0)
L(chk1w):
- andi a4, a2, 0x7 # 4 or more bytes left?
- beq a4, a2, L(last8)
- PTR_SUBU a3, a2, a4 # Yes, handle them one word at a time
+ andi a4, a2, 0x7 # 8 or more bytes left?
+ beq a4, a2, L(last16)
+ PTR_SUBU a3, a2, a4 # Yes, handle them one dword at a time
PTR_ADDU a3, a1 # a3 again end address
move a2, a4
-L(lop1w):
+L(lop1w):
ld a4, 0(a1)
PTR_ADDIU a0, 8
PTR_ADDIU a1, 8
bne a1, a3, L(lop1w)
sd a4, -8(a0)
-L(last8):
- blez a2, L(lst8e) # Handle last 8 bytes, one at a time
+L(last16):
+ blez a2, L(lst16e) # Handle last 16 bytes, one at a time
PTR_ADDU a3, a2, a1
-L(lst8l):
+L(lst16l):
lb a4, 0(a1)
PTR_ADDIU a0, 1
PTR_ADDIU a1, 1
- bne a1, a3, L(lst8l)
+ bne a1, a3, L(lst16l)
sb a4, -1(a0)
-L(lst8e):
+L(lst16e):
jr ra # Bye, bye
nop
L(shift):
PTR_SUBU a3, zero, a0 # Src and Dest unaligned
andi a3, 0x7 # (unoptimized case...)
- beq a3, zero, L(shfa5)
+ beq a3, zero, L(shft1)
PTR_SUBU a2, a3 # a2 = bytes left
LDHI a4, 0(a1) # Take care of first odd part
LDLO a4, 7(a1)
PTR_ADDU a1, a3
SDHI a4, 0(a0)
PTR_ADDU a0, a3
-L(shfa5):
+L(shft1):
andi a4, a2, 0x7
PTR_SUBU a3, a2, a4
PTR_ADDU a3, a1
L(shfth):
- LDHI a5, 0(a1) # Limp through, word by word
+ LDHI a5, 0(a1) # Limp through, dword by dword
LDLO a5, 7(a1)
PTR_ADDIU a0, 8
PTR_ADDIU a1, 8
bne a1, a3, L(shfth)
sd a5, -8(a0)
- b L(last8) # Handle anything which may be left
+ b L(last16) # Handle anything which may be left
move a2, a4
.set reorder
Index: sysdeps/mips/mips64/memset.S
===================================================================
RCS file: /cvs/glibc/libc/sysdeps/mips/mips64/memset.S,v
retrieving revision 1.1
diff -u -p -r1.1 memset.S
--- sysdeps/mips/mips64/memset.S 21 Mar 2003 19:44:24 -0000 1.1
+++ sysdeps/mips/mips64/memset.S 21 Mar 2003 21:31:57 -0000
@@ -36,8 +36,8 @@
ENTRY (memset)
.set noreorder
- slti t5, a2, 8 # Less than 8?
- bne t5, zero, L(last8)
+ slti t5, a2, 16 # Less than 16?
+ bne t5, zero, L(last16)
move v0, a0 # Setup exit value before too late
beq a1, zero, L(ueven) # If zero pattern, no need to extend
@@ -64,24 +64,24 @@ L(chkw):
PTR_ADDU a3, a0 # a3 is last loop address +1
move a2, t4 # a2 is now # of bytes left after loop
L(loopw):
- PTR_ADDIU a0, 16 # Handle 2 words pr. iteration
+ PTR_ADDIU a0, 16 # Handle 2 dwords pr. iteration
sd a1, -16(a0)
bne a0, a3, L(loopw)
sd a1, -8(a0)
L(chkl):
andi t4, a2, 0x8 # Check if there is at least a double
- beq t4, zero, L(last8) # word remaining after the loop
+ beq t4, zero, L(last16) # word remaining after the loop
PTR_SUBU a2, t4
sd a1, 0(a0) # Yes...
PTR_ADDIU a0, 8
-L(last8):
- blez a2, L(exit) # Handle last 8 bytes (if cnt>0)
+L(last16):
+ blez a2, L(exit) # Handle last 16 bytes (if cnt>0)
PTR_ADDU a3, a2, a0 # a3 is last address +1
-L(lst8l):
+L(lst16l):
PTR_ADDIU a0, 1
- bne a0, a3, L(lst8l)
+ bne a0, a3, L(lst16l)
sb a1, -1(a0)
L(exit):
j ra # Bye, bye
--
Alexandre Oliva Enjoy Guarana', see http://www.ic.unicamp.br/~oliva/
Red Hat GCC Developer aoliva at {redhat dot com, gcc.gnu.org}
CS PhD student at IC-Unicamp oliva at {lsd dot ic dot unicamp dot br, gnu.org}
Free Software Evangelist Professional serial bug killer