This is the mail archive of the
glibc-cvs@sourceware.org
mailing list for the glibc project.
GNU C Library master sources branch, master, updated. glibc-2.14-587-g2797bea
- From: drepper at sourceware dot org
- To: glibc-cvs at sourceware dot org
- Date: 22 Dec 2011 19:23:34 -0000
- Subject: GNU C Library master sources branch, master, updated. glibc-2.14-587-g2797bea
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".
The branch, master has been updated
via 2797beae36e8869459b80c51f216cb4558675dad (commit)
via 2bd779ae3f3a86bce22fcb7665d740b14ac677ca (commit)
from 154bfc16225aaa3d3104e758eed2a17297131599 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2797beae36e8869459b80c51f216cb4558675dad
commit 2797beae36e8869459b80c51f216cb4558675dad
Merge: 2bd779a 154bfc1
Author: Ulrich Drepper <drepper@gmail.com>
Date: Thu Dec 22 14:23:28 2011 -0500
Merge branch 'master' of ssh://sourceware.org/git/glibc
Conflicts:
ChangeLog
diff --cc ChangeLog
index 8595c03,f74e0a5..205020d
--- a/ChangeLog
+++ b/ChangeLog
@@@ -1,8 -1,31 +1,36 @@@
+2011-12-22 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
+
+ * sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Fix wrong copying
+ processing for last bytes.
+
+ 2011-12-22 Joseph Myers <joseph@codesourcery.com>
+
+ * sysdeps/unix/sysv/linux/Makefile (syscall-list-variants)
+ (syscall-list-default-options, syscall-list-default-condition)
+ (syscall-list-includes): Define.
+ ($(objpfx)syscall-%.h $(objpfx)syscall-%.d): Support arbitrary
+ list of ABIs and options and #if conditions for each ABI. Do not
+ handle common syscalls between ABIs specially.
+ * sysdeps/unix/sysv/linux/powerpc/Makefile (64bit-predefine):
+ Remove.
+ (syscall-list-variants, syscall-list-32bit-options)
+ (syscall-list-32bit-condition, syscall-list-64bit-options)
+ (syscall-list-64bit-condition): Define.
+ * sysdeps/unix/sysv/linux/s390/Makefile (64bit-predefine): Remove.
+ (syscall-list-variants, syscall-list-32bit-options)
+ (syscall-list-32bit-condition, syscall-list-64bit-options)
+ (syscall-list-64bit-condition): Define.
+ * sysdeps/unix/sysv/linux/sparc/Makefile (64bit-predefine):
+ Remove.
+ (syscall-list-variants, syscall-list-32bit-options)
+ (syscall-list-32bit-condition, syscall-list-64bit-options)
+ (syscall-list-64bit-condition): Define.
+ * sysdeps/unix/sysv/linux/x86_64/Makefile (64bit-predefine):
+ Remove.
+ (syscall-list-variants, syscall-list-32bit-options)
+ (syscall-list-32bit-condition, syscall-list-64bit-options)
+ (syscall-list-64bit-condition): Define.
+
2011-12-22 Ulrich Drepper <drepper@gmail.com>
* locale/iso-639.def: Add brx entry.
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=2bd779ae3f3a86bce22fcb7665d740b14ac677ca
commit 2bd779ae3f3a86bce22fcb7665d740b14ac677ca
Author: Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
Date: Thu Dec 22 14:22:00 2011 -0500
Fix overrun in strcpy destination buffer in x86-32/SSSE3 version
diff --git a/ChangeLog b/ChangeLog
index a9cdf76..8595c03 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2011-12-22 Liubov Dmitrieva <liubov.dmitrieva@gmail.com>
+
+ * sysdeps/i386/i686/multiarch/strcpy-ssse3.S: Fix wrong copying
+ processing for last bytes.
+
2011-12-22 Ulrich Drepper <drepper@gmail.com>
* locale/iso-639.def: Add brx entry.
diff --git a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S b/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
index 073856f..470ddbe 100644
--- a/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
+++ b/sysdeps/i386/i686/multiarch/strcpy-ssse3.S
@@ -20,6 +20,7 @@
#ifndef NOT_IN_libc
+
# ifndef USE_AS_STRCAT
# include <sysdep.h>
@@ -31,8 +32,8 @@
cfi_adjust_cfa_offset (-4); \
cfi_restore (REG)
-# define PUSH(REG) pushl REG; CFI_PUSH (REG)
-# define POP(REG) popl REG; CFI_POP (REG)
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
# ifndef STRCPY
# define STRCPY __strcpy_ssse3
@@ -40,14 +41,22 @@
# ifdef USE_AS_STRNCPY
# define PARMS 8
-# define ENTRANCE PUSH(%ebx)
-# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx);
-# define RETURN1 POP(%edi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%edi)
+# define ENTRANCE PUSH (%ebx)
+# define RETURN POP (%ebx); ret; CFI_PUSH (%ebx);
+# define RETURN1 POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
# else
# define PARMS 4
# define ENTRANCE
# define RETURN ret
-# define RETURN1 POP(%edi); ret; CFI_PUSH(%edi)
+# define RETURN1 POP (%edi); ret; CFI_PUSH (%edi)
+# endif
+
+# ifdef USE_AS_STPCPY
+# define SAVE_RESULT(n) lea n(%edx), %eax
+# define SAVE_RESULT_TAIL(n) lea n(%edx), %eax
+# else
+# define SAVE_RESULT(n) movl %edi, %eax
+# define SAVE_RESULT_TAIL(n) movl %edx, %eax
# endif
# define STR1 PARMS
@@ -60,9 +69,7 @@
movl - 4 byte
movlpd - 8 byte
movaps - 16 byte - requires 16 byte alignment
- of sourse and destination adresses.
- 16 byte alignment: adress is 32bit value,
- right four bit of adress shall be 0.
+ of source and destination addresses.
*/
.text
@@ -72,8 +79,6 @@ ENTRY (STRCPY)
mov STR2(%esp), %ecx
# ifdef USE_AS_STRNCPY
movl LEN(%esp), %ebx
- test %ebx, %ebx
- jz L(ExitTail0)
cmp $8, %ebx
jbe L(StrncpyExit8Bytes)
# endif
@@ -127,39 +132,23 @@ ENTRY (STRCPY)
sub $16, %ebx
and $0xf, %esi
-/* add 16 bytes ecx_shift to ebx */
+/* add 16 bytes ecx_offset to ebx */
add %esi, %ebx
# endif
lea 16(%ecx), %esi
-/* Now:
- esi = alignment_16(ecx) + ecx_shift + 16;
- ecx_shift = ecx - alignment_16(ecx)
-*/
and $-16, %esi
-/* Now:
- esi = alignment_16(ecx) + 16
-*/
pxor %xmm0, %xmm0
movlpd (%ecx), %xmm1
movlpd %xmm1, (%edx)
-/*
- look if there is zero symbol in next 16 bytes of string
- from esi to esi + 15 and form mask in xmm0
-*/
+
pcmpeqb (%esi), %xmm0
movlpd 8(%ecx), %xmm1
movlpd %xmm1, 8(%edx)
-/* convert byte mask in xmm0 to bit mask */
-
pmovmskb %xmm0, %eax
sub %ecx, %esi
-/* esi = 16 - ecx_shift */
-
-/* eax = 0: there isn't end of string from position esi to esi+15 */
-
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(CopyFrom1To16BytesCase2OrCase3)
@@ -169,17 +158,9 @@ ENTRY (STRCPY)
mov %edx, %eax
lea 16(%edx), %edx
-/* Now:
- edx = edx + 16 = alignment_16(edx) + edx_shift + 16
-*/
and $-16, %edx
-
-/* Now: edx = alignment_16(edx) + 16 */
-
sub %edx, %eax
-/* Now: eax = edx_shift - 16 */
-
# ifdef USE_AS_STRNCPY
add %eax, %esi
lea -1(%esi), %esi
@@ -191,22 +172,11 @@ ENTRY (STRCPY)
L(ContinueCopy):
# endif
sub %eax, %ecx
-/* Now:
- case ecx_shift >= edx_shift:
- ecx = alignment_16(ecx) + (ecx_shift - edx_shift) + 16
- case ecx_shift < edx_shift:
- ecx = alignment_16(ecx) + (16 + ecx_shift - edx_shift)
-*/
mov %ecx, %eax
and $0xf, %eax
-/* Now:
- case ecx_shift >= edx_shift: eax = ecx_shift - edx_shift
- case ecx_shift < edx_shift: eax = (16 + ecx_shift - edx_shift)
- eax can be 0, 1, ..., 15
-*/
mov $0, %esi
-/* case: ecx_shift == edx_shift */
+/* case: ecx_offset == edx_offset */
jz L(Align16Both)
@@ -323,7 +293,7 @@ L(Align16Both):
sub %ecx, %eax
sub %eax, %edx
# ifdef USE_AS_STRNCPY
- lea 48+64(%ebx, %eax), %ebx
+ lea 112(%ebx, %eax), %ebx
# endif
mov $-0x40, %esi
@@ -441,7 +411,6 @@ L(Shl1Start):
jnz L(Shl1LoopExit)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
@@ -449,7 +418,6 @@ L(Shl1Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit1Case2OrCase3)
@@ -457,8 +425,7 @@ L(Shl1Start):
test %eax, %eax
jnz L(Shl1LoopExit)
- palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 31(%ecx), %ecx
lea 16(%edx), %edx
@@ -506,11 +473,11 @@ L(Shl1LoopStart):
jmp L(Shl1LoopStart)
L(Shl1LoopExit):
- movaps (%edx), %xmm6
- psrldq $15, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ movlpd 7(%ecx), %xmm0
+ movlpd %xmm0, 7(%edx)
mov $15, %esi
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -563,7 +530,6 @@ L(Shl2Start):
jnz L(Shl2LoopExit)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
@@ -571,7 +537,6 @@ L(Shl2Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit2Case2OrCase3)
@@ -579,8 +544,7 @@ L(Shl2Start):
test %eax, %eax
jnz L(Shl2LoopExit)
- palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 30(%ecx), %ecx
lea 16(%edx), %edx
@@ -628,11 +592,11 @@ L(Shl2LoopStart):
jmp L(Shl2LoopStart)
L(Shl2LoopExit):
- movaps (%edx), %xmm6
- psrldq $14, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
mov $14, %esi
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -685,7 +649,6 @@ L(Shl3Start):
jnz L(Shl3LoopExit)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
@@ -693,7 +656,6 @@ L(Shl3Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit3Case2OrCase3)
@@ -701,8 +663,7 @@ L(Shl3Start):
test %eax, %eax
jnz L(Shl3LoopExit)
- palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 29(%ecx), %ecx
lea 16(%edx), %edx
@@ -750,11 +711,11 @@ L(Shl3LoopStart):
jmp L(Shl3LoopStart)
L(Shl3LoopExit):
- movaps (%edx), %xmm6
- psrldq $13, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
mov $13, %esi
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -807,7 +768,6 @@ L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
@@ -815,7 +775,6 @@ L(Shl4Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit4Case2OrCase3)
@@ -823,8 +782,7 @@ L(Shl4Start):
test %eax, %eax
jnz L(Shl4LoopExit)
- palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
@@ -872,11 +830,11 @@ L(Shl4LoopStart):
jmp L(Shl4LoopStart)
L(Shl4LoopExit):
- movaps (%edx), %xmm6
- psrldq $12, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
mov $12, %esi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -929,7 +887,6 @@ L(Shl5Start):
jnz L(Shl5LoopExit)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
@@ -937,7 +894,6 @@ L(Shl5Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit5Case2OrCase3)
@@ -945,8 +901,7 @@ L(Shl5Start):
test %eax, %eax
jnz L(Shl5LoopExit)
- palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 27(%ecx), %ecx
lea 16(%edx), %edx
@@ -994,11 +949,11 @@ L(Shl5LoopStart):
jmp L(Shl5LoopStart)
L(Shl5LoopExit):
- movaps (%edx), %xmm6
- psrldq $11, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 7(%edx)
mov $11, %esi
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1051,7 +1006,6 @@ L(Shl6Start):
jnz L(Shl6LoopExit)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
@@ -1059,7 +1013,6 @@ L(Shl6Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit6Case2OrCase3)
@@ -1067,8 +1020,7 @@ L(Shl6Start):
test %eax, %eax
jnz L(Shl6LoopExit)
- palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 26(%ecx), %ecx
lea 16(%edx), %edx
@@ -1116,11 +1068,11 @@ L(Shl6LoopStart):
jmp L(Shl6LoopStart)
L(Shl6LoopExit):
- movaps (%edx), %xmm6
- psrldq $10, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 6(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 6(%edx)
mov $10, %esi
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1173,7 +1125,6 @@ L(Shl7Start):
jnz L(Shl7LoopExit)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
@@ -1181,7 +1132,6 @@ L(Shl7Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit7Case2OrCase3)
@@ -1189,8 +1139,7 @@ L(Shl7Start):
test %eax, %eax
jnz L(Shl7LoopExit)
- palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 25(%ecx), %ecx
lea 16(%edx), %edx
@@ -1238,11 +1187,11 @@ L(Shl7LoopStart):
jmp L(Shl7LoopStart)
L(Shl7LoopExit):
- movaps (%edx), %xmm6
- psrldq $9, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 5(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 5(%edx)
mov $9, %esi
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1295,7 +1244,6 @@ L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
@@ -1303,7 +1251,6 @@ L(Shl8Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit8Case2OrCase3)
@@ -1311,8 +1258,7 @@ L(Shl8Start):
test %eax, %eax
jnz L(Shl8LoopExit)
- palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
@@ -1360,11 +1306,9 @@ L(Shl8LoopStart):
jmp L(Shl8LoopStart)
L(Shl8LoopExit):
- movaps (%edx), %xmm6
- psrldq $8, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $8, %esi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1417,7 +1361,6 @@ L(Shl9Start):
jnz L(Shl9LoopExit)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
@@ -1425,7 +1368,6 @@ L(Shl9Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit9Case2OrCase3)
@@ -1433,8 +1375,7 @@ L(Shl9Start):
test %eax, %eax
jnz L(Shl9LoopExit)
- palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 23(%ecx), %ecx
lea 16(%edx), %edx
@@ -1482,11 +1423,9 @@ L(Shl9LoopStart):
jmp L(Shl9LoopStart)
L(Shl9LoopExit):
- movaps (%edx), %xmm6
- psrldq $7, %xmm6
+ movlpd -1(%ecx), %xmm0
+ movlpd %xmm0, -1(%edx)
mov $7, %esi
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1539,7 +1478,6 @@ L(Shl10Start):
jnz L(Shl10LoopExit)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
@@ -1547,7 +1485,6 @@ L(Shl10Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit10Case2OrCase3)
@@ -1555,8 +1492,7 @@ L(Shl10Start):
test %eax, %eax
jnz L(Shl10LoopExit)
- palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 22(%ecx), %ecx
lea 16(%edx), %edx
@@ -1604,11 +1540,9 @@ L(Shl10LoopStart):
jmp L(Shl10LoopStart)
L(Shl10LoopExit):
- movaps (%edx), %xmm6
- psrldq $6, %xmm6
+ movlpd -2(%ecx), %xmm0
+ movlpd %xmm0, -2(%edx)
mov $6, %esi
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1661,7 +1595,6 @@ L(Shl11Start):
jnz L(Shl11LoopExit)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
@@ -1669,7 +1602,6 @@ L(Shl11Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit11Case2OrCase3)
@@ -1677,8 +1609,7 @@ L(Shl11Start):
test %eax, %eax
jnz L(Shl11LoopExit)
- palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 21(%ecx), %ecx
lea 16(%edx), %edx
@@ -1726,11 +1657,9 @@ L(Shl11LoopStart):
jmp L(Shl11LoopStart)
L(Shl11LoopExit):
- movaps (%edx), %xmm6
- psrldq $5, %xmm6
+ movlpd -3(%ecx), %xmm0
+ movlpd %xmm0, -3(%edx)
mov $5, %esi
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1783,7 +1712,6 @@ L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
@@ -1791,7 +1719,6 @@ L(Shl12Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit12Case2OrCase3)
@@ -1799,8 +1726,7 @@ L(Shl12Start):
test %eax, %eax
jnz L(Shl12LoopExit)
- palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
@@ -1848,11 +1774,9 @@ L(Shl12LoopStart):
jmp L(Shl12LoopStart)
L(Shl12LoopExit):
- movaps (%edx), %xmm6
- psrldq $4, %xmm6
+ movl (%ecx), %esi
+ movl %esi, (%edx)
mov $4, %esi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -1905,7 +1829,6 @@ L(Shl13Start):
jnz L(Shl13LoopExit)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
@@ -1913,7 +1836,6 @@ L(Shl13Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit13Case2OrCase3)
@@ -1921,8 +1843,7 @@ L(Shl13Start):
test %eax, %eax
jnz L(Shl13LoopExit)
- palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 19(%ecx), %ecx
lea 16(%edx), %edx
@@ -1970,11 +1891,9 @@ L(Shl13LoopStart):
jmp L(Shl13LoopStart)
L(Shl13LoopExit):
- movaps (%edx), %xmm6
- psrldq $3, %xmm6
+ movl -1(%ecx), %esi
+ movl %esi, -1(%edx)
mov $3, %esi
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -2027,7 +1946,6 @@ L(Shl14Start):
jnz L(Shl14LoopExit)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
@@ -2035,7 +1953,6 @@ L(Shl14Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit14Case2OrCase3)
@@ -2043,8 +1960,7 @@ L(Shl14Start):
test %eax, %eax
jnz L(Shl14LoopExit)
- palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 18(%ecx), %ecx
lea 16(%edx), %edx
@@ -2092,11 +2008,9 @@ L(Shl14LoopStart):
jmp L(Shl14LoopStart)
L(Shl14LoopExit):
- movaps (%edx), %xmm6
- psrldq $2, %xmm6
+ movl -2(%ecx), %esi
+ movl %esi, -2(%edx)
mov $2, %esi
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx)
jmp L(CopyFrom1To16Bytes)
.p2align 4
@@ -2149,7 +2063,6 @@ L(Shl15Start):
jnz L(Shl15LoopExit)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
@@ -2157,7 +2070,6 @@ L(Shl15Start):
lea 16(%edx), %edx
pmovmskb %xmm0, %eax
lea 16(%ecx), %ecx
- movaps %xmm2, %xmm3
# ifdef USE_AS_STRNCPY
sub $16, %ebx
jbe L(StrncpyExit15Case2OrCase3)
@@ -2165,8 +2077,7 @@ L(Shl15Start):
test %eax, %eax
jnz L(Shl15LoopExit)
- palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, (%edx)
lea 17(%ecx), %ecx
lea 16(%edx), %edx
@@ -2214,15 +2125,14 @@ L(Shl15LoopStart):
jmp L(Shl15LoopStart)
L(Shl15LoopExit):
- movaps (%edx), %xmm6
- psrldq $1, %xmm6
+ movl -3(%ecx), %esi
+ movl %esi, -3(%edx)
mov $1, %esi
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx)
# ifdef USE_AS_STRCAT
jmp L(CopyFrom1To16Bytes)
# endif
+
# ifndef USE_AS_STRCAT
.p2align 4
@@ -2235,15 +2145,38 @@ L(CopyFrom1To16Bytes):
POP (%esi)
test %al, %al
- jz L(ExitHigh)
+ jz L(ExitHigh8)
+
+L(CopyFrom1To16BytesLess8):
+ mov %al, %ah
+ and $15, %ah
+ jz L(ExitHigh4)
+
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
- test $0x08, %al
- jnz L(Exit4)
+
+ .p2align 4
+L(Exit4):
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT (3)
+# ifdef USE_AS_STRNCPY
+ sub $4, %ebx
+ lea 4(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh4):
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
@@ -2255,11 +2188,7 @@ L(CopyFrom1To16Bytes):
L(Exit8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (7)
# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
@@ -2272,15 +2201,38 @@ L(Exit8):
RETURN1
.p2align 4
-L(ExitHigh):
+L(ExitHigh8):
+ mov %ah, %al
+ and $15, %al
+ jz L(ExitHigh12)
+
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
- test $0x08, %ah
- jnz L(Exit12)
+
+ .p2align 4
+L(Exit12):
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT (11)
+# ifdef USE_AS_STRNCPY
+ sub $12, %ebx
+ lea 12(%edx), %ecx
+ jnz L(StrncpyFillTailWithZero1)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+# endif
+ RETURN1
+
+ .p2align 4
+L(ExitHigh12):
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
@@ -2290,15 +2242,9 @@ L(ExitHigh):
.p2align 4
L(Exit16):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ SAVE_RESULT (15)
# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
@@ -2310,7 +2256,7 @@ L(Exit16):
# endif
RETURN1
-# ifdef USE_AS_STRNCPY
+# ifdef USE_AS_STRNCPY
CFI_PUSH(%esi)
@@ -2318,79 +2264,84 @@ L(Exit16):
L(CopyFrom1To16BytesCase2):
add $16, %ebx
add %esi, %ecx
- lea (%esi, %edx), %esi
- lea -9(%ebx), %edx
- and $1<<7, %dh
- or %al, %dh
- test %dh, %dh
- lea (%esi), %edx
+ add %esi, %edx
+
POP (%esi)
+
+ test %al, %al
jz L(ExitHighCase2)
- cmp $1, %ebx
- je L(Exit1)
+ cmp $8, %ebx
+ ja L(CopyFrom1To16BytesLess8)
+
test $0x01, %al
jnz L(Exit1)
- cmp $2, %ebx
- je L(Exit2)
+ cmp $1, %ebx
+ je L(Exit1)
test $0x02, %al
jnz L(Exit2)
- cmp $3, %ebx
- je L(Exit3)
+ cmp $2, %ebx
+ je L(Exit2)
test $0x04, %al
jnz L(Exit3)
- cmp $4, %ebx
- je L(Exit4)
+ cmp $3, %ebx
+ je L(Exit3)
test $0x08, %al
jnz L(Exit4)
- cmp $5, %ebx
- je L(Exit5)
+ cmp $4, %ebx
+ je L(Exit4)
test $0x10, %al
jnz L(Exit5)
- cmp $6, %ebx
- je L(Exit6)
+ cmp $5, %ebx
+ je L(Exit5)
test $0x20, %al
jnz L(Exit6)
- cmp $7, %ebx
- je L(Exit7)
+ cmp $6, %ebx
+ je L(Exit6)
test $0x40, %al
jnz L(Exit7)
+ cmp $7, %ebx
+ je L(Exit7)
jmp L(Exit8)
.p2align 4
L(ExitHighCase2):
- cmp $9, %ebx
- je L(Exit9)
+ cmp $8, %ebx
+ jbe L(CopyFrom1To16BytesLess8Case3)
+
test $0x01, %ah
jnz L(Exit9)
- cmp $10, %ebx
- je L(Exit10)
+ cmp $9, %ebx
+ je L(Exit9)
test $0x02, %ah
jnz L(Exit10)
- cmp $11, %ebx
- je L(Exit11)
+ cmp $10, %ebx
+ je L(Exit10)
test $0x04, %ah
jnz L(Exit11)
- cmp $12, %ebx
- je L(Exit12)
+ cmp $11, %ebx
+ je L(Exit11)
test $0x8, %ah
jnz L(Exit12)
- cmp $13, %ebx
- je L(Exit13)
+ cmp $12, %ebx
+ je L(Exit12)
test $0x10, %ah
jnz L(Exit13)
- cmp $14, %ebx
- je L(Exit14)
+ cmp $13, %ebx
+ je L(Exit13)
test $0x20, %ah
jnz L(Exit14)
- cmp $15, %ebx
- je L(Exit15)
+ cmp $14, %ebx
+ je L(Exit14)
test $0x40, %ah
jnz L(Exit15)
+ cmp $15, %ebx
+ je L(Exit15)
jmp L(Exit16)
CFI_PUSH(%esi)
+ .p2align 4
L(CopyFrom1To16BytesCase2OrCase3):
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
@@ -2402,47 +2353,78 @@ L(CopyFrom1To16BytesCase3):
add %esi, %ecx
POP (%esi)
- cmp $16, %ebx
- je L(Exit16)
+
cmp $8, %ebx
- je L(Exit8)
- jg L(More8Case3)
+ ja L(ExitHigh8Case3)
+
+L(CopyFrom1To16BytesLess8Case3):
cmp $4, %ebx
- je L(Exit4)
- jg L(More4Case3)
+ ja L(ExitHigh4Case3)
+
+ cmp $1, %ebx
+ je L(Exit1)
cmp $2, %ebx
- jl L(Exit1)
je L(Exit2)
- jg L(Exit3)
-L(More8Case3): /* but less than 16 */
- cmp $12, %ebx
- je L(Exit12)
- jl L(Less12Case3)
- cmp $14, %ebx
- jl L(Exit13)
- je L(Exit14)
- jg L(Exit15)
-L(More4Case3): /* but less than 8 */
+ cmp $3, %ebx
+ je L(Exit3)
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT (4)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh4Case3):
+ cmp $5, %ebx
+ je L(Exit5)
cmp $6, %ebx
- jl L(Exit5)
je L(Exit6)
- jg L(Exit7)
-L(Less12Case3): /* but more than 8 */
+ cmp $7, %ebx
+ je L(Exit7)
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
+ SAVE_RESULT (8)
+ RETURN1
+
+ .p2align 4
+L(ExitHigh8Case3):
+ cmp $12, %ebx
+ ja L(ExitHigh12Case3)
+
+ cmp $9, %ebx
+ je L(Exit9)
cmp $10, %ebx
- jl L(Exit9)
je L(Exit10)
- jg L(Exit11)
-# endif
+ cmp $11, %ebx
+ je L(Exit11)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT (12)
+ RETURN1
.p2align 4
-L(Exit1):
+L(ExitHigh12Case3):
+ cmp $13, %ebx
+ je L(Exit13)
+ cmp $14, %ebx
+ je L(Exit14)
+ cmp $15, %ebx
+ je L(Exit15)
+ movlpd (%ecx), %xmm0
+ movlpd 8(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 8(%edx)
+ SAVE_RESULT (16)
+ RETURN1
+
+# endif
+
+ .p2align 4
+L(Exit1):
movb (%ecx), %al
movb %al, (%edx)
-# ifdef USE_AS_STPCPY
- lea (%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (0)
# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
@@ -2458,11 +2440,7 @@ L(Exit1):
L(Exit2):
movw (%ecx), %ax
movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (1)
# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
@@ -2480,11 +2458,7 @@ L(Exit3):
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (2)
# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
@@ -2497,36 +2471,12 @@ L(Exit3):
RETURN1
.p2align 4
-L(Exit4):
- movl (%ecx), %eax
- movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax
-# else
- movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $4, %ebx
- lea 4(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-
- .p2align 4
L(Exit5):
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (4)
# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
@@ -2544,11 +2494,7 @@ L(Exit6):
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (5)
# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
@@ -2566,11 +2512,7 @@ L(Exit7):
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (6)
# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
@@ -2585,14 +2527,10 @@ L(Exit7):
.p2align 4
L(Exit9):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movb 8(%ecx), %al
+ movlpd %xmm0, (%edx)
movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (8)
# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
@@ -2607,14 +2545,10 @@ L(Exit9):
.p2align 4
L(Exit10):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
+ movlpd %xmm0, (%edx)
movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (9)
# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
@@ -2629,14 +2563,10 @@ L(Exit10):
.p2align 4
L(Exit11):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ SAVE_RESULT (10)
# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
@@ -2649,38 +2579,12 @@ L(Exit11):
RETURN1
.p2align 4
-L(Exit12):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movl 8(%ecx), %eax
- movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax
-# else
- movl %edi, %eax
-# endif
-# ifdef USE_AS_STRNCPY
- sub $12, %ebx
- lea 12(%edx), %ecx
- jnz L(StrncpyFillTailWithZero1)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
-# endif
- RETURN1
-
- .p2align 4
L(Exit13):
movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 5(%edx)
+ SAVE_RESULT (12)
# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
@@ -2695,14 +2599,10 @@ L(Exit13):
.p2align 4
L(Exit14):
movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 6(%edx)
+ SAVE_RESULT (13)
# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
@@ -2717,14 +2617,10 @@ L(Exit14):
.p2align 4
L(Exit15):
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
-# else
- movl %edi, %eax
-# endif
+ movlpd %xmm1, 7(%edx)
+ SAVE_RESULT (14)
# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
@@ -2853,7 +2749,7 @@ L(FillFrom1To16Bytes):
jl L(Fill1)
je L(Fill2)
jg L(Fill3)
-L(FillMore8): /* but less than 16 */
+L(FillMore8): /* but less than 16 */
cmp $12, %ebx
je L(Fill12)
jl L(FillLess12)
@@ -2861,18 +2757,18 @@ L(FillMore8): /* but less than 16 */
jl L(Fill13)
je L(Fill14)
jg L(Fill15)
-L(FillMore4): /* but less than 8 */
+L(FillMore4): /* but less than 8 */
cmp $6, %ebx
jl L(Fill5)
je L(Fill6)
jg L(Fill7)
-L(FillLess12): /* but more than 8 */
+L(FillLess12): /* but more than 8 */
cmp $10, %ebx
jl L(Fill9)
je L(Fill10)
jmp L(Fill11)
- CFI_PUSH (%edi)
+ CFI_PUSH(%edi)
.p2align 4
L(StrncpyFillTailWithZero1):
@@ -2929,11 +2825,7 @@ L(StrncpyFillLess32):
L(ExitTail1):
movb (%ecx), %al
movb %al, (%edx)
-# ifdef USE_AS_STPCPY
- lea (%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (0)
# ifdef USE_AS_STRNCPY
sub $1, %ebx
lea 1(%edx), %ecx
@@ -2949,11 +2841,7 @@ L(ExitTail1):
L(ExitTail2):
movw (%ecx), %ax
movw %ax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 1(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (1)
# ifdef USE_AS_STRNCPY
sub $2, %ebx
lea 2(%edx), %ecx
@@ -2971,11 +2859,7 @@ L(ExitTail3):
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
-# ifdef USE_AS_STPCPY
- lea 2(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (2)
# ifdef USE_AS_STRNCPY
sub $3, %ebx
lea 3(%edx), %ecx
@@ -2991,11 +2875,7 @@ L(ExitTail3):
L(ExitTail4):
movl (%ecx), %eax
movl %eax, (%edx)
-# ifdef USE_AS_STPCPY
- lea 3(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (3)
# ifdef USE_AS_STRNCPY
sub $4, %ebx
lea 4(%edx), %ecx
@@ -3013,11 +2893,7 @@ L(ExitTail5):
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 4(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (4)
# ifdef USE_AS_STRNCPY
sub $5, %ebx
lea 5(%edx), %ecx
@@ -3035,11 +2911,7 @@ L(ExitTail6):
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
-# ifdef USE_AS_STPCPY
- lea 5(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (5)
# ifdef USE_AS_STRNCPY
sub $6, %ebx
lea 6(%edx), %ecx
@@ -3057,11 +2929,7 @@ L(ExitTail7):
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
-# ifdef USE_AS_STPCPY
- lea 6(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (6)
# ifdef USE_AS_STRNCPY
sub $7, %ebx
lea 7(%edx), %ecx
@@ -3077,33 +2945,21 @@ L(ExitTail7):
L(ExitTail8):
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
-# ifdef USE_AS_STPCPY
- lea 7(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (7)
# ifdef USE_AS_STRNCPY
sub $8, %ebx
lea 8(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
# endif
RETURN
.p2align 4
L(ExitTail9):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movb 8(%ecx), %al
+ movlpd %xmm0, (%edx)
movb %al, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 8(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (8)
# ifdef USE_AS_STRNCPY
sub $9, %ebx
lea 9(%edx), %ecx
@@ -3118,14 +2974,10 @@ L(ExitTail9):
.p2align 4
L(ExitTail10):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movw 8(%ecx), %ax
+ movlpd %xmm0, (%edx)
movw %ax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 9(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (9)
# ifdef USE_AS_STRNCPY
sub $10, %ebx
lea 10(%edx), %ecx
@@ -3140,14 +2992,10 @@ L(ExitTail10):
.p2align 4
L(ExitTail11):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 7(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 10(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (10)
# ifdef USE_AS_STRNCPY
sub $11, %ebx
lea 11(%edx), %ecx
@@ -3162,14 +3010,10 @@ L(ExitTail11):
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
movl %eax, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 11(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STRNCPY
sub $12, %ebx
lea 12(%edx), %ecx
@@ -3184,14 +3028,10 @@ L(ExitTail12):
.p2align 4
L(ExitTail13):
movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 5(%ecx), %xmm0
- movlpd %xmm0, 5(%edx)
-# ifdef USE_AS_STPCPY
- lea 12(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 5(%edx)
+ SAVE_RESULT_TAIL (12)
# ifdef USE_AS_STRNCPY
sub $13, %ebx
lea 13(%edx), %ecx
@@ -3206,19 +3046,15 @@ L(ExitTail13):
.p2align 4
L(ExitTail14):
movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 6(%ecx), %xmm0
- movlpd %xmm0, 6(%edx)
-# ifdef USE_AS_STPCPY
- lea 13(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 6(%edx)
+ SAVE_RESULT_TAIL (13)
# ifdef USE_AS_STRNCPY
sub $14, %ebx
lea 14(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
+# ifdef USE_AS_STPCPY
cmpb $1, (%eax)
sbb $-1, %eax
# endif
@@ -3228,36 +3064,22 @@ L(ExitTail14):
.p2align 4
L(ExitTail15):
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
-# ifdef USE_AS_STPCPY
- lea 14(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movlpd %xmm1, 7(%edx)
+ SAVE_RESULT_TAIL (14)
# ifdef USE_AS_STRNCPY
sub $15, %ebx
lea 15(%edx), %ecx
jnz L(StrncpyFillTailWithZero)
-# ifdef USE_AS_STPCPY
- cmpb $1, (%eax)
- sbb $-1, %eax
-# endif
# endif
RETURN
.p2align 4
L(ExitTail16):
- movlpd (%ecx), %xmm0
- movlpd %xmm0, (%edx)
- movlpd 8(%ecx), %xmm0
- movlpd %xmm0, 8(%edx)
-# ifdef USE_AS_STPCPY
- lea 15(%edx), %eax
-# else
- movl %edx, %eax
-# endif
+ movdqu (%ecx), %xmm0
+ movdqu %xmm0, (%edx)
+ SAVE_RESULT_TAIL (15)
# ifdef USE_AS_STRNCPY
sub $16, %ebx
lea 16(%edx), %ecx
@@ -3268,13 +3090,14 @@ L(ExitTail16):
# endif
# endif
RETURN
-#endif
+# endif
# ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
- CFI_PUSH (%esi)
- CFI_PUSH (%edi)
+ CFI_PUSH (%esi)
+ CFI_PUSH (%edi)
# endif
+ .p2align 4
L(StrncpyLeaveCase2OrCase3):
test %eax, %eax
jnz L(Aligned64LeaveCase2)
@@ -3327,153 +3150,153 @@ L(Aligned64LeaveCase2):
lea 16(%esi), %esi
lea -16(%ebx), %ebx
jmp L(CopyFrom1To16BytesCase2)
-/* -------------------------------------------------- */
+
+/*--------------------------------------------------*/
+ .p2align 4
L(StrncpyExit1Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $15, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 7(%edx)
mov $15, %esi
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit2Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $14, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 6(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 6(%edx)
mov $14, %esi
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit3Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $13, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd 5(%ecx), %xmm1
+ movlpd %xmm0, (%edx)
+ movlpd %xmm1, 5(%edx)
mov $13, %esi
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit4Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $12, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 8(%edx)
mov $12, %esi
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit5Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $11, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 7(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 7(%edx)
mov $11, %esi
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit6Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $10, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 6(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 6(%edx)
mov $10, %esi
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit7Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $9, %xmm6
+ movlpd (%ecx), %xmm0
+ movl 5(%ecx), %esi
+ movlpd %xmm0, (%edx)
+ movl %esi, 5(%edx)
mov $9, %esi
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit8Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $8, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $8, %esi
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit9Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $7, %xmm6
+ movlpd (%ecx), %xmm0
+ movlpd %xmm0, (%edx)
mov $7, %esi
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit10Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $6, %xmm6
+ movlpd -1(%ecx), %xmm0
+ movlpd %xmm0, -1(%edx)
mov $6, %esi
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit11Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $5, %xmm6
+ movlpd -2(%ecx), %xmm0
+ movlpd %xmm0, -2(%edx)
mov $5, %esi
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit12Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $4, %xmm6
+ movl (%ecx), %esi
+ movl %esi, (%edx)
mov $4, %esi
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit13Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $3, %xmm6
+ movl -1(%ecx), %esi
+ movl %esi, -1(%edx)
mov $3, %esi
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit14Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $2, %xmm6
+ movl -2(%ecx), %esi
+ movl %esi, -2(%edx)
mov $2, %esi
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
+ .p2align 4
L(StrncpyExit15Case2OrCase3):
- movaps (%edx), %xmm6
- psrldq $1, %xmm6
+ movl -3(%ecx), %esi
+ movl %esi, -3(%edx)
mov $1, %esi
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16BytesCase2)
jmp L(CopyFrom1To16BytesCase3)
@@ -3483,36 +3306,29 @@ L(StrncpyLeave1):
add $48, %ebx
jle L(StrncpyExit1)
palignr $1, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 31(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit1)
- palignr $1, %xmm1, %xmm2
+ palignr $1, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 31+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit1)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit1):
- movaps (%edx, %esi), %xmm6
- psrldq $15, %xmm6
- palignr $1, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 15(%esi), %esi
+ lea 15(%edx, %esi), %edx
+ lea 15(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave2):
@@ -3520,36 +3336,29 @@ L(StrncpyLeave2):
add $48, %ebx
jle L(StrncpyExit2)
palignr $2, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 30(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit2)
- palignr $2, %xmm1, %xmm2
+ palignr $2, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 30+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit2)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit2):
- movaps (%edx, %esi), %xmm6
- psrldq $14, %xmm6
- palignr $2, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 14(%esi), %esi
+ lea 14(%edx, %esi), %edx
+ lea 14(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave3):
@@ -3557,36 +3366,29 @@ L(StrncpyLeave3):
add $48, %ebx
jle L(StrncpyExit3)
palignr $3, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 29(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit3)
- palignr $3, %xmm1, %xmm2
+ palignr $3, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 29+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit3)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit3):
- movaps (%edx, %esi), %xmm6
- psrldq $13, %xmm6
- palignr $3, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 13(%esi), %esi
+ lea 13(%edx, %esi), %edx
+ lea 13(%ecx, %esi), %ecx
+ movdqu -16(%ecx), %xmm0
+ xor %esi, %esi
+ movdqu %xmm0, -16(%edx)
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave4):
@@ -3594,36 +3396,31 @@ L(StrncpyLeave4):
add $48, %ebx
jle L(StrncpyExit4)
palignr $4, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit4)
- palignr $4, %xmm1, %xmm2
+ palignr $4, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 28+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit4)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit4):
- movaps (%edx, %esi), %xmm6
- psrldq $12, %xmm6
- palignr $4, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 12(%esi), %esi
+ lea 12(%edx, %esi), %edx
+ lea 12(%ecx, %esi), %ecx
+ movlpd -12(%ecx), %xmm0
+ movl -4(%ecx), %eax
+ movlpd %xmm0, -12(%edx)
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave5):
@@ -3631,36 +3428,31 @@ L(StrncpyLeave5):
add $48, %ebx
jle L(StrncpyExit5)
palignr $5, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 27(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit5)
- palignr $5, %xmm1, %xmm2
+ palignr $5, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 27+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit5)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit5):
- movaps (%edx, %esi), %xmm6
- psrldq $11, %xmm6
- palignr $5, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 11(%esi), %esi
+ lea 11(%edx, %esi), %edx
+ lea 11(%ecx, %esi), %ecx
+ movlpd -11(%ecx), %xmm0
+ movl -4(%ecx), %eax
+ movlpd %xmm0, -11(%edx)
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave6):
@@ -3668,36 +3460,32 @@ L(StrncpyLeave6):
add $48, %ebx
jle L(StrncpyExit6)
palignr $6, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 26(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit6)
- palignr $6, %xmm1, %xmm2
+ palignr $6, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 26+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit6)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit6):
- movaps (%edx, %esi), %xmm6
- psrldq $10, %xmm6
- palignr $6, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 10(%esi), %esi
+ lea 10(%edx, %esi), %edx
+ lea 10(%ecx, %esi), %ecx
+
+ movlpd -10(%ecx), %xmm0
+ movw -2(%ecx), %ax
+ movlpd %xmm0, -10(%edx)
+ movw %ax, -2(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave7):
@@ -3705,36 +3493,32 @@ L(StrncpyLeave7):
add $48, %ebx
jle L(StrncpyExit7)
palignr $7, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 25(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit7)
- palignr $7, %xmm1, %xmm2
+ palignr $7, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 25+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit7)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit7):
- movaps (%edx, %esi), %xmm6
- psrldq $9, %xmm6
- palignr $7, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 9(%esi), %esi
+ lea 9(%edx, %esi), %edx
+ lea 9(%ecx, %esi), %ecx
+
+ movlpd -9(%ecx), %xmm0
+ movb -1(%ecx), %ah
+ movlpd %xmm0, -9(%edx)
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave8):
@@ -3742,36 +3526,29 @@ L(StrncpyLeave8):
add $48, %ebx
jle L(StrncpyExit8)
palignr $8, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit8)
- palignr $8, %xmm1, %xmm2
+ palignr $8, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 24+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit8)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit8):
- movaps (%edx, %esi), %xmm6
- psrldq $8, %xmm6
- palignr $8, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 8(%esi), %esi
+ lea 8(%edx, %esi), %edx
+ lea 8(%ecx, %esi), %ecx
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave9):
@@ -3779,36 +3556,30 @@ L(StrncpyLeave9):
add $48, %ebx
jle L(StrncpyExit9)
palignr $9, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 23(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit9)
- palignr $9, %xmm1, %xmm2
+ palignr $9, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 23+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit9)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit9):
- movaps (%edx, %esi), %xmm6
- psrldq $7, %xmm6
- palignr $9, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 7(%esi), %esi
+ lea 7(%edx, %esi), %edx
+ lea 7(%ecx, %esi), %ecx
+
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave10):
@@ -3816,36 +3587,30 @@ L(StrncpyLeave10):
add $48, %ebx
jle L(StrncpyExit10)
palignr $10, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 22(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit10)
- palignr $10, %xmm1, %xmm2
+ palignr $10, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 22+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit10)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit10):
- movaps (%edx, %esi), %xmm6
- psrldq $6, %xmm6
- palignr $10, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 6(%esi), %esi
+ lea 6(%edx, %esi), %edx
+ lea 6(%ecx, %esi), %ecx
+
+ movlpd -8(%ecx), %xmm0
+ movlpd %xmm0, -8(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave11):
@@ -3853,36 +3618,31 @@ L(StrncpyLeave11):
add $48, %ebx
jle L(StrncpyExit11)
palignr $11, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 21(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit11)
- palignr $11, %xmm1, %xmm2
+ palignr $11, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 21+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit11)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit11):
- movaps (%edx, %esi), %xmm6
- psrldq $5, %xmm6
- palignr $11, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 5(%esi), %esi
+ lea 5(%edx, %esi), %edx
+ lea 5(%ecx, %esi), %ecx
+ movl -5(%ecx), %esi
+ movb -1(%ecx), %ah
+ movl %esi, -5(%edx)
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave12):
@@ -3890,36 +3650,29 @@ L(StrncpyLeave12):
add $48, %ebx
jle L(StrncpyExit12)
palignr $12, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit12)
- palignr $12, %xmm1, %xmm2
+ palignr $12, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 20+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit12)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit12):
- movaps (%edx, %esi), %xmm6
- psrldq $4, %xmm6
- palignr $12, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 4(%esi), %esi
+ lea 4(%edx, %esi), %edx
+ lea 4(%ecx, %esi), %ecx
+ movl -4(%ecx), %eax
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave13):
@@ -3927,36 +3680,30 @@ L(StrncpyLeave13):
add $48, %ebx
jle L(StrncpyExit13)
palignr $13, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 19(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit13)
- palignr $13, %xmm1, %xmm2
+ palignr $13, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 19+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit13)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit13):
- movaps (%edx, %esi), %xmm6
- psrldq $3, %xmm6
- palignr $13, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 3(%esi), %esi
+ lea 3(%edx, %esi), %edx
+ lea 3(%ecx, %esi), %ecx
+
+ movl -4(%ecx), %eax
+ movl %eax, -4(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave14):
@@ -3964,36 +3711,29 @@ L(StrncpyLeave14):
add $48, %ebx
jle L(StrncpyExit14)
palignr $14, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 18(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit14)
- palignr $14, %xmm1, %xmm2
+ palignr $14, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 18+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit14)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit14):
- movaps (%edx, %esi), %xmm6
- psrldq $2, %xmm6
- palignr $14, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 2(%esi), %esi
+ lea 2(%edx, %esi), %edx
+ lea 2(%ecx, %esi), %ecx
+ movw -2(%ecx), %ax
+ movw %ax, -2(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
L(StrncpyLeave15):
@@ -4001,43 +3741,36 @@ L(StrncpyLeave15):
add $48, %ebx
jle L(StrncpyExit15)
palignr $15, %xmm1, %xmm2
- movaps %xmm3, %xmm1
movaps %xmm2, (%edx)
movaps 17(%ecx), %xmm2
lea 16(%esi), %esi
- movaps %xmm2, %xmm3
sub $16, %ebx
jbe L(StrncpyExit15)
- palignr $15, %xmm1, %xmm2
+ palignr $15, %xmm3, %xmm2
movaps %xmm2, 16(%edx)
- movaps 17+16(%ecx), %xmm2
- movaps %xmm3, %xmm1
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
- movaps %xmm2, %xmm1
movaps %xmm4, 32(%edx)
lea 16(%esi), %esi
sub $16, %ebx
jbe L(StrncpyExit15)
- movaps %xmm7, %xmm1
movaps %xmm5, 48(%edx)
lea 16(%esi), %esi
lea -16(%ebx), %ebx
-
L(StrncpyExit15):
- movaps (%edx, %esi), %xmm6
- psrldq $1, %xmm6
- palignr $15, %xmm1, %xmm6
- movaps %xmm6, (%edx, %esi)
- lea 1(%esi), %esi
+ lea 1(%edx, %esi), %edx
+ lea 1(%ecx, %esi), %ecx
+ movb -1(%ecx), %ah
+ movb %ah, -1(%edx)
+ xor %esi, %esi
jmp L(CopyFrom1To16BytesCase3)
# endif
# ifndef USE_AS_STRCAT
# ifdef USE_AS_STRNCPY
- CFI_POP (%esi)
- CFI_POP (%edi)
+ CFI_POP (%esi)
+ CFI_POP (%edi)
.p2align 4
L(ExitTail0):
@@ -4046,20 +3779,14 @@ L(ExitTail0):
.p2align 4
L(StrncpyExit15Bytes):
- cmp $9, %ebx
- je L(ExitTail9)
+ cmp $12, %ebx
+ jbe L(StrncpyExit12Bytes)
cmpb $0, 8(%ecx)
jz L(ExitTail9)
- cmp $10, %ebx
- je L(ExitTail10)
cmpb $0, 9(%ecx)
jz L(ExitTail10)
- cmp $11, %ebx
- je L(ExitTail11)
cmpb $0, 10(%ecx)
jz L(ExitTail11)
- cmp $12, %ebx
- je L(ExitTail12)
cmpb $0, 11(%ecx)
jz L(ExitTail12)
cmp $13, %ebx
@@ -4071,9 +3798,9 @@ L(StrncpyExit15Bytes):
cmpb $0, 13(%ecx)
jz L(ExitTail14)
movlpd (%ecx), %xmm0
+ movlpd 7(%ecx), %xmm1
movlpd %xmm0, (%edx)
- movlpd 7(%ecx), %xmm0
- movlpd %xmm0, 7(%edx)
+ movlpd %xmm1, 7(%edx)
# ifdef USE_AS_STPCPY
lea 14(%edx), %eax
cmpb $1, (%eax)
@@ -4084,23 +3811,43 @@ L(StrncpyExit15Bytes):
RETURN
.p2align 4
+L(StrncpyExit12Bytes):
+ cmp $9, %ebx
+ je L(ExitTail9)
+ cmpb $0, 8(%ecx)
+ jz L(ExitTail9)
+ cmp $10, %ebx
+ je L(ExitTail10)
+ cmpb $0, 9(%ecx)
+ jz L(ExitTail10)
+ cmp $11, %ebx
+ je L(ExitTail11)
+ cmpb $0, 10(%ecx)
+ jz L(ExitTail11)
+ movlpd (%ecx), %xmm0
+ movl 8(%ecx), %eax
+ movlpd %xmm0, (%edx)
+ movl %eax, 8(%edx)
+ SAVE_RESULT_TAIL (11)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+ RETURN
+
+ .p2align 4
L(StrncpyExit8Bytes):
- cmp $1, %ebx
- je L(ExitTail1)
+ cmp $4, %ebx
+ jbe L(StrncpyExit4Bytes)
cmpb $0, (%ecx)
jz L(ExitTail1)
- cmp $2, %ebx
- je L(ExitTail2)
cmpb $0, 1(%ecx)
jz L(ExitTail2)
- cmp $3, %ebx
- je L(ExitTail3)
cmpb $0, 2(%ecx)
jz L(ExitTail3)
- cmp $4, %ebx
- je L(ExitTail4)
cmpb $0, 3(%ecx)
jz L(ExitTail4)
+
cmp $5, %ebx
je L(ExitTail5)
cmpb $0, 4(%ecx)
@@ -4123,8 +3870,32 @@ L(StrncpyExit8Bytes):
movl %edx, %eax
# endif
RETURN
-# endif
+ .p2align 4
+L(StrncpyExit4Bytes):
+ test %ebx, %ebx
+ jz L(ExitTail0)
+ cmp $1, %ebx
+ je L(ExitTail1)
+ cmpb $0, (%ecx)
+ jz L(ExitTail1)
+ cmp $2, %ebx
+ je L(ExitTail2)
+ cmpb $0, 1(%ecx)
+ jz L(ExitTail2)
+ cmp $3, %ebx
+ je L(ExitTail3)
+ cmpb $0, 2(%ecx)
+ jz L(ExitTail3)
+ movl (%ecx), %eax
+ movl %eax, (%edx)
+ SAVE_RESULT_TAIL (3)
+# ifdef USE_AS_STPCPY
+ cmpb $1, (%eax)
+ sbb $-1, %eax
+# endif
+ RETURN
+# endif
END (STRCPY)
# endif
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 5 +
sysdeps/i386/i686/multiarch/strcpy-ssse3.S | 1265 ++++++++++++----------------
2 files changed, 523 insertions(+), 747 deletions(-)
hooks/post-receive
--
GNU C Library master sources