This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.14-225-gb0fc1ff


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch, master has been updated
       via  b0fc1ff04ee0775dbe1b5d6d4a2c87e2d7decb06 (commit)
       via  693fb948841c7564ce3cd1ec4b31c0130abc8c42 (commit)
      from  49d42c37ba4f688ed442bfa0ff54e851b58e607b (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=b0fc1ff04ee0775dbe1b5d6d4a2c87e2d7decb06

commit b0fc1ff04ee0775dbe1b5d6d4a2c87e2d7decb06
Author: Ulrich Drepper <drepper@gmail.com>
Date:   Mon Sep 5 17:12:27 2011 -0400

    Fix whitespaces

diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2.S b/sysdeps/i386/i686/multiarch/strchr-sse2.S
index 7702210..a73b21e 100644
--- a/sysdeps/i386/i686/multiarch/strchr-sse2.S
+++ b/sysdeps/i386/i686/multiarch/strchr-sse2.S
@@ -347,4 +347,3 @@ L(return_null):
 
 END (__strchr_sse2)
 #endif
-
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
index f1bcb78..f40dfdc 100644
--- a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
+++ b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
@@ -281,4 +281,3 @@ L(return_null_1):
 
 END (__strrchr_sse2_bsf)
 #endif
-

http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=693fb948841c7564ce3cd1ec4b31c0130abc8c42

commit 693fb948841c7564ce3cd1ec4b31c0130abc8c42
Author: Liubov Dmitrieva <liubov.dmitrieva@intel.com>
Date:   Mon Sep 5 17:11:11 2011 -0400

    Optimized strchr and strrchr with SSE2 on x86-32

diff --git a/ChangeLog b/ChangeLog
index 3d10ec2..b0a79e1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2011-07-20  Liubov Dmitrieva  <liubov.dmitrieva@intel.com>
+
+	* sysdeps/i386/i686/multiarch/Makefile (sysdep_routines): Add
+	strchr-sse2 strrchr-sse2 strchr-sse2-bsf
+	strrchr-sse2-bsf
+	* sysdeps/i386/i686/multiarch/strchr.S: New file.
+	* sysdeps/i386/i686/multiarch/strrchr.S: New file.
+	* sysdeps/i386/i686/multiarch/strchr-sse2.S: New file.
+	* sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S: New file.
+	* sysdeps/i386/i686/multiarch/strrchr-sse2.S: New file.
+	* sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S: New file.
+
 2011-08-29  Liubov Dmitrieva  <liubov.dmitrieva@gmail.com>
 
 	* sysdeps/x86_64/wcscmp.S: New file.
diff --git a/NEWS b/NEWS
index 5e407f6..e38c57f 100644
--- a/NEWS
+++ b/NEWS
@@ -29,6 +29,9 @@ Version 2.15
   x86-64.
   Contributed by Liubov Dmitrieva.
 
+* Optimized strchrm strrchr for SSE on x86-32.
+  Contributed by Liubov Dmitrieva.
+
 * New interfaces: scandirat, scandirat64
   Implemented by Ulrich Drepper.
 
diff --git a/sysdeps/i386/i686/multiarch/Makefile b/sysdeps/i386/i686/multiarch/Makefile
index 1449d91..c89ae92 100644
--- a/sysdeps/i386/i686/multiarch/Makefile
+++ b/sysdeps/i386/i686/multiarch/Makefile
@@ -14,7 +14,8 @@ sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
 		   strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 strcpy-sse2 \
 		   strncpy-sse2 stpcpy-sse2 stpncpy-sse2 strcat-ssse3 \
 		   strcat-sse2 strncat-ssse3 strncat-sse2 strncat-c \
-			wcscmp-sse2 wcscmp-c
+		   strchr-sse2 strrchr-sse2 strchr-sse2-bsf strrchr-sse2-bsf \
+		   wcscmp-sse2 wcscmp-c
 ifeq (yes,$(config-cflags-sse4))
 sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
 CFLAGS-varshift.c += -msse4
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
new file mode 100644
index 0000000..5a19ba2
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
@@ -0,0 +1,159 @@
+/* strchr with SSE2 with bsf
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS  8
+# define ENTRANCE PUSH(%edi)
+# define RETURN  POP(%edi); ret; CFI_PUSH(%edi);
+
+# define STR1  PARMS
+# define STR2  STR1+4
+
+	.text
+ENTRY (__strchr_sse2_bsf)
+
+	ENTRANCE
+	mov	STR1(%esp), %ecx
+	movd	STR2(%esp), %xmm1
+
+	pxor	%xmm2, %xmm2
+	mov	%ecx, %edi
+	punpcklbw %xmm1, %xmm1
+	punpcklbw %xmm1, %xmm1
+	/* ECX has OFFSET. */
+	and	$15, %ecx
+	pshufd	$0, %xmm1, %xmm1
+	je	L(loop)
+
+/* Handle unaligned string.  */
+	and	$-16, %edi
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where NULL is.  */
+	pmovmskb %xmm2, %edx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	/* Remove the leading bytes.  */
+	sarl	%cl, %edx
+	sarl	%cl, %eax
+	test	%eax, %eax
+	je	L(unaligned_no_match)
+	/* Check which byte is a match.  */
+	bsf	%eax, %eax
+	/* Is there a NULL? */
+	test	%edx, %edx
+	je	L(unaligned_match)
+	bsf	%edx, %edx
+	cmpl	%edx, %eax
+	/* Return NULL if NULL comes first.  */
+	ja	L(return_null)
+L(unaligned_match):
+	add	%edi, %eax
+	add	%ecx, %eax
+	RETURN
+
+	.p2align 4
+L(unaligned_no_match):
+	test	%edx, %edx
+	jne	L(return_null)
+	pxor	%xmm2, %xmm2
+
+	add	$16, %edi
+
+	.p2align 4
+/* Loop start on aligned string.  */
+L(loop):
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	or	%eax, %edx
+	jnz	L(matches)
+	jmp	L(loop)
+
+L(matches):
+	pmovmskb %xmm2, %edx
+	test	%eax, %eax
+	jz	L(return_null)
+	bsf	%eax, %eax
+	/* There is a match.  First find where NULL is.  */
+	test	%edx, %edx
+	je	L(match)
+	bsf	%edx, %ecx
+	/* Check if NULL comes first.  */
+	cmpl	%ecx, %eax
+	ja	L(return_null)
+L(match):
+	sub	$16, %edi
+	add	%edi, %eax
+	RETURN
+
+/* Return NULL.  */
+	.p2align 4
+L(return_null):
+	xor	%eax, %eax
+	RETURN
+
+END (__strchr_sse2_bsf)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strchr-sse2.S b/sysdeps/i386/i686/multiarch/strchr-sse2.S
new file mode 100644
index 0000000..7702210
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strchr-sse2.S
@@ -0,0 +1,350 @@
+/* strchr SSE2 without bsf
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS  8
+# define ENTRANCE PUSH(%edi)
+# define RETURN  POP(%edi); ret; CFI_PUSH(%edi);
+
+# define STR1  PARMS
+# define STR2  STR1+4
+
+	.text
+ENTRY (__strchr_sse2)
+
+	ENTRANCE
+	mov	STR1(%esp), %ecx
+	movd	STR2(%esp), %xmm1
+
+	pxor	%xmm2, %xmm2
+	mov	%ecx, %edi
+	punpcklbw %xmm1, %xmm1
+	punpcklbw %xmm1, %xmm1
+	/* ECX has OFFSET. */
+	and	$15, %ecx
+	pshufd	$0, %xmm1, %xmm1
+	je	L(loop)
+
+/* Handle unaligned string.  */
+	and	$-16, %edi
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where NULL is.  */
+	pmovmskb %xmm2, %edx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	/* Remove the leading bytes.  */
+	sarl	%cl, %edx
+	sarl	%cl, %eax
+	test	%eax, %eax
+	jz	L(unaligned_no_match)
+	/* Check which byte is a match.  */
+	/* Is there a NULL? */
+	add	%ecx, %edi
+	test	%edx, %edx
+	jz	L(match_case1)
+	jmp	L(match_case2)
+
+	.p2align 4
+L(unaligned_no_match):
+	test	%edx, %edx
+	jne	L(return_null)
+
+	pxor	%xmm2, %xmm2
+	add	$16, %edi
+
+	.p2align 4
+/* Loop start on aligned string.  */
+L(loop):
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches)
+	test	%edx, %edx
+	jnz	L(return_null)
+	add	$16, %edi
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches)
+	test	%edx, %edx
+	jnz	L(return_null)
+	add	$16, %edi
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches)
+	test	%edx, %edx
+	jnz	L(return_null)
+	add	$16, %edi
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %edx
+	pmovmskb %xmm0, %eax
+	test	%eax, %eax
+	jnz	L(matches)
+	test	%edx, %edx
+	jnz	L(return_null)
+	add	$16, %edi
+	jmp	L(loop)
+
+L(matches):
+	/* There is a match.  First find where NULL is.  */
+	test	%edx, %edx
+	jz	L(match_case1)
+
+	.p2align 4
+L(match_case2):
+	test	%al, %al
+	jz	L(match_higth_case2)
+
+	mov	%al, %cl
+	and	$15, %cl
+	jnz	L(match_case2_4)
+
+	mov	%dl, %ch
+	and	$15, %ch
+	jnz	L(return_null)
+
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x10, %dl
+	jnz	L(return_null)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x20, %dl
+	jnz	L(return_null)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	test	$0x40, %dl
+	jnz	L(return_null)
+	lea	7(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case2_4):
+	test	$0x01, %al
+	jnz	L(Exit1)
+	test	$0x01, %dl
+	jnz	L(return_null)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x02, %dl
+	jnz	L(return_null)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	test	$0x04, %dl
+	jnz	L(return_null)
+	lea	3(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_higth_case2):
+	test	%dl, %dl
+	jnz	L(return_null)
+
+	mov	%ah, %cl
+	and	$15, %cl
+	jnz	L(match_case2_12)
+
+	mov	%dh, %ch
+	and	$15, %ch
+	jnz	L(return_null)
+
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x10, %dh
+	jnz	L(return_null)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x20, %dh
+	jnz	L(return_null)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	test	$0x40, %dh
+	jnz	L(return_null)
+	lea	15(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case2_12):
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x01, %dh
+	jnz	L(return_null)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x02, %dh
+	jnz	L(return_null)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	test	$0x04, %dh
+	jnz	L(return_null)
+	lea	11(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_case1):
+	test	%al, %al
+	jz	L(match_higth_case1)
+
+	test	$0x01, %al
+	jnz	L(Exit1)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	test	$0x10, %al
+	jnz	L(Exit5)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	lea	7(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_higth_case1):
+	test	$0x01, %ah
+	jnz	L(Exit9)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	test	$0x08, %ah
+	jnz	L(Exit12)
+	test	$0x10, %ah
+	jnz	L(Exit13)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	lea	15(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit1):
+	lea	(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit2):
+	lea	1(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit3):
+	lea	2(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit4):
+	lea	3(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit5):
+	lea	4(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit6):
+	lea	5(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit7):
+	lea	6(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit9):
+	lea	8(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit10):
+	lea	9(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit11):
+	lea	10(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit12):
+	lea	11(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit13):
+	lea	12(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit14):
+	lea	13(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit15):
+	lea	14(%edi), %eax
+	RETURN
+
+/* Return NULL.  */
+	.p2align 4
+L(return_null):
+	xor	%eax, %eax
+	RETURN
+
+END (__strchr_sse2)
+#endif
+
diff --git a/sysdeps/i386/i686/multiarch/strchr.S b/sysdeps/i386/i686/multiarch/strchr.S
new file mode 100644
index 0000000..aed967c
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strchr.S
@@ -0,0 +1,76 @@
+/* Multiple versions of strchr
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef NOT_IN_libc
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+	.p2align 4
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
+
+	.text
+ENTRY(strchr)
+	.type	strchr, @gnu_indirect_function
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne	1f
+	call	__init_cpu_features
+1:	leal	__strchr_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__strchr_sse2_bsf@GOTOFF(%ebx), %eax
+	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__strchr_sse2@GOTOFF(%ebx), %eax
+2:	popl	%ebx
+	cfi_adjust_cfa_offset (-4);
+	cfi_restore (ebx)
+	ret
+END(strchr)
+
+# undef ENTRY
+# define ENTRY(name) \
+	.type __strchr_ia32, @function; \
+	.globl __strchr_ia32; \
+	.p2align 4; \
+	__strchr_ia32: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __strchr_ia32, .-__strchr_ia32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+   they will be called without setting up EBX needed for PLT which is
+   used by IFUNC.  */
+# define libc_hidden_builtin_def(name) \
+	.globl __GI_strchr; __GI_strchr = __strchr_ia32
+#endif
+
+#include "../../i586/strchr.S"
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
new file mode 100644
index 0000000..f1bcb78
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
@@ -0,0 +1,284 @@
+/* strrchr with SSE2 with bsf and bsr
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS  4
+# define STR1  PARMS
+# define STR2  STR1+4
+
+	.text
+ENTRY (__strrchr_sse2_bsf)
+
+	mov	STR1(%esp), %ecx
+	movd	STR2(%esp), %xmm1
+
+	PUSH	(%edi)
+	pxor	%xmm2, %xmm2
+	mov	%ecx, %edi
+	punpcklbw %xmm1, %xmm1
+	punpcklbw %xmm1, %xmm1
+	/* ECX has OFFSET. */
+	and	$63, %ecx
+	cmp	$48, %ecx
+	pshufd	$0, %xmm1, %xmm1
+	ja	L(crosscashe)
+
+/* unaligned string. */
+	movdqu	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where NULL is.  */
+	pmovmskb %xmm2, %edx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+
+	test	%eax, %eax
+	jnz	L(unaligned_match1)
+
+	test	%edx, %edx
+	jnz	L(return_null)
+
+	and	$-16, %edi
+	add	$16, %edi
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	xor	%ebx, %ebx
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+	CFI_POP	(%ebx)
+
+	.p2align 4
+L(unaligned_return_value1):
+	bsf	%edx, %ecx
+	mov	$2, %edx
+	shl	%cl, %edx
+	sub	$1, %edx
+	and	%edx, %eax
+	jz	L(return_null)
+	bsr	%eax, %eax
+	add	%edi, %eax
+	POP	(%edi)
+	ret
+	CFI_PUSH	(%edi)
+
+	.p2align 4
+L(unaligned_match1):
+	test	%edx, %edx
+	jnz	L(unaligned_return_value1)
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	mov	%eax, %ebx
+	lea	16(%edi), %esi
+	and	$-16, %edi
+	add	$16, %edi
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+	CFI_POP	(%ebx)
+
+	.p2align 4
+	L(crosscashe):
+/* Hancle unaligned string.  */
+	and	$15, %ecx
+	and	$-16, %edi
+	pxor	%xmm3, %xmm3
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm3
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where NULL is.  */
+	pmovmskb %xmm3, %edx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	/* Remove the leading bytes.  */
+	shr	%cl, %edx
+	shr	%cl, %eax
+
+	test	%eax, %eax
+	jnz	L(unaligned_match)
+
+	test	%edx, %edx
+	jnz	L(return_null)
+
+	add	$16, %edi
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	xor	%ebx, %ebx
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+	CFI_POP	(%ebx)
+
+	.p2align 4
+L(unaligned_return_value):
+	add	%ecx, %edi
+	bsf	%edx, %ecx
+	mov	$2, %edx
+	shl	%cl, %edx
+	sub	$1, %edx
+	and	%edx, %eax
+	jz	L(return_null)
+	bsr	%eax, %eax
+	add	%edi, %eax
+	POP	(%edi)
+	ret
+	CFI_PUSH	(%edi)
+
+	.p2align 4
+L(unaligned_match):
+	test	%edx, %edx
+	jnz	L(unaligned_return_value)
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	mov	%eax, %ebx
+	add	$16, %edi
+	lea	(%edi, %ecx), %esi
+
+/* Loop start on aligned string.  */
+	.p2align 4
+L(loop):
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jz	L(loop)
+
+L(matches):
+	test	%eax, %eax
+	jnz	L(match)
+L(return_value):
+	test	%ebx, %ebx
+	jz	L(return_null_1)
+	bsr	%ebx, %eax
+	add	%esi, %eax
+
+	POP	(%ebx)
+	POP	(%esi)
+
+	sub	$16, %eax
+	POP	(%edi)
+	ret
+
+	CFI_PUSH	(%edi)
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(match):
+	pmovmskb %xmm2, %ecx
+	test	%ecx, %ecx
+	jnz	L(return_value_1)
+	mov	%eax, %ebx
+	mov	%edi, %esi
+	jmp	L(loop)
+
+	.p2align 4
+L(return_value_1):
+	bsf	%ecx, %ecx
+	mov	$2, %edx
+	shl	%cl, %edx
+	sub	$1, %edx
+	and	%edx, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+
+	bsr	%eax, %eax
+	add	%edi, %eax
+	sub	$16, %eax
+	POP	(%edi)
+	ret
+
+	CFI_PUSH	(%edi)
+/* Return NULL.  */
+	.p2align 4
+L(return_null):
+	xor	%eax, %eax
+	POP	(%edi)
+	ret
+
+	CFI_PUSH	(%edi)
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+/* Return NULL.  */
+	.p2align 4
+L(return_null_1):
+	POP	(%ebx)
+	POP	(%esi)
+	POP	(%edi)
+	xor	%eax, %eax
+	ret
+
+END (__strrchr_sse2_bsf)
+#endif
+
diff --git a/sysdeps/i386/i686/multiarch/strrchr-sse2.S b/sysdeps/i386/i686/multiarch/strrchr-sse2.S
new file mode 100644
index 0000000..71cc69d
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strrchr-sse2.S
@@ -0,0 +1,709 @@
+/* strrchr SSE2 without bsf and bsr
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef NOT_IN_libc
+
+# include <sysdep.h>
+
+# define CFI_PUSH(REG)	\
+	cfi_adjust_cfa_offset (4);	\
+	cfi_rel_offset (REG, 0)
+
+# define CFI_POP(REG)	\
+	cfi_adjust_cfa_offset (-4);	\
+	cfi_restore (REG)
+
+# define PUSH(REG) pushl REG; CFI_PUSH (REG)
+# define POP(REG) popl REG; CFI_POP (REG)
+
+# define PARMS  8
+# define ENTRANCE PUSH(%edi);
+# define RETURN  POP(%edi); ret; CFI_PUSH(%edi);
+
+# define STR1  PARMS
+# define STR2  STR1+4
+
+	.text
+ENTRY (__strrchr_sse2)
+
+	ENTRANCE
+	mov	STR1(%esp), %ecx
+	movd	STR2(%esp), %xmm1
+
+	pxor	%xmm2, %xmm2
+	mov	%ecx, %edi
+	punpcklbw %xmm1, %xmm1
+	punpcklbw %xmm1, %xmm1
+	/* ECX has OFFSET. */
+	and	$63, %ecx
+	cmp	$48, %ecx
+	pshufd	$0, %xmm1, %xmm1
+	ja	L(crosscache)
+
+/* unaligned string. */
+	movdqu	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where NULL is.  */
+	pmovmskb %xmm2, %ecx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	add	$16, %edi
+
+	test	%eax, %eax
+	jnz	L(unaligned_match1)
+
+	test	%ecx, %ecx
+	jnz	L(return_null)
+
+	and	$-16, %edi
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	xor	%ebx, %ebx
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+	CFI_POP	(%ebx)
+
+	.p2align 4
+L(unaligned_match1):
+	test	%ecx, %ecx
+	jnz	L(prolog_find_zero_1)
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	mov	%eax, %ebx
+	mov	%edi, %esi
+	and	$-16, %edi
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+	CFI_POP	(%ebx)
+
+	.p2align 4
+L(crosscache):
+/* Hancle unaligned string.  */
+	and	$15, %ecx
+	and	$-16, %edi
+	pxor	%xmm3, %xmm3
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm3
+	pcmpeqb	%xmm1, %xmm0
+	/* Find where NULL is.  */
+	pmovmskb %xmm3, %edx
+	/* Check if there is a match.  */
+	pmovmskb %xmm0, %eax
+	/* Remove the leading bytes.  */
+	shr	%cl, %edx
+	shr	%cl, %eax
+	add	$16, %edi
+
+	test	%eax, %eax
+	jnz	L(unaligned_match)
+
+	test	%edx, %edx
+	jnz	L(return_null)
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	xor	%ebx, %ebx
+	jmp	L(loop)
+
+	CFI_POP	(%esi)
+	CFI_POP	(%ebx)
+
+	.p2align 4
+L(unaligned_match):
+	test	%edx, %edx
+	jnz	L(prolog_find_zero)
+
+	PUSH	(%esi)
+	PUSH	(%ebx)
+
+	mov	%eax, %ebx
+	lea	(%edi, %ecx), %esi
+
+/* Loop start on aligned string.  */
+	.p2align 4
+L(loop):
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jnz	L(matches)
+
+	movdqa	(%edi), %xmm0
+	pcmpeqb	%xmm0, %xmm2
+	add	$16, %edi
+	pcmpeqb	%xmm1, %xmm0
+	pmovmskb %xmm2, %ecx
+	pmovmskb %xmm0, %eax
+	or	%eax, %ecx
+	jz	L(loop)
+
+L(matches):
+	test	%eax, %eax
+	jnz	L(match)
+L(return_value):
+	test	%ebx, %ebx
+	jz	L(return_null_1)
+	mov	%ebx, %eax
+	mov	%esi, %edi
+
+	POP	(%ebx)
+	POP	(%esi)
+
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(return_null_1):
+	POP	(%ebx)
+	POP	(%esi)
+
+	xor	%eax, %eax
+	RETURN
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(match):
+	pmovmskb %xmm2, %ecx
+	test	%ecx, %ecx
+	jnz	L(find_zero)
+	mov	%eax, %ebx
+	mov	%edi, %esi
+	jmp	L(loop)
+
+	.p2align 4
+L(find_zero):
+	test	%cl, %cl
+	jz	L(find_zero_high)
+	mov	%cl, %dl
+	and	$15, %dl
+	jz	L(find_zero_8)
+	test	$0x01, %cl
+	jnz	L(FindZeroExit1)
+	test	$0x02, %cl
+	jnz	L(FindZeroExit2)
+	test	$0x04, %cl
+	jnz	L(FindZeroExit3)
+	and	$1 << 4 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_8):
+	test	$0x10, %cl
+	jnz	L(FindZeroExit5)
+	test	$0x20, %cl
+	jnz	L(FindZeroExit6)
+	test	$0x40, %cl
+	jnz	L(FindZeroExit7)
+	and	$1 << 8 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_high):
+	mov	%ch, %dh
+	and	$15, %dh
+	jz	L(find_zero_high_8)
+	test	$0x01, %ch
+	jnz	L(FindZeroExit9)
+	test	$0x02, %ch
+	jnz	L(FindZeroExit10)
+	test	$0x04, %ch
+	jnz	L(FindZeroExit11)
+	and	$1 << 12 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(find_zero_high_8):
+	test	$0x10, %ch
+	jnz	L(FindZeroExit13)
+	test	$0x20, %ch
+	jnz	L(FindZeroExit14)
+	test	$0x40, %ch
+	jnz	L(FindZeroExit15)
+	and	$1 << 16 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit1):
+	and	$1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit2):
+	and	$1 << 2 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit3):
+	and	$1 << 3 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit5):
+	and	$1 << 5 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit6):
+	and	$1 << 6 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit7):
+	and	$1 << 7 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit9):
+	and	$1 << 9 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit10):
+	and	$1 << 10 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit11):
+	and	$1 << 11 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit13):
+	and	$1 << 13 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit14):
+	and	$1 << 14 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+	jmp	L(match_exit)
+
+	CFI_PUSH	(%ebx)
+	CFI_PUSH	(%esi)
+
+	.p2align 4
+L(FindZeroExit15):
+	and	$1 << 15 - 1, %eax
+	jz	L(return_value)
+
+	POP	(%ebx)
+	POP	(%esi)
+
+	.p2align 4
+L(match_exit):
+	test	%ah, %ah
+	jnz	L(match_exit_high)
+	mov	%al, %dl
+	and	$15 << 4, %dl
+	jnz	L(match_exit_8)
+	test	$0x08, %al
+	jnz	L(Exit4)
+	test	$0x04, %al
+	jnz	L(Exit3)
+	test	$0x02, %al
+	jnz	L(Exit2)
+	lea	-16(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_exit_8):
+	test	$0x80, %al
+	jnz	L(Exit8)
+	test	$0x40, %al
+	jnz	L(Exit7)
+	test	$0x20, %al
+	jnz	L(Exit6)
+	lea	-12(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_exit_high):
+	mov	%ah, %dh
+	and	$15 << 4, %dh
+	jnz	L(match_exit_high_8)
+	test	$0x08, %ah
+	jnz	L(Exit12)
+	test	$0x04, %ah
+	jnz	L(Exit11)
+	test	$0x02, %ah
+	jnz	L(Exit10)
+	lea	-8(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(match_exit_high_8):
+	test	$0x80, %ah
+	jnz	L(Exit16)
+	test	$0x40, %ah
+	jnz	L(Exit15)
+	test	$0x20, %ah
+	jnz	L(Exit14)
+	lea	-4(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit2):
+	lea	-15(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit3):
+	lea	-14(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit4):
+	lea	-13(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit6):
+	lea	-11(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit7):
+	lea	-10(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit8):
+	lea	-9(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit10):
+	lea	-7(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit11):
+	lea	-6(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit12):
+	lea	-5(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit14):
+	lea	-3(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit15):
+	lea	-2(%edi), %eax
+	RETURN
+
+	.p2align 4
+L(Exit16):
+	lea	-1(%edi), %eax
+	RETURN
+
+/* Return NULL.  */
+	.p2align 4
+L(return_null):
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero):
+	add	%ecx, %edi
+	mov     %edx, %ecx
+L(prolog_find_zero_1):
+	test	%cl, %cl
+	jz	L(prolog_find_zero_high)
+	mov	%cl, %dl
+	and	$15, %dl
+	jz	L(prolog_find_zero_8)
+	test	$0x01, %cl
+	jnz	L(PrologFindZeroExit1)
+	test	$0x02, %cl
+	jnz	L(PrologFindZeroExit2)
+	test	$0x04, %cl
+	jnz	L(PrologFindZeroExit3)
+	and	$1 << 4 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_8):
+	test	$0x10, %cl
+	jnz	L(PrologFindZeroExit5)
+	test	$0x20, %cl
+	jnz	L(PrologFindZeroExit6)
+	test	$0x40, %cl
+	jnz	L(PrologFindZeroExit7)
+	and	$1 << 8 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_high):
+	mov	%ch, %dh
+	and	$15, %dh
+	jz	L(prolog_find_zero_high_8)
+	test	$0x01, %ch
+	jnz	L(PrologFindZeroExit9)
+	test	$0x02, %ch
+	jnz	L(PrologFindZeroExit10)
+	test	$0x04, %ch
+	jnz	L(PrologFindZeroExit11)
+	and	$1 << 12 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(prolog_find_zero_high_8):
+	test	$0x10, %ch
+	jnz	L(PrologFindZeroExit13)
+	test	$0x20, %ch
+	jnz	L(PrologFindZeroExit14)
+	test	$0x40, %ch
+	jnz	L(PrologFindZeroExit15)
+	and	$1 << 16 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit1):
+	and	$1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit2):
+	and	$1 << 2 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit3):
+	and	$1 << 3 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit5):
+	and	$1 << 5 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit6):
+	and	$1 << 6 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit7):
+	and	$1 << 7 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit9):
+	and	$1 << 9 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit10):
+	and	$1 << 10 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit11):
+	and	$1 << 11 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit13):
+	and	$1 << 13 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit14):
+	and	$1 << 14 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+	.p2align 4
+L(PrologFindZeroExit15):
+	and	$1 << 15 - 1, %eax
+	jnz	L(match_exit)
+	xor	%eax, %eax
+	RETURN
+
+END (__strrchr_sse2)
+#endif
diff --git a/sysdeps/i386/i686/multiarch/strrchr.S b/sysdeps/i386/i686/multiarch/strrchr.S
new file mode 100644
index 0000000..866cac0
--- /dev/null
+++ b/sysdeps/i386/i686/multiarch/strrchr.S
@@ -0,0 +1,76 @@
+/* Multiple versions of strrchr
+   Copyright (C) 2011 Free Software Foundation, Inc.
+   Contributed by Intel Corporation.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <sysdep.h>
+#include <init-arch.h>
+
+#ifndef NOT_IN_libc
+	.section	.gnu.linkonce.t.__i686.get_pc_thunk.bx,"ax",@progbits
+	.globl	__i686.get_pc_thunk.bx
+	.hidden	__i686.get_pc_thunk.bx
+	.p2align 4
+	.type	__i686.get_pc_thunk.bx,@function
+__i686.get_pc_thunk.bx:
+	movl	(%esp), %ebx
+	ret
+
+	.text
+ENTRY(strrchr)
+	.type	strrchr, @gnu_indirect_function
+	pushl	%ebx
+	cfi_adjust_cfa_offset (4)
+	cfi_rel_offset (ebx, 0)
+	call	__i686.get_pc_thunk.bx
+	addl	$_GLOBAL_OFFSET_TABLE_, %ebx
+	cmpl	$0, KIND_OFFSET+__cpu_features@GOTOFF(%ebx)
+	jne	1f
+	call	__init_cpu_features
+1:	leal	__strrchr_ia32@GOTOFF(%ebx), %eax
+	testl	$bit_SSE2, CPUID_OFFSET+index_SSE2+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__strrchr_sse2_bsf@GOTOFF(%ebx), %eax
+	testl	$bit_Slow_BSF, FEATURE_OFFSET+index_Slow_BSF+__cpu_features@GOTOFF(%ebx)
+	jz	2f
+	leal	__strrchr_sse2@GOTOFF(%ebx), %eax
+2:	popl	%ebx
+	cfi_adjust_cfa_offset (-4);
+	cfi_restore (ebx)
+	ret
+END(strrchr)
+
+# undef ENTRY
+# define ENTRY(name) \
+	.type __strrchr_ia32, @function; \
+	.globl __strrchr_ia32; \
+	.p2align 4; \
+	__strrchr_ia32: cfi_startproc; \
+	CALL_MCOUNT
+# undef END
+# define END(name) \
+	cfi_endproc; .size __strrchr_ia32, .-__strrchr_ia32
+# undef libc_hidden_builtin_def
+/* IFUNC doesn't work with the hidden functions in shared library since
+   they will be called without setting up EBX needed for PLT which is
+   used by IFUNC.  */
+# define libc_hidden_builtin_def(name) \
+	.globl __GI_strrchr; __GI_strrchr = __strrchr_ia32
+#endif
+
+#include "../../strrchr.S"

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                                      |   12 +
 NEWS                                           |    3 +
 sysdeps/i386/i686/multiarch/Makefile           |    3 +-
 sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S  |  159 ++++++
 sysdeps/i386/i686/multiarch/strchr-sse2.S      |  349 ++++++++++++
 sysdeps/i386/i686/multiarch/strchr.S           |   76 +++
 sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S |  283 ++++++++++
 sysdeps/i386/i686/multiarch/strrchr-sse2.S     |  709 ++++++++++++++++++++++++
 sysdeps/i386/i686/multiarch/strrchr.S          |   76 +++
 9 files changed, 1669 insertions(+), 1 deletions(-)
 create mode 100644 sysdeps/i386/i686/multiarch/strchr-sse2-bsf.S
 create mode 100644 sysdeps/i386/i686/multiarch/strchr-sse2.S
 create mode 100644 sysdeps/i386/i686/multiarch/strchr.S
 create mode 100644 sysdeps/i386/i686/multiarch/strrchr-sse2-bsf.S
 create mode 100644 sysdeps/i386/i686/multiarch/strrchr-sse2.S
 create mode 100644 sysdeps/i386/i686/multiarch/strrchr.S


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]