This is the mail archive of the glibc-cvs@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

GNU C Library master sources branch, master, updated. glibc-2.14-459-g52e4b9e


This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU C Library master sources".

The branch master has been updated
       via  52e4b9eb626179110fdfa6e16c32fce30805ff80 (commit)
      from  fd52bc6dc4bfb844995cc63d98682970de1c9fed (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
http://sources.redhat.com/git/gitweb.cgi?p=glibc.git;a=commitdiff;h=52e4b9eb626179110fdfa6e16c32fce30805ff80

commit 52e4b9eb626179110fdfa6e16c32fce30805ff80
Author: Ulrich Drepper <drepper@gmail.com>
Date:   Fri Oct 28 19:01:48 2011 -0400

    More cleanups of x86-64 strstr

diff --git a/ChangeLog b/ChangeLog
index feb3567..6ae84cb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -4,6 +4,8 @@
 	the three constants needed as parameters.  Drop the others.
 	(strcasestr_sse42): Load uclow, uchigh, and lcqword and pass to
 	__m128i_strloadu_tolower.
+	Create and initialize variable zero and use it in all the places
+	where _mm_setzero_si128 was used.
 
 	* sysdeps/x86_64/fpu/multiarch/Makefile: Don't build brandred-avx.c,
 	doasin-avx.c, dosincos-avx.c, e_asin-avx.c, mpatan-avx.c,
diff --git a/sysdeps/x86_64/multiarch/strstr.c b/sysdeps/x86_64/multiarch/strstr.c
index 6e93744..36dc676 100644
--- a/sysdeps/x86_64/multiarch/strstr.c
+++ b/sysdeps/x86_64/multiarch/strstr.c
@@ -88,14 +88,12 @@
    cross to next page.  */
 
 static inline __m128i
-__m128i_strloadu (const unsigned char * p)
+__m128i_strloadu (const unsigned char * p, __m128i zero)
 {
-  int offset = ((size_t) p & (16 - 1));
-
-  if (offset && (int) ((size_t) p & 0xfff) > 0xff0)
+  if (__builtin_expect ((int) ((size_t) p & 0xfff) > 0xff0, 0))
     {
+      size_t offset = ((size_t) p & (16 - 1));
       __m128i a = _mm_load_si128 ((__m128i *) (p - offset));
-      __m128i zero = _mm_setzero_si128 ();
       int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, zero));
       if ((bmsk >> offset) != 0)
 	return __m128i_shift_right (a, offset);
@@ -109,10 +107,10 @@ __m128i_strloadu (const unsigned char * p)
    locale and other which have single-byte letters only in the ASCII
    range.  */
 static inline __m128i
-__m128i_strloadu_tolower (const unsigned char *p, __m128i uclow,
+__m128i_strloadu_tolower (const unsigned char *p, __m128i zero, __m128i uclow,
 			  __m128i uchigh, __m128i lcqword)
 {
-  __m128i frag = __m128i_strloadu (p);
+  __m128i frag = __m128i_strloadu (p, zero);
 
   /* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'.  */
   __m128i r2 = _mm_cmpgt_epi8 (uchigh, frag);
@@ -191,12 +189,15 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
   const __m128i uclow = _mm_set1_epi8 (0x40);
   const __m128i uchigh = _mm_set1_epi8 (0x5b);
   const __m128i lcqword = _mm_set1_epi8 (0x20);
-#  define strloadu(p) __m128i_strloadu_tolower (p, uclow, uchigh, lcqword)
+  const __m128i zero = _mm_setzero_si128 ();
+#  define strloadu(p) __m128i_strloadu_tolower (p, zero, uclow, uchigh, lcqword)
 # else
 #  define strloadu __m128i_strloadu_tolower
+#  define zero _mm_setzero_si128 ()
 # endif
 #else
-# define strloadu __m128i_strloadu
+# define strloadu(p) __m128i_strloadu (p, zero)
+  const __m128i zero = _mm_setzero_si128 ();
 #endif
 
   /* p1 > 1 byte long.  Load up to 16 bytes of fragment.  */
@@ -207,7 +208,7 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
     /* p2 is > 1 byte long.  */
     frag2 = strloadu (p2);
   else
-    frag2 = _mm_insert_epi8 (_mm_setzero_si128 (), LOADBYTE (p2[0]), 0);
+    frag2 = _mm_insert_epi8 (zero, LOADBYTE (p2[0]), 0);
 
   /* Unsigned bytes, equal order, does frag2 has null?  */
   int cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
@@ -216,8 +217,7 @@ STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
   int cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
   if (cmp_s & cmp_c)
     {
-      int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2,
-						    _mm_setzero_si128 ()));
+      int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2, zero));
       int len;
       __asm ("bsfl %[bmsk], %[len]"
 	     : [len] "=r" (len) : [bmsk] "r" (bmsk));
@@ -343,7 +343,6 @@ re_trace:
 
       /* Handle both zero and sign flag set and s1 is shorter in
 	 length.  */
-      __m128i zero = _mm_setzero_si128 ();
       int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag2));
       int bmsk1 = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag1));
       int len;

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog                         |    2 ++
 sysdeps/x86_64/multiarch/strstr.c |   25 ++++++++++++-------------
 2 files changed, 14 insertions(+), 13 deletions(-)


hooks/post-receive
-- 
GNU C Library master sources


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]