This is the mail archive of the
libc-alpha@sourceware.org
mailing list for the glibc project.
[RFC] Faster strspn/strcspn implementation.
- From: OndÅej BÃlka <neleai at seznam dot cz>
- To: libc-alpha at sourceware dot org
- Cc: hubicka at ucw dot cz
- Date: Thu, 27 Dec 2012 22:21:46 +0100
- Subject: [RFC] Faster strspn/strcspn implementation.
Hello,
I now look on speeding up strspn/strcspn.
Now I wrote a generic dispatch routine. I will post x64 specialization
soon.
There are several cases that can be optimized in platform specific way:
__strspn4( s,a0,a1,a2,a3) - when needle consist at most 4 characters.
__strspn_rng(s,from,to) - when needle is consecutive interval.
__strspn16(s,a) - when needle consist at most 16 characters.
__strspn_arf(s,a) - anything else.
In header string/bits/string2.h I changed implementation to call
__strspn4 for small needles. Currently I can not persudate gcc to
recognize intervals at compile time.
Now make check fails that libc_nonshared.a does not contain my
__strspn4. Where should add this dependence?
Ondra
---
string/Makefile | 2 +-
string/bits/string2.h | 179 +++------------------------------------------
string/strcspn.c | 52 +-------------
string/strcspn_internal.c | 2 +
string/strpbrk.c | 12 +---
string/strspn.c | 63 ++++++++++------
string/strspn_internal.c | 76 +++++++++++++++++++
7 files changed, 133 insertions(+), 253 deletions(-)
create mode 100644 string/strcspn_internal.c
create mode 100644 string/strspn_internal.c
diff --git a/string/Makefile b/string/Makefile
index 740006e..581a6f3 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -28,7 +28,7 @@ routines := strcat strchr strcmp strcoll strcpy strcspn \
strverscmp strdup strndup \
strerror _strerror strlen strnlen \
strncat strncmp strncpy \
- strrchr strpbrk strsignal strspn strstr strtok \
+ strrchr strpbrk strsignal strspn strspn_internal strstr strtok \
strtok_r strxfrm memchr memcmp memmove memset \
mempcpy bcopy bzero ffs ffsll stpcpy stpncpy \
strcasecmp strncase strcasecmp_l strncase_l \
diff --git a/string/bits/string2.h b/string/bits/string2.h
index bbf05a3..cc8d9d0 100644
--- a/string/bits/string2.h
+++ b/string/bits/string2.h
@@ -925,6 +925,9 @@ __stpcpy_small (char *__dest,
? strcmp (s1, s2) : strncmp (s1, s2, n)))
#endif
+size_t __strspn4 (const char *a,unsigned char a0,unsigned char a1,unsigned char a2,unsigned char a3);
+size_t __strcspn4 (const char *a,unsigned char a0,unsigned char a1,unsigned char a2,unsigned char a3);
+
/* Return the length of the initial segment of S which
consists entirely of characters not in REJECT. */
@@ -940,11 +943,11 @@ __stpcpy_small (char *__dest,
: ((__r0 = ((const char *) (reject))[0], __r0 == '\0') \
? strlen (s) \
: ((__r1 = ((const char *) (reject))[1], __r1 == '\0') \
- ? __strcspn_c1 (s, __r0) \
+ ? strchrnul (s,__r0) - s \
: ((__r2 = ((const char *) (reject))[2], __r2 == '\0') \
- ? __strcspn_c2 (s, __r0, __r1) \
+ ? __strcspn4 (s, __r0, __r1, 0 , 0) \
: (((const char *) (reject))[3] == '\0' \
- ? __strcspn_c3 (s, __r0, __r1, __r2) \
+ ? __strcspn4 (s, __r0, __r1, __r2 , 0) \
: __builtin_strcspn (s, reject)))))) \
: __builtin_strcspn (s, reject)); })
# else
@@ -964,182 +967,24 @@ __stpcpy_small (char *__dest,
: strcspn (s, reject)); })
# endif
# endif
-
-__STRING_INLINE size_t __strcspn_c1 (const char *__s, int __reject);
-__STRING_INLINE size_t
-__strcspn_c1 (const char *__s, int __reject)
-{
- register size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c2 (const char *__s, int __reject1,
- int __reject2);
-__STRING_INLINE size_t
-__strcspn_c2 (const char *__s, int __reject1, int __reject2)
-{
- register size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject1
- && __s[__result] != __reject2)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strcspn_c3 (const char *__s, int __reject1,
- int __reject2, int __reject3);
-__STRING_INLINE size_t
-__strcspn_c3 (const char *__s, int __reject1, int __reject2,
- int __reject3)
-{
- register size_t __result = 0;
- while (__s[__result] != '\0' && __s[__result] != __reject1
- && __s[__result] != __reject2 && __s[__result] != __reject3)
- ++__result;
- return __result;
-}
#endif
/* Return the length of the initial segment of S which
consists entirely of characters in ACCEPT. */
#if !defined _HAVE_STRING_ARCH_strspn || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strspn
-# if __GNUC_PREREQ (3, 2)
-# define strspn(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \
- ? __builtin_strspn (s, accept) \
- : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (size_t) 0) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? __strspn_c1 (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strspn_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strspn_c3 (s, __a0, __a1, __a2) \
- : __builtin_strspn (s, accept)))))) \
- : __builtin_strspn (s, accept)); })
-# else
-# define strspn(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (size_t) 0) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? __strspn_c1 (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strspn_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strspn_c3 (s, __a0, __a1, __a2) \
- : strspn (s, accept))))) \
- : strspn (s, accept)); })
-# endif
-# endif
-
-__STRING_INLINE size_t __strspn_c1 (const char *__s, int __accept);
-__STRING_INLINE size_t
-__strspn_c1 (const char *__s, int __accept)
-{
- register size_t __result = 0;
- /* Please note that __accept never can be '\0'. */
- while (__s[__result] == __accept)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strspn_c2 (const char *__s, int __accept1,
- int __accept2);
-__STRING_INLINE size_t
-__strspn_c2 (const char *__s, int __accept1, int __accept2)
-{
- register size_t __result = 0;
- /* Please note that __accept1 and __accept2 never can be '\0'. */
- while (__s[__result] == __accept1 || __s[__result] == __accept2)
- ++__result;
- return __result;
-}
-
-__STRING_INLINE size_t __strspn_c3 (const char *__s, int __accept1,
- int __accept2, int __accept3);
-__STRING_INLINE size_t
-__strspn_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
-{
- register size_t __result = 0;
- /* Please note that __accept1 to __accept3 never can be '\0'. */
- while (__s[__result] == __accept1 || __s[__result] == __accept2
- || __s[__result] == __accept3)
- ++__result;
- return __result;
-}
#endif
/* Find the first occurrence in S of any character in ACCEPT. */
-#if !defined _HAVE_STRING_ARCH_strpbrk || defined _FORCE_INLINES
-# ifndef _HAVE_STRING_ARCH_strpbrk
-# if __GNUC_PREREQ (3, 2)
-# define strpbrk(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__builtin_constant_p (s) && __string2_1bptr_p (s)) \
- ? __builtin_strpbrk (s, accept) \
- : ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (char *) NULL) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? __builtin_strchr (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strpbrk_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strpbrk_c3 (s, __a0, __a1, __a2) \
- : __builtin_strpbrk (s, accept)))))) \
- : __builtin_strpbrk (s, accept)); })
-# else
-# define strpbrk(s, accept) \
- __extension__ \
- ({ char __a0, __a1, __a2; \
- (__builtin_constant_p (accept) && __string2_1bptr_p (accept) \
- ? ((__a0 = ((const char *) (accept))[0], __a0 == '\0') \
- ? ((void) (s), (char *) NULL) \
- : ((__a1 = ((const char *) (accept))[1], __a1 == '\0') \
- ? strchr (s, __a0) \
- : ((__a2 = ((const char *) (accept))[2], __a2 == '\0') \
- ? __strpbrk_c2 (s, __a0, __a1) \
- : (((const char *) (accept))[3] == '\0' \
- ? __strpbrk_c3 (s, __a0, __a1, __a2) \
- : strpbrk (s, accept))))) \
- : strpbrk (s, accept)); })
-# endif
-# endif
-
-__STRING_INLINE char *__strpbrk_c2 (const char *__s, int __accept1,
- int __accept2);
-__STRING_INLINE char *
-__strpbrk_c2 (const char *__s, int __accept1, int __accept2)
-{
- /* Please note that __accept1 and __accept2 never can be '\0'. */
- while (*__s != '\0' && *__s != __accept1 && *__s != __accept2)
- ++__s;
- return *__s == '\0' ? NULL : (char *) (size_t) __s;
-}
-
-__STRING_INLINE char *__strpbrk_c3 (const char *__s, int __accept1,
- int __accept2, int __accept3);
-__STRING_INLINE char *
-__strpbrk_c3 (const char *__s, int __accept1, int __accept2, int __accept3)
+extern inline char *
+strpbrk (s, accept)
+ const char *s;
+ const char *accept;
{
- /* Please note that __accept1 to __accept3 never can be '\0'. */
- while (*__s != '\0' && *__s != __accept1 && *__s != __accept2
- && *__s != __accept3)
- ++__s;
- return *__s == '\0' ? NULL : (char *) (size_t) __s;
+ size_t i = strcspn (s, accept);
+ return s[i] ? (char *) s+i : NULL;
}
-#endif
/* Find the first occurrence of NEEDLE in HAYSTACK. Newer gcc versions
diff --git a/string/strcspn.c b/string/strcspn.c
index 6290429..cf71f93 100644
--- a/string/strcspn.c
+++ b/string/strcspn.c
@@ -1,50 +1,2 @@
-/* Copyright (C) 1991, 1994, 1996, 1997, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, see
- <http://www.gnu.org/licenses/>. */
-
-#if HAVE_CONFIG_H
-# include <config.h>
-#endif
-
-#if defined _LIBC || HAVE_STRING_H
-# include <string.h>
-#else
-# include <strings.h>
-# ifndef strchr
-# define strchr index
-# endif
-#endif
-
-#undef strcspn
-
-/* Return the length of the maximum initial segment of S
- which contains no characters from REJECT. */
-size_t
-strcspn (s, reject)
- const char *s;
- const char *reject;
-{
- size_t count = 0;
-
- while (*s != '\0')
- if (strchr (reject, *s++) == NULL)
- ++count;
- else
- return count;
-
- return count;
-}
-libc_hidden_builtin_def (strcspn)
+#define AS_STRCSPN
+#include "strspn.c"
diff --git a/string/strcspn_internal.c b/string/strcspn_internal.c
new file mode 100644
index 0000000..1788997
--- /dev/null
+++ b/string/strcspn_internal.c
@@ -0,0 +1,2 @@
+#define AS_STRCSPN
+#include "strspn_internal.c"
diff --git a/string/strpbrk.c b/string/strpbrk.c
index 7f45fdf..4015ced 100644
--- a/string/strpbrk.c
+++ b/string/strpbrk.c
@@ -31,15 +31,7 @@ strpbrk (s, accept)
const char *s;
const char *accept;
{
- while (*s != '\0')
- {
- const char *a = accept;
- while (*a != '\0')
- if (*a++ == *s)
- return (char *) s;
- ++s;
- }
-
- return NULL;
+ size_t i = strcspn (s, accept);
+ return s[i] ? (char *) s+i : NULL;
}
libc_hidden_builtin_def (strpbrk)
diff --git a/string/strspn.c b/string/strspn.c
index 48624aa..5237ac1 100644
--- a/string/strspn.c
+++ b/string/strspn.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991, 1997, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -15,32 +15,45 @@
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
+#ifdef AS_STRCSPN
+#define SPN_CSPN(x,y) y
+#else
+#define SPN_CSPN(x,y) x
+#endif
+#define SPN SPN_CSPN (__strspn ,__strcspn)
+#define SPN4 SPN_CSPN (__strspn4 ,__strcspn4)
+#define SPN16 SPN_CSPN (__strspn16 ,__strcspn16)
+#define SPN_ARY SPN_CSPN (__strspn_ary ,__strcspn_ary)
+#define SPN_RNG SPN_CSPN (__strspn_rng ,__strcspn_rng)
+
+#define _GNU_SOURCE
#include <string.h>
-#undef strspn
-/* Return the length of the maximum initial segment
- of S which contains only characters in ACCEPT. */
-size_t
-strspn (s, accept)
- const char *s;
- const char *accept;
+size_t SPN (const char *_s,const char *_n)
{
- const char *p;
- const char *a;
- size_t count = 0;
-
- for (p = s; *p != '\0'; ++p)
- {
- for (a = accept; *a != '\0'; ++a)
- if (*p == *a)
- break;
- if (*a == '\0')
- return count;
- else
- ++count;
- }
-
- return count;
+ size_t i;
+ unsigned char *s = (unsigned char*)_s, *n = (unsigned char*)_n;
+#ifdef AS_STRCSPN
+ if (!n[0]) return strlen(_s);
+ if (!n[1]) return strchrnul (_s,_n[0])-_s;
+ if (!n[2]) return SPN4 (s,n[0],n[1],0 ,0);
+ if (!n[3]) return SPN4 (s,n[0],n[1],n[2],0);
+#else
+ if (!n[0]) return 0;
+ if (!n[1]) return SPN4 (s,n[0],n[0],n[0],n[0]);
+ if (!n[2]) return SPN4 (s,n[0],n[1],n[0],n[0]);
+ if (!n[3]) return SPN4 (s,n[0],n[1],n[2],n[0]);
+ if (!n[4]) return SPN4 (s,n[0],n[1],n[2],n[3]);
+#endif
+ for (i = 0; n[i] == n[0]+i; i++)
+ ;
+ if (!n[i])
+ return SPN_RNG (s,n[0],n[0]+i-1);
+ if (strlen (n)<16)
+ return SPN16 (s,n);
+ return SPN_ARY (s,n);
}
-libc_hidden_builtin_def (strspn)
+
+libc_hidden_builtin_def (SPN)
+weak_alias(SPN, SPN_CSPN (strspn,strcspn))
diff --git a/string/strspn_internal.c b/string/strspn_internal.c
new file mode 100644
index 0000000..3962925
--- /dev/null
+++ b/string/strspn_internal.c
@@ -0,0 +1,76 @@
+/* Copyright (C) 2012 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+
+#ifdef AS_STRCSPN
+#define SPN_CSPN(x,y) y
+#else
+#define SPN_CSPN(x,y) x
+#endif
+#define SPN SPN_CSPN( strspn , strcspn)
+#define SPN4 SPN_CSPN(__strspn4 ,__strcspn4)
+#define SPN16 SPN_CSPN(__strspn16 ,__strcspn16)
+#define SPN_ARY SPN_CSPN(__strspn_ary ,__strcspn_ary)
+#define SPN_RNG SPN_CSPN(__strspn_rng ,__strcspn_rng)
+
+#include <string.h>
+
+size_t SPN_ARY (unsigned char* s,unsigned char* n)
+{
+ size_t i;
+ unsigned char ac,chars[256];
+ memset (chars,0,256);
+ for (i = 0; n[i]; i++)
+ chars[n[i]] = 1;
+ ac = chars[0] = SPN_CSPN (0,1);
+ i = 0;
+ while (1)
+ {
+ if (chars[s[i++]]==ac) return i-1;
+ if (chars[s[i++]]==ac) return i-1;
+ if (chars[s[i++]]==ac) return i-1;
+ if (chars[s[i++]]==ac) return i-1;
+ }
+}
+
+size_t SPN16 (unsigned char *s,unsigned char *n)
+{
+ return SPN_ARY (s,n);
+}
+
+size_t SPN4 ( const char *_s, unsigned char n0,unsigned char n1,unsigned char n2,unsigned char n3)
+{
+ unsigned char *s = (unsigned char *) _s;
+ size_t i;
+ for (i = 0;; i++)
+ {
+ if SPN_CSPN ( (s[i] != n0 && s[i] != n0 && s[i] != n2 && s[i] != n3) ,
+ (!(s[i] != n0 && s[i] != n0 && s[i] != n2 && s[i] != n3)))
+ return i;
+ }
+}
+
+size_t SPN_RNG (unsigned char *s,unsigned char from,unsigned char to)
+{
+ size_t i;
+ for (i = 0;; i++)
+ {
+ if (SPN_CSPN ( (s[i] < from || s[i] > to),
+ ((s[i] >= from && s[i] <= to) || s[i] == 0)))
+ return i;
+ }
+}
--
1.7.4.4