This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 2/4] Detect EOL on-the-fly in strstr, strcasestr and memmem.


On 5/30/2012 5:10 AM, Maxim Kuvyrkov wrote:
> [PATCH 2/4] Detect EOL on-the-fly in strstr, strcasestr and memmem.
> 
> --
> Maxim Kuvyrkov
> CodeSourcery / Mentor Graphics
> 
> 
> 	[BZ #11607]
> 	* string/str-two-way.h (AVAILABLE1, AVAILABLE2, RET0_IF_0): New macros,
> 	define their defaults.
> 	(two_way_short_needle): Detect end-of-string on-the-fly.
> 	* string/strcasestr.c, string/strstr.c (AVAILABLE): Update.
> 	(AVAILABLE1, AVAILABLE2, RET0_IF_0, AVAILABLE_USES_J): Define.
> 	* string/bug-strcasestr1.c: New test.
> 	* string/Makefile: Run it.
> ---
>  string/Makefile          |    3 +-
>  string/bug-strcasestr1.c |   21 ++++++++++++++++++
>  string/str-two-way.h     |   52 ++++++++++++++++++++++++++++++++++++++--------
>  string/strcasestr.c      |    6 +++-
>  string/strstr.c          |    6 +++-
>  5 files changed, 74 insertions(+), 14 deletions(-)
>  create mode 100644 string/bug-strcasestr1.c
> 
> diff --git a/string/Makefile b/string/Makefile
> index 80923a2..fb97ce4 100644
> --- a/string/Makefile
> +++ b/string/Makefile
> @@ -56,7 +56,7 @@ tests		:= tester inl-tester noinl-tester testcopy test-ffs	\
>  		   tst-strtok tst-strxfrm bug-strcoll1 tst-strfry	\
>  		   bug-strtok1 $(addprefix test-,$(strop-tests))	\
>  		   bug-envz1 tst-strxfrm2 tst-endian tst-svc2		\
> -		   bug-strstr1 bug-strchr1
> +		   bug-strstr1 bug-strcasestr1 bug-strchr1

OK. Thanks for the testcase.

>  
>  
>  include ../Rules
> @@ -74,6 +74,7 @@ CFLAGS-stratcliff.c = -fno-builtin
>  CFLAGS-test-ffs.c = -fno-builtin
>  CFLAGS-tst-inlcall.c = -fno-builtin
>  CFLAGS-bug-strstr1.c = -fno-builtin
> +CFLAGS-bug-strcasestr1.c = -fno-builtin
>  
>  ifeq ($(cross-compiling),no)
>  tests: $(objpfx)tst-svc.out
> diff --git a/string/bug-strcasestr1.c b/string/bug-strcasestr1.c
> new file mode 100644
> index 0000000..16c77c3
> --- /dev/null
> +++ b/string/bug-strcasestr1.c
> @@ -0,0 +1,21 @@

(1) Add license header please.

> +#include <stdio.h>
> +#include <string.h>
> +
> +#define TEST_FUNCTION do_test ()
> +static int
> +do_test (void)
> +{
> +  const char haystack[] = "AOKB";
> +  const char needle[] = "OK";
> +  const char *sub = strcasestr (haystack, needle);
> +
> +  if (sub == NULL)
> +    {
> +      fprintf (stderr, "BUG: didn't find \"%s\" in \"%s\"\n", needle, haystack);
> +      return 1;
> +    }
> +
> +  return 0;
> +}
> +
> +#include "../test-skeleton.c"
> diff --git a/string/str-two-way.h b/string/str-two-way.h
> index 3791404..db7f374 100644
> --- a/string/str-two-way.h
> +++ b/string/str-two-way.h
> @@ -1,5 +1,5 @@
>  /* Byte-wise substring search, using the Two-Way algorithm.
> -   Copyright (C) 2008, 2010 Free Software Foundation, Inc.
> +   Copyright (C) 2008-2012 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>     Written by Eric Blake <ebb9@byu.net>, 2008.
>  
> @@ -78,6 +78,16 @@
>  # define CMP_FUNC memcmp
>  #endif
>  
> +#ifndef AVAILABLE1
> +# define AVAILABLE1(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
> +#endif
> +#ifndef AVAILABLE2
> +# define AVAILABLE2(h, h_l, j, n_l) (1)
> +#endif
> +#ifndef RET0_IF_0
> +# define RET0_IF_0(a) /* nothing */
> +#endif
> +
>  /* Perform a critical factorization of NEEDLE, of length NEEDLE_LEN.
>     Return the index of the first byte in the right half, and set
>     *PERIOD to the global period of the right half.
> @@ -269,37 +279,58 @@ two_way_short_needle (const unsigned char *haystack, size_t haystack_len,
>  	 required, and any mismatch results in a maximal shift.  */
>        period = MAX (suffix, needle_len - suffix) + 1;
>        j = 0;
> -      while (AVAILABLE (haystack, haystack_len, j, needle_len))
> +      while (AVAILABLE1 (haystack, haystack_len, j, needle_len))
>  	{
> +	  unsigned char a;

(2) Change this from `a' to a meaningful variable name.

> +
>  	  /* TODO: The first-character loop can be sped up by adapting
>  	     longword-at-a-time implementation of memchr/strchr.  */
>  	  if (needle_suffix
> -	      != CANON_ELEMENT (haystack[suffix + j]))
> +	      != (a = CANON_ELEMENT (haystack[suffix + j])))
>  	    {
> +	      RET0_IF_0 (a);
>  	      ++j;
>  	      continue;
>  	    }
>  
>  	  /* Scan for matches in right half.  */
>  	  i = suffix + 1;
> -	  while (i < needle_len && (CANON_ELEMENT (needle[i])
> -				    == CANON_ELEMENT (haystack[i + j])))
> -	    ++i;
> +	  while (i < needle_len)
> +	    {
> +	      if (CANON_ELEMENT (needle[i])
> +		  != (a = CANON_ELEMENT (haystack[i + j])))
> +		{
> +		  RET0_IF_0 (a);
> +		  break;
> +		}
> +	      ++i;
> +	    }
>  	  if (needle_len <= i)
>  	    {
>  	      /* Scan for matches in left half.  */
>  	      i = suffix - 1;
> -	      while (i != SIZE_MAX && (CANON_ELEMENT (needle[i])
> -				       == CANON_ELEMENT (haystack[i + j])))
> -		--i;
> +	      while (i != SIZE_MAX)
> +		{
> +		  if (CANON_ELEMENT (needle[i])
> +		      != (a = CANON_ELEMENT (haystack[i + j])))
> +		    {
> +		      RET0_IF_0 (a);
> +		      break;
> +		    }
> +		  --i;
> +		}
>  	      if (i == SIZE_MAX)
>  		return (RETURN_TYPE) (haystack + j);
>  	      j += period;
>  	    }
>  	  else
>  	    j += i - suffix + 1;
> +
> +	  if (!AVAILABLE2 (haystack, haystack_len, j, needle_len))
> +	    break;
>  	}
>      }
> + ret0: __attribute__ ((unused))
>    return NULL;
>  }
>  
> @@ -436,6 +467,9 @@ two_way_long_needle (const unsigned char *haystack, size_t haystack_len,
>  }
>  
>  #undef AVAILABLE
> +#undef AVAILABLE1
> +#undef AVAILABLE2
>  #undef CANON_ELEMENT
>  #undef CMP_FUNC
> +#undef RET0_IF_0
>  #undef RETURN_TYPE
> diff --git a/string/strcasestr.c b/string/strcasestr.c
> index 9e1bde9..184ca68 100644
> --- a/string/strcasestr.c
> +++ b/string/strcasestr.c
> @@ -1,6 +1,5 @@
>  /* Return the offset of one string within another.
> -   Copyright (C) 1994, 1996-2000, 2004, 2008, 2009, 2010
> -   Free Software Foundation, Inc.
> +   Copyright (C) 1994-2012 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>  
>     The GNU C Library is free software; you can redistribute it and/or
> @@ -44,6 +43,9 @@
>  #define AVAILABLE(h, h_l, j, n_l)			\
>    (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
>     && ((h_l) = (j) + (n_l)))
> +#define AVAILABLE1(h, h_l, j, n_l) (true)
> +#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
> +#define RET0_IF_0(a) if (!a) goto ret0
>  #define CANON_ELEMENT(c) TOLOWER (c)
>  #define CMP_FUNC(p1, p2, l)				\
>    __strncasecmp ((const char *) (p1), (const char *) (p2), l)
> diff --git a/string/strstr.c b/string/strstr.c
> index 10e6fdc..ec2a1e9 100644
> --- a/string/strstr.c
> +++ b/string/strstr.c
> @@ -1,6 +1,5 @@
>  /* Return the offset of one string within another.
> -   Copyright (C) 1994,1996,1997,2000,2001,2003,2008,2009
> -   Free Software Foundation, Inc.
> +   Copyright (C) 1994-2012 Free Software Foundation, Inc.
>     This file is part of the GNU C Library.
>  
>     The GNU C Library is free software; you can redistribute it and/or
> @@ -36,6 +35,9 @@
>  #define AVAILABLE(h, h_l, j, n_l)			\
>    (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
>     && ((h_l) = (j) + (n_l)))
> +#define AVAILABLE1(h, h_l, j, n_l) (true)
> +#define AVAILABLE2(h, h_l, j, n_l) AVAILABLE (h, h_l, j, n_l)
> +#define RET0_IF_0(a) if (!a) goto ret0
>  #include "str-two-way.h"
>  
>  #undef strstr
> 

OK with changes (1) and (2).

Cheers,
Carlos.
-- 
Carlos O'Donell
Mentor Graphics / CodeSourcery
carlos_odonell@mentor.com
carlos@codesourcery.com
+1 (613) 963 1026


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]