[PATCH] Fix strstr bug with huge needles (bug 23637)

Wilco Dijkstra Wilco.Dijkstra@arm.com
Wed Sep 12 13:02:00 GMT 2018


As reported in https://sourceware.org/ml/libc-help/2018-09/msg00000.html , 
the generic strstr in GLIBC 2.28 fails to match huge needles.  The optimized
AVAILABLE macro reads ahead a large fixed amount to reduce the overhead of
repeatedly checking for the end of the string.  However if the needle length
is larger than this, two_way_long_needle may confuse this as meaning the end
of the string and return NULL.  This is fixed by adding the needle length to
the amount to read ahead.

Passes GLIBC regress on AArch64, new testcase now passes.

ChangeLog:
2018-06-30  Wilco Dijkstra  <wdijkstr@arm.com>

	[BZ #21286]
	* string/Makefile: Add bug-strstr1.
	* string/bug-strstr1.c: New test.
	* string/strcasestr.c (AVAILABLE): Fix readahead distance.
	* string/strstr.c (AVAILABLE): Likewise.
---

diff --git a/string/Makefile b/string/Makefile
index 680431f92185f9143710809b4105e12daa2daaa5..47f901784dcce397fc5f9887fd7e55b618c6ea80 100644
--- a/string/Makefile
+++ b/string/Makefile
@@ -59,7 +59,7 @@ tests		:= tester inl-tester noinl-tester testcopy test-ffs	\
 		   bug-envz1 tst-strxfrm2 tst-endian tst-svc2		\
 		   tst-strtok_r bug-strcoll2 tst-cmp tst-xbzero-opt	\
 		   test-endian-types test-endian-file-scope		\
-		   test-endian-sign-conversion
+		   test-endian-sign-conversion bug-strstr1
 
 # This test allocates a lot of memory and can run for a long time.
 xtests = tst-strcoll-overflow
diff --git a/string/bug-strstr1.c b/string/bug-strstr1.c
new file mode 100644
index 0000000000000000000000000000000000000000..02d02f9298fa87154ec1be07a2a318f4a5f72a05
--- /dev/null
+++ b/string/bug-strstr1.c
@@ -0,0 +1,32 @@
+/* See BZ #23637.  */
+#include <string.h>
+#include <stdio.h>
+
+#define N 1024
+
+int
+do_test (void)
+{
+  puts ("test strstr huge needle");
+  char h[N * 2 + 1];
+  char n[N + 1];
+  char *s;
+  int i;
+
+  for (i = 0; i < N; i++)
+    {
+      n[i] = 'x';
+      h[i] = ' ';
+      h[i + N] = 'x';
+    }
+  n[N] = '\0';
+  h[N * 2] = '\0';
+
+  /* Ensure we don't match at the first 'x'.  */
+  h[0] = 'x';
+
+  s = strstr (h, n);
+  return s == NULL || strcmp (s, n) != 0;
+}
+
+#include <support/test-driver.c>
diff --git a/string/strcasestr.c b/string/strcasestr.c
index 1f6b7b846f81d0d8fe77b390062d2d86be11b056..8aa76037dcc052f34ee4a8594e017ec822ce892b 100644
--- a/string/strcasestr.c
+++ b/string/strcasestr.c
@@ -37,8 +37,9 @@
 /* Two-Way algorithm.  */
 #define RETURN_TYPE char *
 #define AVAILABLE(h, h_l, j, n_l)			\
-  (((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
-			      (j) + (n_l) <= (h_l)))
+  (((j) + (n_l) <= (h_l)) \
+   || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
+       (j) + (n_l) <= (h_l)))
 #define CHECK_EOL (1)
 #define RET0_IF_0(a) if (!a) goto ret0
 #define CANON_ELEMENT(c) TOLOWER (c)
diff --git a/string/strstr.c b/string/strstr.c
index 33acdc5442d3e9031298a56e4f52a19e2ffa7d84..f74d7189ed1319f6225525cc2d32380745de1523 100644
--- a/string/strstr.c
+++ b/string/strstr.c
@@ -33,8 +33,9 @@
 
 #define RETURN_TYPE char *
 #define AVAILABLE(h, h_l, j, n_l)			\
-  (((j) + (n_l) <= (h_l)) || ((h_l) += __strnlen ((void*)((h) + (h_l)), 512), \
-			      (j) + (n_l) <= (h_l)))
+  (((j) + (n_l) <= (h_l)) \
+   || ((h_l) += __strnlen ((void*)((h) + (h_l)), (n_l) + 512), \
+       (j) + (n_l) <= (h_l)))
 #define CHECK_EOL (1)
 #define RET0_IF_0(a) if (!a) goto ret0
 #define FASTSEARCH(S,C,N) (void*) strchr ((void*)(S), (C))



More information about the Libc-alpha mailing list