This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix regex x{0}<dup_op> handling


On Sun, Dec 21, 2003 at 02:07:59AM +0100, Rene Rebe wrote:
> The first is a seg-fault while parsing, e.g. s.th. like:
> 
> grep -e 'typeof.*;\{0\}*'

Fixed by patch below.
Whenever one of the consecutive dup operators is {0} or {0,0},
we should treat all of them as if there were a single {0} dup operator.

2003-12-21  Jakub Jelinek  <jakub@redhat.com>

	* posix/regcomp.c (parse_dup_op): Return NULL if dup_elem is NULL,
	after checking syntax.  Optimize.
	(calc_first): Fix comment.
	* posix/bug-regex13.c (tests): Add new tests.

--- libc/posix/regcomp.c.jj	2003-12-16 23:56:58.000000000 +0100
+++ libc/posix/regcomp.c	2003-12-21 13:15:16.000000000 +0100
@@ -1157,7 +1157,7 @@ calc_first (dfa, node)
     case OP_CLOSE_EQUIV_CLASS:
     case OP_OPEN_CHAR_CLASS:
     case OP_CLOSE_CHAR_CLASS:
-      /* These must not be appeared here.  */
+      /* These must not appear here.  */
       assert (0);
 #endif
     case END_OF_RE:
@@ -2443,23 +2443,23 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	  else
 	    goto parse_dup_op_ebrace;
 	}
-      if (BE (start == 0 && end == 0, 0))
+      if (BE ((start == 0 && end == 0) || tree == NULL, 0))
 	{
-	  /* We treat "<re>{0}" and "<re>{0,0}" as null string.  */
+	  /* We treat "<re>{0}" and "<re>{0,0}" as null string.
+	     Similarly "<re>{0}{m,n}".  */
 	  fetch_token (token, regexp, syntax);
 	  return NULL;
 	}
 
       /* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}".  */
       elem = tree;
-      for (i = 0; i < start; ++i)
-	if (i != 0)
-	  {
-	    work_tree = duplicate_tree (elem, dfa);
-	    tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
-	    if (BE (work_tree == NULL || tree == NULL, 0))
-	      goto parse_dup_op_espace;
-	  }
+      for (i = 1; i < start; ++i)
+	{
+	  work_tree = duplicate_tree (elem, dfa);
+	  tree = create_tree (dfa, tree, work_tree, CONCAT, 0);
+	  if (BE (work_tree == NULL || tree == NULL, 0))
+	    goto parse_dup_op_espace;
+	}
 
       if (end == -1)
 	{
@@ -2516,6 +2516,9 @@ parse_dup_op (dup_elem, regexp, dfa, tok
 	    }
 	}
     }
+  /* Treat "<re>{0}*" etc. as "<re>{0}".  */
+  else if (tree == NULL)
+    ;
   else
     {
       tree = re_dfa_add_tree_node (dfa, tree, NULL, token);
--- libc/posix/bug-regex13.c.jj	2002-12-02 00:16:53.000000000 +0100
+++ libc/posix/bug-regex13.c	2003-12-21 12:18:35.000000000 +0100
@@ -34,7 +34,33 @@ static struct
 } tests[] = {
   {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "1", -1}, /* It should not match.  */
   {RE_BACKSLASH_ESCAPE_IN_LISTS, "[0\\-9]", "-", 0}, /* It should match.  */
-  {RE_SYNTAX_POSIX_BASIC, "s1\n.*\ns3", "s1\ns2\ns3", 0}
+  {RE_SYNTAX_POSIX_BASIC, "s1\n.*\ns3", "s1\ns2\ns3", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}c", "abbc", -1},
+  /* Nested duplication.  */
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "ac", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abc", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{1}c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "ac", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbc", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{2}{2}c", "abbbbbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}{1}c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{1}{0}c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}*c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}?c", "abbc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "ac", 0},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abc", -1},
+  {RE_SYNTAX_POSIX_EXTENDED, "ab{0}+c", "abbc", -1},
 };
 
 int


	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]