This is the mail archive of the libc-hacker@sources.redhat.com mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Fix most of bug-regex11.c back-reference failures


Hi!

I must say I'm not 100% sure about this (I haven't spent enough time on it
yet to fully understand prune_impossible_nodes), but it looks
correct.  When check_subexp_limits does not find any OP_CLOSE_SUBEXP node
in dest_nodes, cls_node is -1.  For ops_node, check_subexp_limits
checks that ops_node >= 0 before calling sub_epsilon_src_nodes,
but for cls_node it unconditionally evaluated
                if (!re_node_set_contains (dfa->inveclosures + node,
                                           cls_node)
                    && !re_node_set_contains (dfa->eclosures + node,
                                              cls_node))
which is always true when cls_node is -1 (no node set ever contains -1), and
thus removed the node (a CHARACTER node in the test I was debugging) from
dest_nodes.
The comment above it is:
/* Check the limitation of the close subexpression.  */
so I'd find the comment misleading at best if the code were checking
some limitation even when there is no close subexpression.

2003-11-26  Jakub Jelinek  <jakub@redhat.com>

	* posix/regexec.c (check_subexp_limits): Only check close
	subexpression limitation if one is found.  Formatting.
	(sift_states_backward, check_arrival, check_arrival_add_next_nodes):
	Formatting.
	* posix/bug-regex11.c (tests): Enable most #ifdefed out tests.
	Add new test.

--- libc/posix/bug-regex11.c.jj	2003-11-26 09:25:13.000000000 +0100
+++ libc/posix/bug-regex11.c	2003-11-26 15:46:18.000000000 +0100
@@ -63,14 +63,16 @@ struct
   { "()(b)c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
   { "a(b)()c\\1", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 2 }, { 2, 2 } } },
   { "a()(b)c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
-#if 0
-  /* XXX Not used since they fail so far.  */
   { "()(b)\\1c\\2", "bcb", REG_EXTENDED, 3, { { 0, 3 }, { 0, 0 }, { 0, 1 } } },
   { "(b())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 2 }, { 0, 1 }, { 1, 1 } } },
   { "a()(b)\\1c\\2", "abcb", REG_EXTENDED, 3, { { 0, 4 }, { 1, 1 }, { 1, 2 } } },
   { "a()d(b)\\1c\\2", "adbcb", REG_EXTENDED, 3, { { 0, 5 }, { 1, 1 }, { 2, 3 } } },
   { "a(b())\\2\\1", "abbbb", REG_EXTENDED, 3, { { 0, 3 }, { 1, 2 }, { 2, 2 } } },
   { "(bb())\\2\\1", "bbbb", REG_EXTENDED, 3, { { 0, 4 }, { 0, 2 }, { 2, 2 } } },
+  { "^(.?)(.?)(.?)(.?)(.?).?\\5\\4\\3\\2\\1$",
+    "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
+#if 0
+  /* XXX Not used since they fail so far.  */
   { "^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$",
     "level", REG_NOSUB | REG_EXTENDED, 0, { { -1, -1 } } },
 #endif
--- libc/posix/regexec.c.jj	2003-11-26 09:25:16.000000000 +0100
+++ libc/posix/regexec.c	2003-11-26 15:40:13.000000000 +0100
@@ -1439,7 +1439,7 @@ sift_states_backward (preg, mctx, sctx)
 	  int naccepted = 0;
 	  re_token_type_t type = dfa->nodes[prev_node].type;
 
-	  if (IS_EPSILON_NODE(type))
+	  if (IS_EPSILON_NODE (type))
 	    continue;
 #ifdef RE_ENABLE_I18N
 	  /* If the node may accept `multi byte'.  */
@@ -1836,7 +1836,7 @@ check_subexp_limits (dfa, dest_nodes, ca
 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 	    {
 	      int node = dest_nodes->elems[node_idx];
-	      re_token_type_t type= dfa->nodes[node].type;
+	      re_token_type_t type = dfa->nodes[node].type;
 	      if (type == OP_OPEN_SUBEXP
 		  && subexp_idx == dfa->nodes[node].opr.idx)
 		ops_node = node;
@@ -1849,34 +1849,38 @@ check_subexp_limits (dfa, dest_nodes, ca
 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
 	  if (ops_node >= 0)
 	    {
-	      err = sub_epsilon_src_nodes(dfa, ops_node, dest_nodes,
-					  candidates);
+	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
+					   candidates);
 	      if (BE (err != REG_NOERROR, 0))
 		return err;
 	    }
+
 	  /* Check the limitation of the close subexpression.  */
-	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
-	    {
-	      int node = dest_nodes->elems[node_idx];
-	      if (!re_node_set_contains (dfa->inveclosures + node, cls_node)
-		  && !re_node_set_contains (dfa->eclosures + node, cls_node))
-		{
-		  /* It is against this limitation.
-		     Remove it form the current sifted state.  */
-		  err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
-					      candidates);
-		  if (BE (err != REG_NOERROR, 0))
-		    return err;
-		  --node_idx;
-		}
-	    }
+	  if (cls_node >= 0)
+	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
+	      {
+		int node = dest_nodes->elems[node_idx];
+		if (!re_node_set_contains (dfa->inveclosures + node,
+					   cls_node)
+		    && !re_node_set_contains (dfa->eclosures + node,
+					      cls_node))
+		  {
+		    /* It is against this limitation.
+		       Remove it form the current sifted state.  */
+		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+						 candidates);
+		    if (BE (err != REG_NOERROR, 0))
+		      return err;
+		    --node_idx;
+		  }
+	      }
 	}
       else /* (ent->subexp_to != str_idx)  */
 	{
 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
 	    {
 	      int node = dest_nodes->elems[node_idx];
-	      re_token_type_t type= dfa->nodes[node].type;
+	      re_token_type_t type = dfa->nodes[node].type;
 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
 		{
 		  if (subexp_idx != dfa->nodes[node].opr.idx)
@@ -1886,8 +1890,8 @@ check_subexp_limits (dfa, dest_nodes, ca
 		    {
 		      /* It is against this limitation.
 			 Remove it form the current sifted state.  */
-		      err = sub_epsilon_src_nodes(dfa, node, dest_nodes,
-						  candidates);
+		      err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
+						   candidates);
 		      if (BE (err != REG_NOERROR, 0))
 			return err;
 		    }
@@ -2756,8 +2760,8 @@ check_arrival (preg, mctx, path, top_nod
 	}
       if (cur_state)
 	{
-	  err = check_arrival_add_next_nodes(preg, dfa, mctx, str_idx,
-					     &cur_state->nodes, &next_nodes);
+	  err = check_arrival_add_next_nodes (preg, dfa, mctx, str_idx,
+					      &cur_state->nodes, &next_nodes);
 	  if (BE (err != REG_NOERROR, 0))
 	    {
 	      re_node_set_free (&next_nodes);
@@ -2835,7 +2839,7 @@ check_arrival_add_next_nodes (preg, dfa,
       int naccepted = 0;
       int cur_node = cur_nodes->elems[cur_idx];
       re_token_type_t type = dfa->nodes[cur_node].type;
-      if (IS_EPSILON_NODE(type))
+      if (IS_EPSILON_NODE (type))
 	continue;
 #ifdef RE_ENABLE_I18N
       /* If the node may accept `multi byte'.  */

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]