This is the mail archive of the libc-hacker@sourceware.org mailing list for the glibc project.

Note that libc-hacker is a closed list. You may look at the archives of this list, but subscription and posting are not open.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Handle really large number of files in glob_in_dir (BZ#3253)


Hi!

This is my proposed patch for the BZ#3253 issue.  Allocating a pointer pair
with alloca one at a time is overkill and on some arches really bad
(on some arches each alloca allocates quite a big fixed area in addition
to the requested size), on the other side we want at least some initial
allocations done with alloca for speed and also copying memory around
on realloc would be costly.  So, this patch uses a chain of (decreasingly
big) arrays of pointers, the last few of them allocated with alloca and
the really big ones with malloc.  The last array is even in a local
variable, so that the allocation code can be only in one place (the other
two need just one pointer entry).

2006-09-25  Jakub Jelinek  <jakub@redhat.com>

	[BZ #3253]
	* posix/glob.c (glob_in_dir): Don't alloca one struct globlink at a
	time, rather allocate increasingly bigger arrays of pointers, if
	possible with alloca, if too large with malloc.  Reported by
	Petr Baudys <pasky@suse.cz>.

--- libc/posix/glob.c.jj	2006-01-11 16:55:30.000000000 +0100
+++ libc/posix/glob.c	2006-09-25 13:06:07.000000000 +0200
@@ -1090,16 +1090,20 @@ glob_in_dir (const char *pattern, const 
 {
   size_t dirlen = strlen (directory);
   void *stream = NULL;
-  struct globlink
+  struct globnames
     {
-      struct globlink *next;
-      char *name;
+      struct globnames *next;
+      size_t count;
+      char *name[16];
     };
-  struct globlink *names = NULL;
-  size_t nfound;
+  struct globnames init_names;
+  struct globnames *names = &init_names, *names_alloca = &init_names;
+  size_t nfound = 0, allocasize = sizeof (init_names), cur = 0;
   int meta;
   int save;
 
+  init_names.next = NULL;
+  init_names.count = 16;
   meta = __glob_pattern_p (pattern, !(flags & GLOB_NOESCAPE));
   if (meta == 0 && (flags & (GLOB_NOCHECK|GLOB_NOMAGIC)))
     {
@@ -1107,7 +1111,6 @@ glob_in_dir (const char *pattern, const 
 	 characters and we must not return an error therefore the
 	 result will always contain exactly one name.  */
       flags |= GLOB_NOCHECK;
-      nfound = 0;
     }
   else if (meta == 0 &&
 	   ((flags & GLOB_NOESCAPE) || strchr (pattern, '\\') == NULL))
@@ -1128,8 +1131,6 @@ glob_in_dir (const char *pattern, const 
 	/* We found this file to be existing.  Now tell the rest
 	   of the function to copy this name into the result.  */
 	flags |= GLOB_NOCHECK;
-
-      nfound = 0;
     }
   else
     {
@@ -1137,12 +1138,10 @@ glob_in_dir (const char *pattern, const 
 	{
 	  /* This is a special case for matching directories like in
 	     "*a/".  */
-	  names = (struct globlink *) __alloca (sizeof (struct globlink));
-	  names->name = (char *) malloc (1);
-	  if (names->name == NULL)
+	  names->name[cur] = (char *) malloc (1);
+	  if (names->name[cur] == NULL)
 	    goto memory_error;
-	  names->name[0] = '\0';
-	  names->next = NULL;
+	  *names->name[cur++] = '\0';
 	  nfound = 1;
 	  meta = 0;
 	}
@@ -1157,7 +1156,6 @@ glob_in_dir (const char *pattern, const 
 		  && ((errfunc != NULL && (*errfunc) (directory, errno))
 		      || (flags & GLOB_ERR)))
 		return GLOB_ABORTED;
-	      nfound = 0;
 	      meta = 0;
 	    }
 	  else
@@ -1168,7 +1166,6 @@ glob_in_dir (const char *pattern, const 
 			       | FNM_CASEFOLD
 #endif
 			       );
-	      nfound = 0;
 	      flags |= GLOB_MAGCHAR;
 
 	      while (1)
@@ -1224,15 +1221,29 @@ glob_in_dir (const char *pattern, const 
 			  || link_exists_p (directory, dirlen, name, pglob,
 					    flags))
 			{
-			  struct globlink *new = (struct globlink *)
-			    __alloca (sizeof (struct globlink));
+			  if (cur == names->count)
+			    {
+			      struct globnames *newnames;
+			      size_t count = names->count * 2;
+			      size_t size = sizeof (struct globnames)
+					    + (count - 16) * sizeof (char *);
+			      allocasize += size;
+			      if (__libc_use_alloca (allocasize))
+				newnames = names_alloca = __alloca (size);
+			      else if ((newnames = malloc (size))
+				       == NULL)
+				goto memory_error;
+			      newnames->count = count;
+			      newnames->next = names;
+			      names = newnames;
+			      cur = 0;
+			    }
 			  len = NAMLEN (d);
-			  new->name = (char *) malloc (len + 1);
-			  if (new->name == NULL)
+			  names->name[cur] = (char *) malloc (len + 1);
+			  if (names->name[cur] == NULL)
 			    goto memory_error;
-			  *((char *) mempcpy (new->name, name, len)) = '\0';
-			  new->next = names;
-			  names = new;
+			  *((char *) mempcpy (names->name[cur++], name, len))
+			    = '\0';
 			  ++nfound;
 			}
 		    }
@@ -1245,12 +1256,10 @@ glob_in_dir (const char *pattern, const 
     {
       size_t len = strlen (pattern);
       nfound = 1;
-      names = (struct globlink *) __alloca (sizeof (struct globlink));
-      names->next = NULL;
-      names->name = (char *) malloc (len + 1);
-      if (names->name == NULL)
+      names->name[cur] = (char *) malloc (len + 1);
+      if (names->name[cur] == NULL)
 	goto memory_error;
-      *((char *) mempcpy (names->name, pattern, len)) = '\0';
+      *((char *) mempcpy (names->name[cur++], pattern, len)) = '\0';
     }
 
   if (nfound != 0)
@@ -1265,8 +1274,23 @@ glob_in_dir (const char *pattern, const 
 	goto memory_error;
       pglob->gl_pathv = new_gl_pathv;
 
-      for (; names != NULL; names = names->next)
-	pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++] = names->name;
+      while (1)
+	{
+	  size_t i;
+	  struct globnames *old = names;
+	  for (i = 0; i < cur; ++i)
+	    pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc++]
+	      = names->name[i];
+	  names = names->next;
+	  if (names == NULL)
+	    break;
+	  cur = names->count;
+	  if (old == names_alloca)
+	    names_alloca = names;
+	  else
+	    free (old);
+	}
+
       pglob->gl_pathv[pglob->gl_offs + pglob->gl_pathc] = NULL;
 
       pglob->gl_flags = flags;
@@ -1293,11 +1317,20 @@ glob_in_dir (const char *pattern, const 
       closedir (stream);
     __set_errno (save);
   }
-  while (names != NULL)
+  while (1)
     {
-      if (names->name != NULL)
-	free (names->name);
+      size_t i;
+      struct globnames *old = names;
+      for (i = 0; i < cur; ++i)
+	free (names->name[i]);
       names = names->next;
+      if (names == NULL)
+	break;
+      cur = names->count;
+      if (old == names_alloca)
+	names_alloca = names;
+      else
+	free (old);
     }
   return GLOB_NOSPACE;
 }

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]