This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


[RFC 1/*] free memory asynchronously.


Hi,

To move faster with malloc I will start posting what I have in an uglier
form; it would be prettier if we did the refactorings first.

As I mentioned previously, the atomic operations and locking in the fastbin
part of malloc are not really needed, as memory could be returned
asynchronously.

A minimal patch to add this follows; it just adds the alloc/free logic.

As a consequence this makes a lot of the _int_free code dead, and it could
be removed. Fastbins could then use ordinary operations instead of atomic
ones.

It also makes locking unnecessary in most functions, which would be easy to
exploit once the locking logic is factored away.

The strategy I chose here is very simple. I could make atomic usage less
frequent by tracking how much memory is pending and returning it only once
a given threshold accumulates, and reclamation could run once per 10
allocations or so. That is a topic for additional optimization that will be
done later.
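
A rough sketch of what that thresholding could look like, written as a
stand-alone C11 snippet rather than against the malloc internals; the
pending_bytes field, the ASYNC_RECLAIM_THRESHOLD constant and the function
names are invented for the example and are not part of the patch below:

#include <stdatomic.h>
#include <stddef.h>
#include <stdio.h>

/* Invented tuning constant, not taken from the patch.  */
#define ASYNC_RECLAIM_THRESHOLD (64 * 1024)

/* Stand-in for an arena; the real patch would add a counter like this to
   struct malloc_state.  */
struct fake_arena
{
  _Atomic size_t pending_bytes;   /* bytes waiting on the returned list */
};

/* Free path: account the chunk and report whether reclaiming is due.  */
int
note_pending_free (struct fake_arena *av, size_t chunk_size)
{
  size_t pending
    = atomic_fetch_add (&av->pending_bytes, chunk_size) + chunk_size;
  return pending >= ASYNC_RECLAIM_THRESHOLD;
}

/* Malloc path: call this after the returned list was actually drained.  */
void
note_reclaim_done (struct fake_arena *av)
{
  atomic_store (&av->pending_bytes, 0);
}

int
main (void)
{
  struct fake_arena av = { 0 };
  for (int i = 0; i < 100; i++)
    if (note_pending_free (&av, 1024))
      {
        printf ("threshold reached after %d frees\n", i + 1);
        note_reclaim_done (&av);
      }
  return 0;
}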

Comments?

diff --git a/malloc/malloc.c b/malloc/malloc.c
index 74ad92d..eea4ca6 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1696,6 +1696,11 @@ struct malloc_state
   /* Memory allocated from the system in this arena.  */
   INTERNAL_SIZE_T system_mem;
   INTERNAL_SIZE_T max_system_mem;
+
+  /* Asynchronously returned memory is placed here.  It is a linked list;
+     free atomically appends to it while malloc atomically swaps it with an
+     empty list.  */
+  mchunkptr returned;
 };
 
 struct malloc_par
@@ -3176,6 +3181,28 @@ __libc_calloc (size_t n, size_t elem_size)
    ------------------------------ malloc ------------------------------
  */
 
+/* Reclaim asynchronously returned memory.  */
+static void
+_async_reclaim (mstate av)
+{
+  mchunkptr p = av->returned, tmp;
+  if (p)
+    {
+      /* Try to atomically swap the list with an empty one.  This can fail
+         when contending with free; then we retry on the next malloc.  */
+
+      if (!catomic_compare_and_exchange_bool_acq (&(av->returned), NULL, p))
+        {
+          while (p)
+            {
+              tmp = p->fd;
+              _int_free (av, p, 1);
+              p = tmp;
+            }
+        }
+    }
+}
+
 static void *
 _int_malloc (mstate av, size_t bytes)
 {
@@ -3199,6 +3226,9 @@ _int_malloc (mstate av, size_t bytes)
 
   const char *errstr = NULL;
 
+  /* Check if there is asynchronously returned memory.  */
+  _async_reclaim (av);
+
   /*
      Convert request size to internal form by adding SIZE_SZ bytes
      overhead plus possibly more to obtain necessary alignment and/or
@@ -3688,6 +3718,25 @@ _int_malloc (mstate av, size_t bytes)
    ------------------------------ free ------------------------------
  */
 
+/* Return memory asynchronously.  */
+static void
+_async_free (mstate av, mchunkptr p)
+{
+  mchunkptr *r = &(av->returned);
+
+  /* Append to the linked list.  A future improvement could keep a list
+     for each other arena and use a separate field for this appending.  */
+
+  mchunkptr old = *r, old2;
+  do
+    {
+      p->fd = old2 = old;
+    }
+  while ((old = catomic_compare_and_exchange_val_rel (r, p, old2)) != old2);
+}
+
 static void
 _int_free (mstate av, mchunkptr p, int have_lock)
 {
@@ -3705,6 +3754,14 @@ _int_free (mstate av, mchunkptr p, int have_lock)
 
   size = chunksize (p);
 
+  if (!have_lock)
+    {
+      _async_free (av, p);
+      return;
+    }
+
   /* Little security check which won't hurt performance: the
      allocator never wrapps around at the end of the address space.
      Therefore we can exclude some size values which might appear

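For anyone who wants to poke at the list discipline in isolation, here is a
stand-alone sketch that mimics _async_free and _async_reclaim using C11
atomics instead of glibc's catomic_* macros, so it builds outside glibc.  It
detaches the list with atomic_exchange rather than the conditional swap used
in the patch, and all names and types are made up for the example.

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy chunk; the fd member plays the role of mchunkptr->fd.  */
struct chunk
{
  struct chunk *fd;
  int id;
};

/* Plays the role of av->returned from the patch.  */
static _Atomic (struct chunk *) returned;

/* Mimics _async_free: push one chunk onto the shared list with a CAS loop.  */
static void
demo_async_free (struct chunk *p)
{
  struct chunk *old = atomic_load (&returned);
  do
    p->fd = old;
  while (!atomic_compare_exchange_weak (&returned, &old, p));
}

/* Mimics _async_reclaim: detach the whole list at once and walk it.  */
static void
demo_async_reclaim (void)
{
  struct chunk *p = atomic_exchange (&returned, (struct chunk *) NULL);
  while (p != NULL)
    {
      struct chunk *tmp = p->fd;                 /* save link before freeing */
      printf ("reclaiming chunk %d\n", p->id);   /* _int_free would go here */
      free (p);
      p = tmp;
    }
}

int
main (void)
{
  for (int i = 0; i < 4; i++)
    {
      struct chunk *c = malloc (sizeof *c);
      c->id = i;
      demo_async_free (c);
    }
  demo_async_reclaim ();
  return 0;
}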
