
Linuxthreads memory barrier patch.


I need some multiprocessor gurus to take a look at this.

2001-05-01  Kaz Kylheku  <kaz@ashi.footprints.net>

	Memory barrier overhaul following a line-by-line inspection,
	inspired by e-mail from Alexander Terekhov <TEREKHOV@de.ibm.com>.

	* mutex.c (pthread_once): Missing memory barriers added;
	reported by Alexander.

	* pthread.c (__pthread_wait_for_restart_signal,
	__pthread_timedsuspend_new, __pthread_restart_new): Added
	memory barriers ``just in case'' and for their documentary value.

	* spinlock.c (__pthread_release): New inline function for releasing
	a spinlock, complementing __pthread_acquire.  Includes a memory
	barrier prior to the assignment to the spinlock, and an __asm
	__volatile dance to prevent reordering or optimization of the
	spinlock access.

	* spinlock.c (__pthread_unlock, __pthread_alt_lock,
	__pthread_alt_timedlock, __pthread_alt_unlock,
	__pthread_compare_and_swap): Updated to use new __pthread_release
	instead of updating spinlock directly.

	* spinlock.c (__pthread_lock, __pthread_unlock, wait_node_alloc,
	wait_node_free, wait_node_dequeue, __pthread_alt_lock,
	__pthread_alt_timedlock, __pthread_alt_unlock, __pthread_acquire):
	Memory barrier overhaul.  Many missing memory barriers added,
	and a couple of needless ones removed.

	* spinlock.c (__pthread_compare_and_swap): Testandset optimization
	removed; now just calls __pthread_acquire, which issues the new
	read barrier before its testandset.
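
To make the pthread_once change concrete, here is a minimal standalone
sketch of the publish/consume pairing the patch relies on (this is not the
linuxthreads code): the initializing thread issues a write barrier before
setting the flag, and the fast-path reader issues a read barrier after
seeing it, so the flag can never be observed without the initialized data
also being visible.  The barrier macros below are mapped to GCC's
__sync_synchronize() purely for illustration; linuxthreads defines them
per architecture.

#include <stdio.h>

/* Stand-in barrier definitions for this sketch only.  */
#define WRITE_MEMORY_BARRIER() __sync_synchronize()
#define READ_MEMORY_BARRIER()  __sync_synchronize()

static int data;            /* set up by the one-time init routine */
static volatile int done;   /* plays the role of *once_control == DONE */

static void init_once (void)
{
  data = 42;                /* the init_routine work */
  WRITE_MEMORY_BARRIER();   /* commit the work before publishing the flag */
  done = 1;
}

static void fast_path_reader (void)
{
  if (done) {
    READ_MEMORY_BARRIER();  /* don't let the read of data float above the flag */
    printf ("%d\n", data);
  }
}

int main (void)
{
  init_once ();
  fast_path_reader ();
  return 0;
}

The same pairing is what the restart()/suspend() barriers and the new
__pthread_release() provide for the lock wait queues.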

diff -urp linuxthreads-ORIG/mutex.c linuxthreads/mutex.c
--- linuxthreads-ORIG/mutex.c	Tue May  1 13:56:12 2001
+++ linuxthreads/mutex.c	Tue May  1 15:52:29 2001
@@ -285,7 +285,10 @@ int __pthread_once(pthread_once_t * once
   int state_changed;

   /* Test without locking first for speed */
-  if (*once_control == DONE) return 0;
+  if (*once_control == DONE) {
+    READ_MEMORY_BARRIER();
+    return 0;
+  }
   /* Lock and test again */

   state_changed = 0;
@@ -310,6 +313,7 @@ int __pthread_once(pthread_once_t * once
     init_routine();
     pthread_cleanup_pop(0);
     pthread_mutex_lock(&once_masterlock);
+    WRITE_MEMORY_BARRIER();
     *once_control = DONE;
     state_changed = 1;
   }
diff -urp linuxthreads-ORIG/pthread.c linuxthreads/pthread.c
--- linuxthreads-ORIG/pthread.c	Tue May  1 13:56:16 2001
+++ linuxthreads/pthread.c	Tue May  1 15:46:10 2001
@@ -941,6 +941,8 @@ void __pthread_wait_for_restart_signal(p
   do {
     sigsuspend(&mask);                   /* Wait for signal */
   } while (THREAD_GETMEM(self, p_signal) !=__pthread_sig_restart);
+
+  READ_MEMORY_BARRIER(); /* See comment in __pthread_restart_new */
 }

 #if !__ASSUME_REALTIME_SIGNALS
@@ -1043,7 +1045,12 @@ __pthread_timedsuspend_old(pthread_descr

 void __pthread_restart_new(pthread_descr th)
 {
-    kill(th->p_pid, __pthread_sig_restart);
+  /* The barrier is probably not needed, but even then it documents our
+     assumptions.  The intent is to commit previous writes to shared
+     memory so the woken thread will have a consistent view.  Complementary
+     read barriers are present in the suspend functions. */
+  WRITE_MEMORY_BARRIER();
+  kill(th->p_pid, __pthread_sig_restart);
 }

 /* There is no __pthread_suspend_new because it would just
@@ -1101,6 +1108,7 @@ __pthread_timedsuspend_new(pthread_descr
      the caller; the thread is still eligible for a restart wakeup
      so there is a race. */

+  READ_MEMORY_BARRIER(); /* See comment in __pthread_restart_new */
   return was_signalled;
 }

diff -urp linuxthreads-ORIG/spinlock.c linuxthreads/spinlock.c
--- linuxthreads-ORIG/spinlock.c	Tue May  1 13:56:19 2001
+++ linuxthreads/spinlock.c	Tue May  1 16:49:17 2001
@@ -26,6 +26,12 @@

 #if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
 static void __pthread_acquire(int * spinlock);
+static inline void __pthread_release(int * spinlock)
+{
+    WRITE_MEMORY_BARRIER();
+    *spinlock = __LT_SPINLOCK_INIT;
+    __asm __volatile ("" : "=m" (*spinlock) : "m" (*spinlock));
+}
 #endif


@@ -85,6 +91,7 @@ again:
 	{
 	  if (spin_count)
 	    lock->__spinlock += (spin_count - lock->__spinlock) / 8;
+	  READ_MEMORY_BARRIER();
 	  return;
 	}
       }
@@ -139,6 +146,8 @@ again:
   /* Put back any resumes we caught that don't belong to us. */
   while (spurious_wakeup_count--)
     restart(self);
+
+  READ_MEMORY_BARRIER();
 #endif
 }

@@ -155,13 +164,14 @@ int __pthread_unlock(struct _pthread_fas
 #endif
 #if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
   {
-    WRITE_MEMORY_BARRIER();
-    lock->__spinlock = __LT_SPINLOCK_INIT;
+    __pthread_release(&lock->__spinlock);
     return 0;
   }
 #endif

 #if defined HAS_COMPARE_AND_SWAP
+  WRITE_MEMORY_BARRIER();
+
 again:
   oldstatus = lock->__status;

@@ -170,29 +180,31 @@ again:
 	oldstatus, 0))
       return 0;
   }
-
   /* Find thread in waiting queue with maximal priority */
   ptr = (pthread_descr *) &lock->__status;
   thr = (pthread_descr) (oldstatus & ~1L);
   maxprio = 0;
   maxptr = ptr;
+
+  /* Before we iterate over the wait queue, we need to execute a read
+     barrier; otherwise we may read stale contents of nodes that have just
+     been inserted by other processors.  One read barrier is enough to give
+     us a stable list; we don't need one for each pointer chase through the
+     list, because we are the owner of the lock.  Other threads can only add
+     nodes at the front, and if a front node is consistent, the ones behind
+     it must be as well. */
+
+  READ_MEMORY_BARRIER();
+
   while (thr != 0) {
     if (thr->p_priority >= maxprio) {
       maxptr = ptr;
       maxprio = thr->p_priority;
     }
     ptr = &(thr->p_nextlock);
-    /* Prevent reordering of the load of lock->__status above and the
-       load of *ptr below, as well as reordering of *ptr between
-       several iterations of the while loop.  Some processors (e.g.
-       multiprocessor Alphas) could perform such reordering even though
-       the loads are dependent. */
-    READ_MEMORY_BARRIER();
     thr = *ptr;
   }
-  /* Prevent reordering of the load of lock->__status above and
-     thr->p_nextlock below */
-  READ_MEMORY_BARRIER();
+
   /* Remove max prio thread from waiting list. */
   if (maxptr == (pthread_descr *) &lock->__status) {
     /* If max prio thread is at head, remove it with compare-and-swap
@@ -211,15 +223,21 @@ again:
     thr = *maxptr;
     *maxptr = thr->p_nextlock;

+    /* Ensure deletion from linked list completes before we
+       release the lock. */
+    WRITE_MEMORY_BARRIER();
+
     do {
       oldstatus = lock->__status;
     } while (!__compare_and_swap_with_release_semantics(&lock->__status,
 	     oldstatus, oldstatus & ~1L));
   }
-  /* Prevent reordering of store to *maxptr above and store to thr->p_nextlock
-     below */
-  WRITE_MEMORY_BARRIER();
-  /* Wake up the selected waiting thread */
+
+  /* Wake up the selected waiting thread.  The woken thread can check its
+     own p_nextlock field for NULL to detect that it has been removed.  No
+     barrier is needed here, since restart() and suspend() take care of
+     memory synchronization. */
+
   thr->p_nextlock = NULL;
   restart(thr);

@@ -288,8 +306,9 @@ static struct wait_node *wait_node_alloc
     if (oldvalue == 0)
       return malloc(sizeof *wait_node_alloc());

+    /* Ensure we don't read a stale next link through the oldvalue pointer. */
+    READ_MEMORY_BARRIER();
     newvalue = (long) ((struct wait_node *) oldvalue)->next;
-    WRITE_MEMORY_BARRIER();
   } while (! __compare_and_swap(&wait_node_free_list, oldvalue, newvalue));

   return (struct wait_node *) oldvalue;
@@ -324,6 +343,8 @@ static void wait_node_free(struct wait_n
     oldvalue = wait_node_free_list;
     wn->next = (struct wait_node *) oldvalue;
     newvalue = (long) wn;
+    /* Ensure the node's contents are written before we swap
+       it into the list. */
     WRITE_MEMORY_BARRIER();
   } while (! __compare_and_swap(&wait_node_free_list, oldvalue, newvalue));
 #endif
@@ -344,6 +365,10 @@ static void wait_node_dequeue(struct wai
      Otherwise it can be deleted in the straightforward way. */

   if (pp_node == pp_head) {
+    /* We don't need a read barrier between these next two loads,
+       because it is assumed that the caller has already ensured
+       the stability of *p_node with respect to p_node. */
+
     long oldvalue = (long) p_node;
     long newvalue = (long) p_node->next;

@@ -355,8 +380,10 @@ static void wait_node_dequeue(struct wai
        know the identity of the node which now holds the pointer to the node
        being deleted, so we must search from the beginning. */

-    for (pp_node = pp_head; *pp_node != p_node; pp_node = &(*pp_node)->next)
-      ; /* null body */
+    for (pp_node = pp_head; p_node != *pp_node; ) {
+      pp_node = &(*pp_node)->next;
+      READ_MEMORY_BARRIER(); /* Stabilize *pp_node for next iteration. */
+    }
   }

   *pp_node = p_node->next;
@@ -394,8 +421,7 @@ void __pthread_alt_lock(struct _pthread_
       suspend_needed = 1;
     }

-    WRITE_MEMORY_BARRIER();
-    lock->__spinlock = __LT_SPINLOCK_INIT;
+    __pthread_release(&lock->__spinlock);

     if (suspend_needed)
       suspend (self);
@@ -428,6 +454,8 @@ void __pthread_alt_lock(struct _pthread_

   if (oldstatus != 0)
     suspend(self);
+
+  READ_MEMORY_BARRIER();
 #endif
 }

@@ -468,8 +496,7 @@ int __pthread_alt_timedlock(struct _pthr
       oldstatus = 1; /* force suspend */
     }

-    WRITE_MEMORY_BARRIER();
-    lock->__spinlock = __LT_SPINLOCK_INIT;
+    __pthread_release(&lock->__spinlock);
     goto suspend;
   }
 #endif
@@ -517,6 +544,8 @@ int __pthread_alt_timedlock(struct _pthr

   wait_node_free(p_wait_node);

+  READ_MEMORY_BARRIER();
+
   return 1; /* Got the lock! */
 }

@@ -526,6 +555,8 @@ void __pthread_alt_unlock(struct _pthrea
   struct wait_node ** const pp_head = (struct wait_node **) &lock->__status;
   int maxprio;

+  WRITE_MEMORY_BARRIER();
+
 #if defined TEST_FOR_COMPARE_AND_SWAP
   if (!__pthread_has_cas)
 #endif
@@ -576,12 +607,15 @@ void __pthread_alt_unlock(struct _pthrea
     p_max_prio = p_node = *pp_head;
     maxprio = INT_MIN;

+    READ_MEMORY_BARRIER(); /* Prevent access to stale data through p_node */
+
     while (p_node != (struct wait_node *) 1) {
       int prio;

       if (p_node->abandoned) {
 	/* Remove abandoned node. */
 #if defined TEST_FOR_COMPARE_AND_SWAP
+        /* TODO: move this ugly #ifdef crap into wait_node_dequeue() */
 	if (!__pthread_has_cas)
 #endif
 #if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
@@ -594,8 +628,13 @@ void __pthread_alt_unlock(struct _pthrea
 	  wait_node_dequeue(pp_head, pp_node, p_node);
 #endif
 	wait_node_free(p_node);
-	READ_MEMORY_BARRIER();
-	p_node = *pp_node;
+        /* Note that the next assignment may take us to the beginning
+	   of the queue, to newly inserted nodes, if pp_node == pp_head.
+	   In that case we need a memory barrier to stabilize the first of
+	   these new nodes. */
+	p_node = *pp_node;
+	if (pp_node == pp_head)
+	  READ_MEMORY_BARRIER(); /* No stale reads through p_node */
 	continue;
       } else if ((prio = p_node->thr->p_priority) >= maxprio) {
 	/* Otherwise remember it if its thread has a higher or equal priority
@@ -605,13 +644,12 @@ void __pthread_alt_unlock(struct _pthrea
 	p_max_prio = p_node;
       }

+      /* This cannot jump backward in the list, so no further
+         read barrier is needed. */
       pp_node = &p_node->next;
-      READ_MEMORY_BARRIER();
       p_node = *pp_node;
     }

-    READ_MEMORY_BARRIER();
-
     /* If all threads abandoned, go back to top */
     if (maxprio == INT_MIN)
       continue;
@@ -638,7 +676,6 @@ void __pthread_alt_unlock(struct _pthrea
 #if defined HAS_COMPARE_AND_SWAP
 	wait_node_dequeue(pp_head, pp_max_prio, p_max_prio);
 #endif
-      WRITE_MEMORY_BARRIER();
       restart(p_max_prio->thr);
       break;
     }
@@ -649,8 +686,7 @@ void __pthread_alt_unlock(struct _pthrea
 #endif
 #if !defined HAS_COMPARE_AND_SWAP || defined TEST_FOR_COMPARE_AND_SWAP
   {
-    WRITE_MEMORY_BARRIER();
-    lock->__spinlock = __LT_SPINLOCK_INIT;
+    __pthread_release(&lock->__spinlock);
   }
 #endif
 }
@@ -668,15 +704,17 @@ int __pthread_compare_and_swap(long * pt
                                int * spinlock)
 {
   int res;
-  if (testandset(spinlock)) __pthread_acquire(spinlock);
+
+  __pthread_acquire(spinlock);
+
   if (*ptr == oldval) {
     *ptr = newval; res = 1;
   } else {
     res = 0;
   }
-  /* Prevent reordering of store to *ptr above and store to *spinlock below */
-  WRITE_MEMORY_BARRIER();
-  *spinlock = 0;
+
+  __pthread_release(spinlock);
+
   return res;
 }

@@ -705,6 +743,8 @@ static void __pthread_acquire(int * spin
 {
   int cnt = 0;
   struct timespec tm;
+
+  READ_MEMORY_BARRIER();

   while (testandset(spinlock)) {
     if (cnt < MAX_SPIN_COUNT) {
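
For completeness, the acquire/release pairing that the spinlock changes
implement boils down to the sketch below.  This is illustration only, not
the linuxthreads code: __sync_lock_test_and_set stands in for testandset,
__sync_synchronize for the barrier macros, and the real __pthread_acquire
additionally backs off with sched_yield() and nanosleep() between
attempts.  The read barrier after a successful testandset mirrors the one
added in __pthread_lock before it returns; the release side mirrors the
new __pthread_release.

#include <sched.h>

/* Stand-in barrier definitions for this sketch only.  */
#define READ_MEMORY_BARRIER()  __sync_synchronize()
#define WRITE_MEMORY_BARRIER() __sync_synchronize()

static void acquire (int *spinlock)
{
  while (__sync_lock_test_and_set (spinlock, 1))
    sched_yield ();           /* crude back-off while the lock is held */
  READ_MEMORY_BARRIER();      /* don't read protected data before owning
                                 the lock */
}

static void release (int *spinlock)
{
  WRITE_MEMORY_BARRIER();     /* commit protected writes before the store */
  *spinlock = 0;
  /* Compiler barrier so the store is neither reordered nor optimized
     away.  */
  __asm __volatile ("" : "=m" (*spinlock) : "m" (*spinlock));
}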


