This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[RFC] PI aware condvars


This is an RFC for converting the internal condvar lock to be PI-aware.
The patch only updates the generic C code and not the arch-specific
code, as I wanted to get feedback before delving into the actual
implementation (and all of the assembly!).
(The patch has some ugly inline asm in order to be able to test the
 generic implementation.)

The patch contains the following

1. Add two new APIs to explicitly set the condvar to be PI aware,
   pthread_condattr_getprotocol_np and pthread_condattr_setprotocol_np.
   These two APIs are functionally equivalent to the
   pthread_mutexattr_getprotocol and pthread_mutexattr_setprotocol
   functions and accept the same set of arguments (PTHREAD_PRIO_NONE,
   PTHREAD_PRIO_INHERIT and PTHREAD_PRIO_PROTECT), though
   PTHREAD_PRIO_PROTECT is currently not supported.
   The initial approach tried to avoid creating these two new APIs and
   instead checked whether the mutex associated with the condvar
   was PI. If so, it then used the PI kernel primitives for the cond lock.
   However, the signalling thread (pthread_cond_broadcast/signal) can
   race with the waiter thread (pthread_cond_(timed)wait), so it is
   not always possible to determine what kind of mutex is associated
   with the condvar at signal time. That is what led to these two new APIs.

2. The implementation uses 2 bits of the __nwaiters field to represent the
   cond protocol (one of PTHREAD_PRIO_NONE, PTHREAD_PRIO_INHERIT or
   PTHREAD_PRIO_PROTECT). Together with the bit already used for the
   clock ID, this leaves 29 bits for the waiter count in __nwaiters,
   which is still pretty large IMO as it is only used to keep track of the
   last waiter for cleanup purposes. (Hopefully there won't be more than
   512M waiters!)

The patch is against the latest git.

I have done a sanity test with this patch to see if it solves the
priority inversion problem that we were seeing earlier without this change.
I'll attach the testcase in the following mail.

I'd really appreciate feedback on this. Thanks!

        -Dinakar


diff -Nurp glibc-20100107/nptl/Makefile glibc-20100107.mod/nptl/Makefile
--- glibc-20100107/nptl/Makefile	2009-11-30 12:08:07.000000000 -0500
+++ glibc-20100107.mod/nptl/Makefile	2010-01-08 08:03:42.000000000 -0500
@@ -75,6 +75,7 @@ libpthread-routines = nptl-init vars eve
 		      old_pthread_cond_signal old_pthread_cond_broadcast \
 		      pthread_condattr_init pthread_condattr_destroy \
 		      pthread_condattr_getpshared pthread_condattr_setpshared \
+		      pthread_condattr_getprotocol_np pthread_condattr_setprotocol_np \
 		      pthread_condattr_getclock pthread_condattr_setclock \
 		      pthread_spin_init pthread_spin_destroy \
 		      pthread_spin_lock pthread_spin_trylock \
diff -Nurp glibc-20100107/nptl/pthread_condattr_getclock.c glibc-20100107.mod/nptl/pthread_condattr_getclock.c
--- glibc-20100107/nptl/pthread_condattr_getclock.c	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/pthread_condattr_getclock.c	2010-01-08 08:03:42.000000000 -0500
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 2003,2004,2007,2010 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2003.
 
@@ -25,7 +25,8 @@ pthread_condattr_getclock (attr, clock_i
      const pthread_condattr_t *attr;
      clockid_t *clock_id;
 {
-  *clock_id = (((((const struct pthread_condattr *) attr)->value) >> 1)
-	       & ((1 << COND_NWAITERS_SHIFT) - 1));
+  *clock_id = (((((const struct pthread_condattr *) attr)->value)
+	       >> CONDATTR_CLOCKID_SHIFT)
+	       & ((1 << COND_PROTOCOL_SHIFT) - 1));
   return 0;
 }
diff -Nurp glibc-20100107/nptl/pthread_condattr_getprotocol_np.c glibc-20100107.mod/nptl/pthread_condattr_getprotocol_np.c
--- glibc-20100107/nptl/pthread_condattr_getprotocol_np.c	1969-12-31 19:00:00.000000000 -0500
+++ glibc-20100107.mod/nptl/pthread_condattr_getprotocol_np.c	2010-01-08 08:03:42.000000000 -0500
@@ -0,0 +1,34 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Dinakar Guniguntala <dino@in.ibm.com>, 2010.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include "pthreadP.h"
+
+
+int
+pthread_condattr_getprotocol_np (attr, protocol)
+     const pthread_condattr_t *attr;
+     int *protocol;
+{
+  *protocol = ((const struct pthread_condattr *) attr)->value;
+
+  *protocol = ((*protocol & CONDATTR_PROTOCOL_MASK)
+	       >> CONDATTR_PROTOCOL_SHIFT);
+
+  return 0;
+}
diff -Nurp glibc-20100107/nptl/pthread_condattr_setclock.c glibc-20100107.mod/nptl/pthread_condattr_setclock.c
--- glibc-20100107/nptl/pthread_condattr_setclock.c	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/pthread_condattr_setclock.c	2010-01-08 08:03:42.000000000 -0500
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004, 2007, 2008 Free Software Foundation, Inc.
+/* Copyright (C) 2003,2004,2007,2008,2010 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2003.
 
@@ -62,11 +62,11 @@ pthread_condattr_setclock (attr, clock_i
     return EINVAL;
 
   /* Make sure the value fits in the bits we reserved.  */
-  assert (clock_id < (1 << COND_NWAITERS_SHIFT));
+  assert (clock_id < (1 << COND_PROTOCOL_SHIFT));
 
   int *valuep = &((struct pthread_condattr *) attr)->value;
 
-  *valuep = ((*valuep & ~(((1 << COND_NWAITERS_SHIFT) - 1) << 1))
+  *valuep = ((*valuep & ~(((1 << COND_PROTOCOL_SHIFT) - 1) << 1))
 	     | (clock_id << 1));
 
   return 0;
diff -Nurp glibc-20100107/nptl/pthread_condattr_setprotocol_np.c glibc-20100107.mod/nptl/pthread_condattr_setprotocol_np.c
--- glibc-20100107/nptl/pthread_condattr_setprotocol_np.c	1969-12-31 19:00:00.000000000 -0500
+++ glibc-20100107.mod/nptl/pthread_condattr_setprotocol_np.c	2010-01-08 08:03:42.000000000 -0500
@@ -0,0 +1,39 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Dinakar Guniguntala <dino@in.ibm.com>, 2010.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <errno.h>
+#include <pthreadP.h>
+
+int
+pthread_condattr_setprotocol_np (attr, protocol)
+     pthread_condattr_t *attr;
+     int protocol;
+{
+  if (protocol != PTHREAD_PRIO_NONE
+      && protocol != PTHREAD_PRIO_INHERIT
+      && __builtin_expect (protocol != PTHREAD_PRIO_PROTECT, 0))
+    return EINVAL;
+
+  int *valuep = &((struct pthread_condattr *) attr)->value;
+
+  *valuep = ((*valuep & ~CONDATTR_PROTOCOL_MASK)
+	     | (protocol << CONDATTR_PROTOCOL_SHIFT));
+
+  return 0;
+}
diff -Nurp glibc-20100107/nptl/pthread_cond_broadcast.c glibc-20100107.mod/nptl/pthread_cond_broadcast.c
--- glibc-20100107/nptl/pthread_cond_broadcast.c	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/pthread_cond_broadcast.c	2010-01-08 08:03:42.000000000 -0500
@@ -1,4 +1,4 @@
-/* Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 2003, 2004, 2006, 2007, 2010 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Martin Schwidefsky <schwidefsky@de.ibm.com>, 2003.
 
@@ -27,6 +27,8 @@
 #include <shlib-compat.h>
 #include <kernel-features.h>
 
+#include "cond-lock.h"
+ 
 
 int
 __pthread_cond_broadcast (cond)
@@ -34,8 +36,9 @@ __pthread_cond_broadcast (cond)
 {
   int pshared = (cond->__data.__mutex == (void *) ~0l)
 		? LLL_SHARED : LLL_PRIVATE;
+
   /* Make sure we are alone.  */
-  lll_lock (cond->__data.__lock, pshared);
+  cond_lock(cond, pshared);
 
   /* Are there any waiters to be woken?  */
   if (cond->__data.__total_seq > cond->__data.__wakeup_seq)
@@ -45,36 +48,48 @@ __pthread_cond_broadcast (cond)
       cond->__data.__woken_seq = cond->__data.__total_seq;
       cond->__data.__futex = (unsigned int) cond->__data.__total_seq * 2;
       int futex_val = cond->__data.__futex;
+      pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex;
+      int err;
       /* Signal that a broadcast happened.  */
       ++cond->__data.__broadcast_seq;
 
       /* We are done.  */
-      lll_unlock (cond->__data.__lock, pshared);
+      cond_unlock(cond, pshared);
 
       /* Do not use requeue for pshared condvars.  */
       if (cond->__data.__mutex == (void *) ~0l)
 	goto wake_all;
 
-      /* Wake everybody.  */
-      pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex;
-
-      /* XXX: Kernel so far doesn't support requeue to PI futex.  */
-      /* XXX: Kernel so far can only requeue to the same type of futex,
-	 in this case private (we don't requeue for pshared condvars).  */
+      /* We don't requeue for pshared condvars  */
       if (__builtin_expect (mut->__data.__kind
-			    & (PTHREAD_MUTEX_PRIO_INHERIT_NP
-			       | PTHREAD_MUTEX_PSHARED_BIT), 0))
-	goto wake_all;
-
-      /* lll_futex_requeue returns 0 for success and non-zero
-	 for errors.  */
-      if (__builtin_expect (lll_futex_requeue (&cond->__data.__futex, 1,
-					       INT_MAX, &mut->__data.__lock,
-					       futex_val, LLL_PRIVATE), 0))
+			    & PTHREAD_MUTEX_PSHARED_BIT, 0))
+ 	goto wake_all;
+ 
+      if (__builtin_expect (mut->__data.__kind
+ 			    & PTHREAD_MUTEX_PRIO_INHERIT_NP, 0))
+	{
+	 /* lll_futex_requeue_pi returns the number of tasks requeue'd on
+	    success and negative for errors.  */
+         err = lll_futex_requeue_pi (&cond->__data.__futex, 1, INT_MAX, 
+				     &mut->__data.__lock, futex_val,
+				     LLL_PRIVATE);
+	 /* The requeue_pi functionality is not available.  */
+	 if (__builtin_expect (err == -ENOSYS, 0))
+	   goto wake_all;
+	}
+      else
 	{
-	  /* The requeue functionality is not available.  */
-	wake_all:
-	  lll_futex_wake (&cond->__data.__futex, INT_MAX, pshared);
+	 /* lll_futex_requeue returns the number of tasks requeue'd on
+	    success and negative for errors.  */
+	 err = lll_futex_requeue (&cond->__data.__futex, 1, INT_MAX,
+				  &mut->__data.__lock, futex_val,
+				  LLL_PRIVATE);
+	 if (__builtin_expect (err == -ENOSYS, 0))
+	   {
+	    /* The requeue functionality is not available.  */
+	   wake_all:
+	    lll_futex_wake (&cond->__data.__futex, INT_MAX, pshared);
+	   }
 	}
 
       /* That's all.  */
@@ -82,7 +97,7 @@ __pthread_cond_broadcast (cond)
     }
 
   /* We are done.  */
-  lll_unlock (cond->__data.__lock, pshared);
+  cond_unlock(cond, pshared);
 
   return 0;
 }
diff -Nurp glibc-20100107/nptl/pthread_cond_init.c glibc-20100107.mod/nptl/pthread_cond_init.c
--- glibc-20100107/nptl/pthread_cond_init.c	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/pthread_cond_init.c	2010-01-08 08:03:42.000000000 -0500
@@ -1,4 +1,4 @@
-/* Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008
+/* Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
@@ -32,9 +32,26 @@ __pthread_cond_init (cond, cond_attr)
   cond->__data.__lock = LLL_LOCK_INITIALIZER;
   cond->__data.__futex = 0;
   cond->__data.__nwaiters = (icond_attr != NULL
-			     ? ((icond_attr->value >> 1)
-				& ((1 << COND_NWAITERS_SHIFT) - 1))
+			     ? ((icond_attr->value >> CONDATTR_CLOCKID_SHIFT)
+				& ((1 << COND_PROTOCOL_SHIFT) - 1))
 			     : CLOCK_REALTIME);
+  if (icond_attr != NULL)
+   {
+    switch (icond_attr->value & CONDATTR_PROTOCOL_MASK)
+     {
+     case PTHREAD_PRIO_INHERIT << CONDATTR_PROTOCOL_SHIFT:
+       cond->__data.__nwaiters |= COND_PRIO_INHERIT;
+       break;
+
+     case PTHREAD_PRIO_PROTECT << CONDATTR_PROTOCOL_SHIFT:
+       cond->__data.__nwaiters |= COND_PRIO_PROTECT;
+       break;
+
+     default:
+       break;
+     }
+   }
+
   cond->__data.__total_seq = 0;
   cond->__data.__wakeup_seq = 0;
   cond->__data.__woken_seq = 0;
diff -Nurp glibc-20100107/nptl/pthread_cond_signal.c glibc-20100107.mod/nptl/pthread_cond_signal.c
--- glibc-20100107/nptl/pthread_cond_signal.c	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/pthread_cond_signal.c	2010-01-08 08:03:42.000000000 -0500
@@ -27,16 +27,19 @@
 #include <shlib-compat.h>
 #include <kernel-features.h>
 
+#include "cond-lock.h"
+
 
 int
 __pthread_cond_signal (cond)
      pthread_cond_t *cond;
 {
-  int pshared = (cond->__data.__mutex == (void *) ~0l)
-		? LLL_SHARED : LLL_PRIVATE;
+  int err, futex_val;
+  pthread_mutex_t *mutex = (pthread_mutex_t *) cond->__data.__mutex;
+  int pshared = (mutex == (void *) ~0l) ? LLL_SHARED : LLL_PRIVATE;
 
   /* Make sure we are alone.  */
-  lll_lock (cond->__data.__lock, pshared);
+  cond_lock(cond, pshared);
 
   /* Are there any waiters to be woken?  */
   if (cond->__data.__total_seq > cond->__data.__wakeup_seq)
@@ -44,18 +47,38 @@ __pthread_cond_signal (cond)
       /* Yes.  Mark one of them as woken.  */
       ++cond->__data.__wakeup_seq;
       ++cond->__data.__futex;
+      futex_val = cond->__data.__futex;
+      pthread_mutex_t *mut = (pthread_mutex_t *) cond->__data.__mutex;
 
+      /* We dont do requeue-pi for pshared mutexes */
+      if (pshared == LLL_PRIVATE &&
+	  mut->__data.__kind & PTHREAD_MUTEX_PRIO_INHERIT_NP)
+        {
+	 /* lll_futex_requeue_pi returns the number of tasks requeue'd on
+	    success and negative for errors.  */
+         err = lll_futex_requeue_pi (&cond->__data.__futex, 1, 0, 
+				     &mut->__data.__lock, futex_val,
+				     pshared);
+	 /* The requeue_pi functionality is not available.  */
+	 if (__builtin_expect (err != -ENOSYS, 0))
+           goto done;
+        }
+#if 0
+      /* This is not needed for the x86_64 or i686 arches */
       /* Wake one.  */
-      if (! __builtin_expect (lll_futex_wake_unlock (&cond->__data.__futex, 1,
-						     1, &cond->__data.__lock,
-						     pshared), 0))
+      else if (! __builtin_expect (lll_futex_wake_unlock (&cond->__data.__futex,
+							  1, 1,
+							  &cond->__data.__lock,
+						          pshared), 0))
 	return 0;
+#endif
 
       lll_futex_wake (&cond->__data.__futex, 1, pshared);
     }
 
+done:
   /* We are done.  */
-  lll_unlock (cond->__data.__lock, pshared);
+  cond_unlock(cond, pshared);
 
   return 0;
 }
diff -Nurp glibc-20100107/nptl/pthread_cond_timedwait.c glibc-20100107.mod/nptl/pthread_cond_timedwait.c
--- glibc-20100107/nptl/pthread_cond_timedwait.c	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/pthread_cond_timedwait.c	2010-01-08 08:05:30.000000000 -0500
@@ -27,6 +27,8 @@
 
 #include <shlib-compat.h>
 
+#include "cond-lock.h"
+
 
 /* Cleanup handler, defined in pthread_cond_wait.c.  */
 extern void __condvar_cleanup (void *arg)
@@ -49,6 +51,7 @@ __pthread_cond_timedwait (cond, mutex, a
   struct _pthread_cleanup_buffer buffer;
   struct _condvar_cleanup_buffer cbuffer;
   int result = 0;
+  int pi_requeued;
 
   /* Catch invalid parameters.  */
   if (abstime->tv_nsec < 0 || abstime->tv_nsec >= 1000000000)
@@ -57,14 +60,14 @@ __pthread_cond_timedwait (cond, mutex, a
   int pshared = (cond->__data.__mutex == (void *) ~0l)
 		? LLL_SHARED : LLL_PRIVATE;
 
-  /* Make sure we are along.  */
-  lll_lock (cond->__data.__lock, pshared);
+  /* Make sure we are alone.  */
+  cond_lock(cond, pshared);
 
   /* Now we can release the mutex.  */
   int err = __pthread_mutex_unlock_usercnt (mutex, 0);
   if (err)
     {
-      lll_unlock (cond->__data.__lock, pshared);
+      cond_unlock(cond, pshared);
       return err;
     }
 
@@ -97,6 +100,7 @@ __pthread_cond_timedwait (cond, mutex, a
 
   while (1)
     {
+      pi_requeued = 0;
       struct timespec rt;
       {
 #ifdef __NR_clock_gettime
@@ -104,7 +108,7 @@ __pthread_cond_timedwait (cond, mutex, a
 	int ret;
 	ret = INTERNAL_SYSCALL (clock_gettime, err, 2,
 				(cond->__data.__nwaiters
-				 & ((1 << COND_NWAITERS_SHIFT) - 1)),
+				 & ((1 << COND_PROTOCOL_SHIFT) - 1)),
 				&rt);
 # ifndef __ASSUME_POSIX_TIMERS
 	if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (ret, err), 0))
@@ -150,20 +154,34 @@ __pthread_cond_timedwait (cond, mutex, a
       unsigned int futex_val = cond->__data.__futex;
 
       /* Prepare to wait.  Release the condvar futex.  */
-      lll_unlock (cond->__data.__lock, pshared);
+      cond_unlock(cond, pshared);
 
       /* Enable asynchronous cancellation.  Required by the standard.  */
       cbuffer.oldtype = __pthread_enable_asynccancel ();
 
+      if (mutex->__data.__kind & PTHREAD_MUTEX_PRIO_INHERIT_NP)
+        {
+	  /* Try requeueing to the PI mutex, if no support in the kernel
+	     try the non-requeue syscall.  */
+          err = lll_futex_timed_wait_requeue_pi (cond, futex_val, abstime,
+						 &mutex->__data.__lock,
+						 pshared);
+          if (__builtin_expect (!err, 1))
+	       pi_requeued = 1;
+          if (__builtin_expect (err != -ENOSYS, 0))
+	       goto woken;
+	}
+
       /* Wait until woken by signal or broadcast.  */
       err = lll_futex_timed_wait (&cond->__data.__futex,
 				  futex_val, &rt, pshared);
 
+woken:
       /* Disable asynchronous cancellation.  */
       __pthread_disable_asynccancel (cbuffer.oldtype);
 
       /* We are going to look at shared data again, so get the lock.  */
-      lll_lock (cond->__data.__lock, pshared);
+      cond_lock(cond, pshared);
 
       /* If a broadcast happened, we are done.  */
       if (cbuffer.bc_seq != cond->__data.__broadcast_seq)
@@ -203,13 +221,17 @@ __pthread_cond_timedwait (cond, mutex, a
     lll_futex_wake (&cond->__data.__nwaiters, 1, pshared);
 
   /* We are done with the condvar.  */
-  lll_unlock (cond->__data.__lock, pshared);
+  cond_unlock(cond, pshared);
 
   /* The cancellation handling is back to normal, remove the handler.  */
   __pthread_cleanup_pop (&buffer, 0);
 
-  /* Get the mutex before returning.  */
-  err = __pthread_mutex_cond_lock (mutex);
+  /* Get the mutex before returning. If the requeue_pi call above was successful,
+     the lock is already held in the kernel, so just return to the application.  */
+  if (pi_requeued)
+    __pthread_mutex_cond_lock_adjust (mutex);
+  else
+    err = __pthread_mutex_cond_lock (mutex);
 
   return err ?: result;
 }
diff -Nurp glibc-20100107/nptl/pthread_cond_wait.c glibc-20100107.mod/nptl/pthread_cond_wait.c
--- glibc-20100107/nptl/pthread_cond_wait.c	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/pthread_cond_wait.c	2010-01-08 08:05:37.000000000 -0500
@@ -17,15 +17,19 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
+#include <assert.h>
 #include <endian.h>
 #include <errno.h>
 #include <sysdep.h>
 #include <lowlevellock.h>
+#include <not-cancel.h>
 #include <pthread.h>
 #include <pthreadP.h>
 
 #include <shlib-compat.h>
 
+#include "cond-lock.h"
+ 
 
 struct _condvar_cleanup_buffer
 {
@@ -38,16 +42,55 @@ struct _condvar_cleanup_buffer
 
 void
 __attribute__ ((visibility ("hidden")))
+lll_pi_lock(int *futexp, int private)
+{
+  pid_t id = THREAD_GETMEM (THREAD_SELF, tid);
+  int newval = id;
+  int ret;
+
+  newval |= FUTEX_WAITERS; 
+  ret = atomic_compare_and_exchange_val_acq (futexp, newval, 0);
+
+  if (ret != 0)
+    {
+      /* The mutex is locked.  The kernel will now take care of
+	 everything.  */
+      INTERNAL_SYSCALL_DECL (__err);
+      int e = INTERNAL_SYSCALL (futex, __err, 4, futexp,
+				__lll_private_flag (FUTEX_LOCK_PI, private),
+				1, 0);
+    }
+}
+
+
+void
+__attribute__ ((visibility ("hidden")))
+lll_pi_unlock(int *futexp, int private)
+{
+
+  if ((*futexp & FUTEX_WAITERS) != 0
+      || atomic_compare_and_exchange_bool_acq (futexp, 0,
+					       THREAD_GETMEM (THREAD_SELF,
+							      tid)))
+    {
+      INTERNAL_SYSCALL_DECL (__err);
+      INTERNAL_SYSCALL (futex, __err, 2, futexp,
+			__lll_private_flag (FUTEX_UNLOCK_PI, private));
+    }
+}
+
+ 
+void
+__attribute__ ((visibility ("hidden")))
 __condvar_cleanup (void *arg)
 {
   struct _condvar_cleanup_buffer *cbuffer =
     (struct _condvar_cleanup_buffer *) arg;
   unsigned int destroying;
-  int pshared = (cbuffer->cond->__data.__mutex == (void *) ~0l)
-  		? LLL_SHARED : LLL_PRIVATE;
+  int pshared = (cbuffer->mutex == (void *) ~0l) ? LLL_SHARED : LLL_PRIVATE;
 
   /* We are going to modify shared data.  */
-  lll_lock (cbuffer->cond->__data.__lock, pshared);
+  cond_lock(cbuffer->cond, pshared);
 
   if (cbuffer->bc_seq == cbuffer->cond->__data.__broadcast_seq)
     {
@@ -78,7 +121,7 @@ __condvar_cleanup (void *arg)
     }
 
   /* We are done.  */
-  lll_unlock (cbuffer->cond->__data.__lock, pshared);
+  cond_unlock(cbuffer->cond, pshared);
 
   /* Wake everybody to make sure no condvar signal gets lost.  */
   if (! destroying)
@@ -97,18 +140,19 @@ __pthread_cond_wait (cond, mutex)
 {
   struct _pthread_cleanup_buffer buffer;
   struct _condvar_cleanup_buffer cbuffer;
-  int err;
   int pshared = (cond->__data.__mutex == (void *) ~0l)
   		? LLL_SHARED : LLL_PRIVATE;
+  int pi_requeued;
+  int err;
 
   /* Make sure we are along.  */
-  lll_lock (cond->__data.__lock, pshared);
+  cond_lock(cond, pshared);
 
   /* Now we can release the mutex.  */
   err = __pthread_mutex_unlock_usercnt (mutex, 0);
   if (__builtin_expect (err, 0))
     {
-      lll_unlock (cond->__data.__lock, pshared);
+      cond_unlock(cond, pshared);
       return err;
     }
 
@@ -142,21 +186,35 @@ __pthread_cond_wait (cond, mutex)
   do
     {
       unsigned int futex_val = cond->__data.__futex;
+      pi_requeued = 0;
 
       /* Prepare to wait.  Release the condvar futex.  */
-      lll_unlock (cond->__data.__lock, pshared);
+      cond_unlock(cond, pshared);
 
       /* Enable asynchronous cancellation.  Required by the standard.  */
       cbuffer.oldtype = __pthread_enable_asynccancel ();
 
+      if (mutex->__data.__kind & PTHREAD_MUTEX_PRIO_INHERIT_NP)
+	{
+	  /* Try requeueing to the PI mutex, if no support in the kernel
+             try the non-requeue syscall.  */
+          err = lll_futex_wait_requeue_pi (cond, futex_val, 
+					   &mutex->__data.__lock, pshared);
+          if (__builtin_expect (!err, 1))
+	       pi_requeued = 1;
+          if (__builtin_expect (err != -ENOSYS, 0))
+	       goto woken;
+	}
+          
       /* Wait until woken by signal or broadcast.  */
       lll_futex_wait (&cond->__data.__futex, futex_val, pshared);
 
+woken:
       /* Disable asynchronous cancellation.  */
       __pthread_disable_asynccancel (cbuffer.oldtype);
 
       /* We are going to look at shared data again, so get the lock.  */
-      lll_lock (cond->__data.__lock, pshared);
+      cond_lock(cond, pshared);
 
       /* If a broadcast happened, we are done.  */
       if (cbuffer.bc_seq != cond->__data.__broadcast_seq)
@@ -182,13 +240,20 @@ __pthread_cond_wait (cond, mutex)
     lll_futex_wake (&cond->__data.__nwaiters, 1, pshared);
 
   /* We are done with the condvar.  */
-  lll_unlock (cond->__data.__lock, pshared);
+  cond_unlock(cond, pshared);
 
   /* The cancellation handling is back to normal, remove the handler.  */
   __pthread_cleanup_pop (&buffer, 0);
 
-  /* Get the mutex before returning.  */
-  return __pthread_mutex_cond_lock (mutex);
+  /* Get the mutex before returning. If the requeue_pi call above was successful,
+     the lock is already held in the kernel, so just return 0 to application.  */
+  if (pi_requeued)
+    {
+      __pthread_mutex_cond_lock_adjust (mutex);
+      return 0;
+    }
+  else
+    return __pthread_mutex_cond_lock (mutex);
 }
 
 versioned_symbol (libpthread, __pthread_cond_wait, pthread_cond_wait,
diff -Nurp glibc-20100107/nptl/sysdeps/pthread/cond-lock.h glibc-20100107.mod/nptl/sysdeps/pthread/cond-lock.h
--- glibc-20100107/nptl/sysdeps/pthread/cond-lock.h	1969-12-31 19:00:00.000000000 -0500
+++ glibc-20100107.mod/nptl/sysdeps/pthread/cond-lock.h	2010-01-08 08:03:42.000000000 -0500
@@ -0,0 +1,58 @@
+/* Copyright (C) 2010 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#ifndef _COND_LOCK_H
+#define _COND_LOCK_H 1
+
+
+extern void lll_pi_lock (int *futex, int pshared)
+     __attribute__ ((visibility ("hidden")));
+extern void lll_pi_unlock (int *futex, int pshared)
+     __attribute__ ((visibility ("hidden")));
+
+static inline void cond_lock(pthread_cond_t *cond,
+			     int pshared);
+
+static inline void cond_unlock(pthread_cond_t *cond,
+			       int pshared);
+
+static inline void cond_lock(cond, pshared)
+     pthread_cond_t *cond;
+     int pshared;
+{
+  if (pshared == LLL_PRIVATE
+      && ((cond->__data.__nwaiters & COND_PROTOCOL_MASK)
+	  == COND_PRIO_INHERIT))
+    lll_pi_lock (&cond->__data.__lock, pshared);
+  else
+    lll_lock (cond->__data.__lock, pshared);
+}
+
+static inline void cond_unlock(cond, pshared)
+     pthread_cond_t *cond;
+     int pshared;
+{
+  if (pshared == LLL_PRIVATE
+      && ((cond->__data.__nwaiters & COND_PROTOCOL_MASK)
+	  == COND_PRIO_INHERIT))
+    lll_pi_unlock (&cond->__data.__lock, pshared);
+  else
+    lll_unlock (cond->__data.__lock, pshared);
+}
+ 
+#endif
diff -Nurp glibc-20100107/nptl/sysdeps/pthread/pthread.h glibc-20100107.mod/nptl/sysdeps/pthread/pthread.h
--- glibc-20100107/nptl/sysdeps/pthread/pthread.h	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/sysdeps/pthread/pthread.h	2010-01-08 08:03:42.000000000 -0500
@@ -998,6 +998,18 @@ extern int pthread_condattr_getpshared (
 extern int pthread_condattr_setpshared (pthread_condattr_t *__attr,
                                         int __pshared) __THROW __nonnull ((1));
 
+/* Get the protocol flag of the condition variable attribute ATTR.  */
+extern int pthread_condattr_getprotocol_np (__const pthread_condattr_t *
+                                            __restrict __attr,
+                                            int *__restrict __protocol)
+     __THROW __nonnull ((1, 2));
+
+/* Set the cond protocol attribute in ATTR to protocol (one of
+   PTHREAD_PRIO_NONE, PTHREAD_PRIO_INHERIT or PTHREAD_PRIO_PROTECT).  */
+extern int pthread_condattr_setprotocol_np (pthread_condattr_t *__attr,
+                                            int __protocol)
+     __THROW __nonnull ((1));
+
 #ifdef __USE_XOPEN2K
 /* Get the clock selected for the conditon variable attribute ATTR.  */
 extern int pthread_condattr_getclock (__const pthread_condattr_t *
diff -Nurp glibc-20100107/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h glibc-20100107.mod/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h
--- glibc-20100107/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h	2009-12-14 02:53:09.000000000 -0500
+++ glibc-20100107.mod/nptl/sysdeps/unix/sysv/linux/i386/lowlevellock.h	2010-01-08 08:03:42.000000000 -0500
@@ -1,4 +1,5 @@
-/* Copyright (C) 2002-2004, 2006-2008, 2009 Free Software Foundation, Inc.
+/* Copyright (C) 2002,2003,2004,2006,2007,2008,2010
+   Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
 
@@ -238,6 +239,91 @@ LLL_STUB_UNWIND_INFO_END
   } while (0)
 
 
+#define lll_futex_wait_requeue_pi(cond, val, mutex, private) \
+  lll_futex_timed_wait_requeue_pi (cond, val, NULL, mutex, private)
+
+
+#define lll_futex_timed_wait_requeue_pi(cond, val, time, mutex, private) \
+  ({									      \
+    INTERNAL_SYSCALL_DECL (__err);					      \
+    long int __ret;							      \
+    unsigned int *__futexp = &cond->__data.__futex;			      \
+    int __flag = FUTEX_WAIT_REQUEUE_PI;					      \
+    __flag = (cond->__data.__nwaiters & COND_CLOCK_MASK) ?		      \
+	      __flag  : __flag | FUTEX_CLOCK_REALTIME;			      \
+    __ret = INTERNAL_SYSCALL (futex, __err, 5,				      \
+			      (__futexp),				      \
+			      __lll_private_flag (__flag, private),	      \
+			      (val), time,				      \
+			      (mutex));					      \
+    INTERNAL_SYSCALL_ERROR_P (__ret, __err)? -__ret : __ret;		      \
+  })
+
+
+#define lll_futex_requeue(futex, nr_wake, nr_move, mutex, val, private) \
+  ({									      \
+    int __status;							      \
+    int __save_ebx;							      \
+    int __dummy;							      \
+    int __flag = FUTEX_CMP_REQUEUE;					      \
+    register __typeof (nr_move) _nr_move asm ("esi") = (nr_move);	      \
+    __asm __volatile ("mov %%ebx, %0\n\t"				      \
+		      "mov %1, %%ebx\n\t"				      \
+		      LLL_EBX_LOAD					      \
+		      "pushl %%ebp\n\t"					      \
+		      "mov %3, %%ebp\n\t"				      \
+		      : "=m" (__save_ebx)				      \
+		      : "c" (mutex), LLL_EBX_REG (futex),		      \
+		        "a" (val));					      \
+    __asm __volatile (LLL_ENTER_KERNEL					      \
+		      "popl %%ebp\n\t"					      \
+		      : "=a" (__status)					      \
+		      : "0" (SYS_futex), 				      \
+			"c" (__lll_private_flag (__flag, private)),	      \
+			"d" (nr_wake), "S" (_nr_move),			      \
+			"i" (0) /* phony, to align next arg's number */,      \
+			"i" (offsetof (tcbhead_t, sysinfo))		      \
+		      : "memory");					      \
+    __asm __volatile (LLL_EBX_LOAD					      \
+		      "mov %1, %%ebx\n\t"				      \
+		      : "=m" (__dummy)					      \
+		      : "m" (__save_ebx), LLL_EBX_REG (futex));		      \
+    __status;								      \
+  })
+
+
+#define lll_futex_requeue_pi(futex, nr_wake, nr_move, mutex, val, private) \
+  ({									      \
+    int __status;							      \
+    int __save_ebx;							      \
+    int __dummy;							      \
+    int __flag = FUTEX_CMP_REQUEUE_PI;					      \
+    register __typeof (nr_move) _nr_move asm ("esi") = (nr_move);	      \
+    __asm __volatile ("mov %%ebx, %0\n\t"				      \
+		      "mov %1, %%ebx\n\t"				      \
+		      LLL_EBX_LOAD					      \
+		      "pushl %%ebp\n\t"					      \
+		      "mov %3, %%ebp\n\t"				      \
+		      : "=m" (__save_ebx)				      \
+		      : "c" (mutex), LLL_EBX_REG (futex),		      \
+		        "a" (val));					      \
+    __asm __volatile (LLL_ENTER_KERNEL					      \
+		      "popl %%ebp\n\t"					      \
+		      : "=a" (__status)					      \
+		      : "0" (SYS_futex), 				      \
+			"c" (__lll_private_flag (__flag, private)),	      \
+			"d" (nr_wake), "S" (_nr_move),			      \
+			"i" (0) /* phony, to align next arg's number */,      \
+			"i" (offsetof (tcbhead_t, sysinfo))		      \
+		      : "memory");					      \
+    __asm __volatile (LLL_EBX_LOAD					      \
+		      "mov %1, %%ebx\n\t"				      \
+		      : "=m" (__dummy)					      \
+		      : "m" (__save_ebx), LLL_EBX_REG (futex));		      \
+    __status;								      \
+  })
+
+
 /* NB: in the lll_trylock macro we simply return the value in %eax
    after the cmpxchg instruction.  In case the operation succeded this
    value is zero.  In case the operation failed, the cmpxchg instruction
diff -Nurp glibc-20100107/nptl/sysdeps/unix/sysv/linux/internaltypes.h glibc-20100107.mod/nptl/sysdeps/unix/sysv/linux/internaltypes.h
--- glibc-20100107/nptl/sysdeps/unix/sysv/linux/internaltypes.h	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/sysdeps/unix/sysv/linux/internaltypes.h	2010-01-08 08:03:42.000000000 -0500
@@ -67,20 +67,38 @@ struct pthread_condattr
 {
   /* Combination of values:
 
-     Bit 0  : flag whether coditional variable will be shareable between
+     Bit 0  : flag whether condition variable will be shareable between
 	      processes.
 
-     Bit 1-7: clock ID.  */
+     Bit 1-7: clock ID.
+     Bit 8-9: protocol. One of PTHREAD_PRIO_NONE, PTHREAD_PRIO_INHERIT
+              or PTHREAD_PRIO_PROTECT.  */
   int value;
 };
 
 
+#define CONDATTR_PSHARED_MASK	0x00000001	/* Bit 0: process-shared flag.  */
+#define CONDATTR_CLOCKID_MASK	0x000000FE	/* Bits 1-7: clock ID.  */
+#define CONDATTR_CLOCKID_SHIFT	1
+#define CONDATTR_PROTOCOL_MASK	0x00000300	/* Bits 8-9: protocol.  */
+#define CONDATTR_PROTOCOL_SHIFT	8
+
+
+enum { /* NOTE(review): assumes these are compared against (__nwaiters & COND_PROTOCOL_MASK), i.e. mask 0x6 -- confirm against the users.  */
+  COND_PRIO_INHERIT = 2,	/* PTHREAD_PRIO_INHERIT (1) << COND_PROTOCOL_SHIFT (1).  */
+  COND_PRIO_PROTECT = 4		/* PTHREAD_PRIO_PROTECT (2) << COND_PROTOCOL_SHIFT (1); the implicit value 3 would overlap the clock bit.  */
+};
+
+
 /* The __NWAITERS field is used as a counter and to house the number
-   of bits for other purposes.  COND_CLOCK_BITS is the number
-   of bits needed to represent the ID of the clock.  COND_NWAITERS_SHIFT
+   of bits for other purposes.  COND_CLOCK_MASK defines the bits used
+   to represent the ID of the clock.  COND_PROTOCOL_MASK defines the
+   bits used to represent cond protocol attributes.  COND_NWAITERS_SHIFT
    is the number of bits reserved for other purposes like the clock.  */
-#define COND_CLOCK_BITS		1
-#define COND_NWAITERS_SHIFT	1
+#define COND_CLOCK_MASK		0x00000001	/* Bit 0 of __nwaiters.  */
+#define COND_PROTOCOL_SHIFT	1
+#define COND_PROTOCOL_MASK	0x00000006	/* Bits 1-2 of __nwaiters.  */
+#define COND_NWAITERS_SHIFT	3		/* The low three bits are reserved; the counter sits above them.  */
 
 
 /* Read-write lock variable attribute data structure.  */
diff -Nurp glibc-20100107/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h glibc-20100107.mod/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h
--- glibc-20100107/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/sysdeps/unix/sysv/linux/x86_64/lowlevellock.h	2010-01-08 08:03:42.000000000 -0500
@@ -223,6 +223,31 @@ LLL_STUB_UNWIND_INFO_END
   })
 
 
+#define lll_futex_wait_requeue_pi(cond, val, mutex, private) \
+  lll_futex_timed_wait_requeue_pi(cond, val, NULL, mutex, private) /* Forwards to the timed variant with a NULL timeout.  */
+
+
+#define lll_futex_timed_wait_requeue_pi(cond, val, timeout, mutex, private) \
+  ({ /* Issues SYS_futex with FUTEX_WAIT_REQUEUE_PI on COND's __futex word; evaluates to the raw syscall result.  COND is evaluated twice.  */ \
+    register const struct timespec *__to __asm ("r10") = (timeout);	      \
+    register void *__mutex __asm  ("r8") = (mutex);			      \
+    unsigned int *__futexp = &(cond)->__data.__futex;			      \
+    int __flag = FUTEX_WAIT_REQUEUE_PI;					      \
+    /* FUTEX_CLOCK_REALTIME is added only when the clock bit of __nwaiters is clear.  */ \
+    __flag = ((cond)->__data.__nwaiters & COND_CLOCK_MASK) ?		      \
+		      __flag  : __flag | FUTEX_CLOCK_REALTIME;		      \
+    int __status;							      \
+    register __typeof (val) _val __asm ("edx") = (val);			      \
+    __asm __volatile ("syscall"						      \
+		      : "=a" (__status)					      \
+		      : "0" (SYS_futex), "D" (__futexp),		      \
+			"S" (__lll_private_flag (__flag, private)),	      \
+			"d" (_val), "r" (__to),				      \
+			"r" (__mutex)					      \
+		      : "memory", "cc", "r11", "cx");			      \
+    __status;								      \
+  })
+
+
 #define lll_futex_wake(futex, nr, private) \
   do {									      \
     int __ignore;							      \
@@ -554,6 +579,21 @@ LLL_STUB_UNWIND_INFO_END
 		       : "cx", "r11", "cc", "memory");			      \
      __res < 0; })
 
+/* Issues SYS_futex with FUTEX_CMP_REQUEUE_PI on FTX.  Returns non-zero if error happened, zero if success.  */
+#define lll_futex_requeue_pi(ftx, nr_wake, nr_move, mutex, val, private) \
+  ({ int __res;								      \
+     register int __nr_move __asm ("r10") = nr_move;			      \
+     register void *__mutex __asm ("r8") = mutex;			      \
+     register int __val __asm ("r9") = val;				      \
+     __asm __volatile ("syscall"					      \
+		       : "=a" (__res)					      \
+		       : "0" (__NR_futex), "D" ((void *) ftx),		      \
+			 "S" (__lll_private_flag (FUTEX_CMP_REQUEUE_PI,	      \
+						  private)), "d" (nr_wake),   \
+			 "r" (__nr_move), "r" (__mutex), "r" (__val)	      \
+		       : "cx", "r11", "cc", "memory");			      \
+     __res < 0; /* Any negative syscall result is reported as failure (1).  */ })
+
 #define lll_islocked(futex) \
   (futex != LLL_LOCK_INITIALIZER)
 
diff -Nurp glibc-20100107/nptl/Versions glibc-20100107.mod/nptl/Versions
--- glibc-20100107/nptl/Versions	2009-10-12 00:51:20.000000000 -0400
+++ glibc-20100107.mod/nptl/Versions	2010-01-08 08:03:42.000000000 -0500
@@ -244,6 +244,10 @@ libpthread {
     pthread_sigqueue;
   };
 
+  GLIBC_2.12 {
+    pthread_condattr_getprotocol_np; pthread_condattr_setprotocol_np;
+  }
+
   GLIBC_PRIVATE {
     __pthread_initialize_minimal;
     __pthread_clock_gettime; __pthread_clock_settime;
diff -Nurp glibc-20100107/Versions.def glibc-20100107.mod/Versions.def
--- glibc-20100107/Versions.def	2009-11-18 02:11:27.000000000 -0500
+++ glibc-20100107.mod/Versions.def	2010-01-08 08:03:42.000000000 -0500
@@ -91,6 +91,7 @@ libpthread {
   GLIBC_2.4
   GLIBC_2.6
   GLIBC_2.11
+  GLIBC_2.12
   GLIBC_PRIVATE
 }
 libresolv {


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]