This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][BZ #14652] Fix deadlock on PI mutex in pthread_cond_waitcleanup handler


Hi,

This bug is related in principle to pr#14417.  If a thread is
cancelled just after it returns successfully from a futex wait for a PI
mutex and before async cancellation is disabled, it already holds the
mutex and hence, the cleanup handler gets deadlocked trying to lock a
mutex it already owns.  The bug report has a reproducer that
demonstrates the problem.

The attached patch fixes this by checking if the mutex is owned by the
current thread and if it is, then resume rewinding without attempting
to lock the mutex.  The patch has a test case that verifies that this
is fixed.  I have verified this on x86_64 and i686 (32-bit build on
x86_64).  I have also verified that the test case fails without this
patch.

The patch applies on top of the patch for pr #14417 that I just
submitted.  OK to commit once the pr #14417 patch is approved?

Regards,
Siddhesh

Regards,
Siddhesh

nptl/ChangeLog:

	[BZ #14652]
	* Makefile (tests): New test case tst-cond25.
	(LDFLAGS-tst-cond25): Link tst-cond25 against librt.
	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
	(__condvar_tw_cleanup): Lock mutex only if we don't already
	own it.
	* sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
	(__condvar_w_cleanup): Likewise.
	* sysdeps/unix/sysv/linux/pthread-pi-defines.sym: Add TID_MASK.
	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
	(__condvar_cleanup2): Lock mutex only if we don't already
	own it.
	* sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
	(__condvar_cleanup1): Likewise.
	* tst-cond25.c: New test case.
diff --git a/nptl/Makefile b/nptl/Makefile
index f21276c..de324fa 100644
--- a/nptl/Makefile
+++ b/nptl/Makefile
@@ -206,7 +206,7 @@ tests = tst-typesizes \
 	tst-cond1 tst-cond2 tst-cond3 tst-cond4 tst-cond5 tst-cond6 tst-cond7 \
 	tst-cond8 tst-cond9 tst-cond10 tst-cond11 tst-cond12 tst-cond13 \
 	tst-cond14 tst-cond15 tst-cond16 tst-cond17 tst-cond18 tst-cond19 \
-	tst-cond20 tst-cond21 tst-cond22 tst-cond23 tst-cond24 \
+	tst-cond20 tst-cond21 tst-cond22 tst-cond23 tst-cond24 tst-cond25 \
 	tst-cond-except \
 	tst-robust1 tst-robust2 tst-robust3 tst-robust4 tst-robust5 \
 	tst-robust6 tst-robust7 tst-robust8 tst-robust9 \
@@ -276,6 +276,7 @@ gen-as-const-headers = pthread-errnos.sym
 LDFLAGS-pthread.so = -Wl,--enable-new-dtags,-z,nodelete,-z,initfirst
 
 LDFLAGS-tst-cond24 = -lrt
+LDFLAGS-tst-cond25 = -lrt
 
 include ../Makeconfig
 
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
index 6761c13..884987c 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_timedwait.S
@@ -649,10 +649,24 @@ __condvar_tw_cleanup:
 	movl	$0x7fffffff, %edx
 	ENTER_KERNEL
 
+	/* Lock the mutex only if we don't own it already.  This only happens
+	   in case of PI mutexes, if we got cancelled after a successful
+	   return of the futex syscall and before disabling async
+	   cancellation.  */
 5:	movl	24+FRAME_SIZE(%esp), %eax
-	call	__pthread_mutex_cond_lock
+	movl	MUTEX_KIND(%eax), %ebx
+	andl	$(ROBUST_BIT|PI_BIT), %ebx
+	cmpl	$PI_BIT, %ebx
+	jne	8f
+
+	movl	(%eax), %ebx
+	andl	$TID_MASK, %ebx
+	cmpl	%ebx, %gs:TID
+	je	9f
+
+8:	call	__pthread_mutex_cond_lock
 
-	movl	%esi, (%esp)
+9:	movl	%esi, (%esp)
 .LcallUR:
 	call	_Unwind_Resume
 	hlt
diff --git a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
index 0af06ac..bf1e5fe 100644
--- a/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/i386/i486/pthread_cond_wait.S
@@ -566,10 +566,24 @@ __condvar_w_cleanup:
 	movl	$0x7fffffff, %edx
 	ENTER_KERNEL
 
+	/* Lock the mutex only if we don't own it already.  This only happens
+	   in case of PI mutexes, if we got cancelled after a successful
+	   return of the futex syscall and before disabling async
+	   cancellation.  */
 5:	movl	24+FRAME_SIZE(%esp), %eax
-	call	__pthread_mutex_cond_lock
+	movl	MUTEX_KIND(%eax), %ebx
+	andl	$(ROBUST_BIT|PI_BIT), %ebx
+	cmpl	$PI_BIT, %ebx
+	jne	8f
+
+	movl	(%eax), %ebx
+	andl	$TID_MASK, %ebx
+	cmpl	%ebx, %gs:TID
+	je	9f
+
+8:	call	__pthread_mutex_cond_lock
 
-	movl	%esi, (%esp)
+9:	movl	%esi, (%esp)
 .LcallUR:
 	call	_Unwind_Resume
 	hlt
diff --git a/nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym b/nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym
index 46fbd0d..0ac51db 100644
--- a/nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym
+++ b/nptl/sysdeps/unix/sysv/linux/pthread-pi-defines.sym
@@ -6,3 +6,4 @@ MUTEX_KIND	offsetof (pthread_mutex_t, __data.__kind)
 ROBUST_BIT	PTHREAD_MUTEX_ROBUST_NORMAL_NP
 PI_BIT		PTHREAD_MUTEX_PRIO_INHERIT_NP
 PS_BIT		PTHREAD_MUTEX_PSHARED_BIT
+TID_MASK	FUTEX_TID_MASK
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
index b669abb..eb13326 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_timedwait.S
@@ -771,10 +771,24 @@ __condvar_cleanup2:
 	movl	$SYS_futex, %eax
 	syscall
 
+	/* Lock the mutex only if we don't own it already.  This only happens
+	   in case of PI mutexes, if we got cancelled after a successful
+	   return of the futex syscall and before disabling async
+	   cancellation.  */
 5:	movq	16(%rsp), %rdi
-	callq	__pthread_mutex_cond_lock
+	movl	MUTEX_KIND(%rdi), %eax
+	andl	$(ROBUST_BIT|PI_BIT), %eax
+	cmpl	$PI_BIT, %eax
+	jne	7f
+
+	movl	(%rdi), %eax
+	andl	$TID_MASK, %eax
+	cmpl	%eax, %fs:TID
+	je	8f
+
+7:	callq	__pthread_mutex_cond_lock
 
-	movq	24(%rsp), %rdi
+8:	movq	24(%rsp), %rdi
 	movq	FRAME_SIZE(%rsp), %r15
 	movq	FRAME_SIZE+8(%rsp), %r14
 	movq	FRAME_SIZE+16(%rsp), %r13
diff --git a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
index ec403cd..6c6dc0e 100644
--- a/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
+++ b/nptl/sysdeps/unix/sysv/linux/x86_64/pthread_cond_wait.S
@@ -495,10 +495,24 @@ __condvar_cleanup1:
 	movl	$SYS_futex, %eax
 	syscall
 
+	/* Lock the mutex only if we don't own it already.  This only happens
+	   in case of PI mutexes, if we got cancelled after a successful
+	   return of the futex syscall and before disabling async
+	   cancellation.  */
 5:	movq	16(%rsp), %rdi
-	callq	__pthread_mutex_cond_lock
+	movl	MUTEX_KIND(%rdi), %eax
+	andl	$(ROBUST_BIT|PI_BIT), %eax
+	cmpl	$PI_BIT, %eax
+	jne	7f
+
+	movl	(%rdi), %eax
+	andl	$TID_MASK, %eax
+	cmpl	%eax, %fs:TID
+	je	8f
+
+7:	callq	__pthread_mutex_cond_lock
 
-	movq	24(%rsp), %rdi
+8:	movq	24(%rsp), %rdi
 .LcallUR:
 	call	_Unwind_Resume@PLT
 	hlt
diff --git a/nptl/tst-cond25.c b/nptl/tst-cond25.c
new file mode 100644
index 0000000..4488e74
--- /dev/null
+++ b/nptl/tst-cond25.c
@@ -0,0 +1,282 @@
+/* Verify that condition variables synchronized by PI mutexes don't hang on
+   on cancellation.
+   Copyright (C) 2012 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <time.h>
+
+#define NUM 5
+#define ITERS 10000
+#define COUNT 100
+
+typedef void *(*thr_func) (void *);
+
+pthread_mutex_t mutex;
+pthread_cond_t cond;
+
+void cleanup (void *u)
+{
+  /* pthread_cond_wait should always return with the mutex locked.  */
+  if (pthread_mutex_unlock (&mutex))
+    abort ();
+}
+
+void *
+signaller (void *u)
+{
+  int i, ret = 0;
+  void *tret = NULL;
+
+  for (i = 0; i < ITERS; i++)
+    {
+      if ((ret = pthread_mutex_lock (&mutex)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("signaller:mutex_lock failed: %s\n", strerror (ret));
+	  goto out;
+	}
+      if ((ret = pthread_cond_signal (&cond)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("signaller:signal failed: %s\n", strerror (ret));
+	  goto unlock_out;
+	}
+      if ((ret = pthread_mutex_unlock (&mutex)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("signaller:mutex_unlock failed: %s\n", strerror (ret));
+	  goto out;
+	}
+      pthread_testcancel ();
+    }
+
+out:
+  return tret;
+
+unlock_out:
+  if ((ret = pthread_mutex_unlock (&mutex)) != 0)
+    printf ("signaller:mutex_unlock[2] failed: %s\n", strerror (ret));
+  goto out;
+}
+
+void *
+waiter (void *u)
+{
+  int i, ret = 0;
+  void *tret = NULL;
+  int seq = (int)u;
+
+  for (i = 0; i < ITERS / NUM; i++)
+    {
+      if ((ret = pthread_mutex_lock (&mutex)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("waiter[%u]:mutex_lock failed: %s\n", seq, strerror (ret));
+	  goto out;
+	}
+      pthread_cleanup_push (cleanup, NULL);
+
+      if ((ret = pthread_cond_wait (&cond, &mutex)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("waiter[%u]:wait failed: %s\n", seq, strerror (ret));
+	  goto unlock_out;
+	}
+
+      if ((ret = pthread_mutex_unlock (&mutex)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("waiter[%u]:mutex_unlock failed: %s\n", seq, strerror (ret));
+	  goto out;
+	}
+      pthread_cleanup_pop (0);
+    }
+
+out:
+  puts ("waiter tests done");
+  return tret;
+
+unlock_out:
+  if ((ret = pthread_mutex_unlock (&mutex)) != 0)
+    printf ("waiter:mutex_unlock[2] failed: %s\n", strerror (ret));
+  goto out;
+}
+
+void *
+timed_waiter (void *u)
+{
+  int i, ret;
+  void *tret = NULL;
+  int seq = (int)u;
+
+  for (i = 0; i < ITERS / NUM; i++)
+    {
+      struct timespec ts;
+
+      if ((ret = clock_gettime(CLOCK_REALTIME, &ts)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("%u:clock_gettime failed: %s\n", seq, strerror (errno));
+	  goto out;
+	}
+      ts.tv_sec += 20;
+
+      if ((ret = pthread_mutex_lock (&mutex)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("waiter[%u]:mutex_lock failed: %s\n", seq, strerror (ret));
+	  goto out;
+	}
+      pthread_cleanup_push (cleanup, NULL);
+
+      /* We should not time out either.  */
+      if ((ret = pthread_cond_timedwait (&cond, &mutex, &ts)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("waiter[%u]:timedwait failed: %s\n", seq, strerror (ret));
+	  goto unlock_out;
+	}
+      if ((ret = pthread_mutex_unlock (&mutex)) != 0)
+        {
+	  tret = (void *)1;
+	  printf ("waiter[%u]:mutex_unlock failed: %s\n", seq, strerror (ret));
+	  goto out;
+	}
+      pthread_cleanup_pop (0);
+    }
+
+out:
+  puts ("timed_waiter tests done");
+  return tret;
+
+unlock_out:
+  if ((ret = pthread_mutex_unlock (&mutex)) != 0)
+    printf ("waiter[%u]:mutex_unlock[2] failed: %s\n", seq, strerror (ret));
+  goto out;
+}
+
+int
+do_test_wait (thr_func f)
+{
+  pthread_t w[NUM];
+  pthread_t s;
+  pthread_mutexattr_t attr;
+  int i, j, ret = 0;
+  void *thr_ret;
+
+  for (i = 0; i < COUNT; i++)
+    {
+      if ((ret = pthread_mutexattr_init (&attr)) != 0)
+        {
+	  printf ("mutexattr_init failed: %s\n", strerror (ret));
+	  goto out;
+	}
+
+      if ((ret = pthread_mutexattr_setprotocol (&attr, PTHREAD_PRIO_INHERIT)) != 0)
+        {
+	  printf ("mutexattr_setprotocol failed: %s\n", strerror (ret));
+	  goto out;
+	}
+
+      if ((ret = pthread_cond_init (&cond, NULL)) != 0)
+        {
+	  printf ("cond_init failed: %s\n", strerror (ret));
+	  goto out;
+	}
+
+      if ((ret = pthread_mutex_init (&mutex, &attr)) != 0)
+        {
+	  printf ("mutex_init failed: %s\n", strerror (ret));
+	  goto out;
+	}
+
+      for (j = 0; j < NUM; j++)
+        if ((ret = pthread_create (&w[j], NULL, f, (void *)j)) != 0)
+	  {
+	    printf ("waiter[%d]: create failed: %s\n", j, strerror (ret));
+	    goto out;
+	  }
+
+      if ((ret = pthread_create (&s, NULL, signaller, NULL)) != 0)
+        {
+	  printf ("signaller: create failed: %s\n", strerror (ret));
+	  goto out;
+	}
+
+      for (j = 0; j < NUM; j++)
+        {
+          if ((ret = pthread_cancel (w[j])) != 0)
+	    {
+	      printf ("waiter[%d]: cancel failed: %s\n", j, strerror (ret));
+	      goto out;
+	    }
+
+          if ((ret = pthread_join (w[j], &thr_ret)) != 0)
+	    {
+	      printf ("waiter[%d]: join failed: %s\n", j, strerror (ret));
+	      goto out;
+	    }
+
+          if (thr_ret != NULL && thr_ret != PTHREAD_CANCELED)
+	    {
+	      ret = 1;
+	      goto out;
+	    }
+        }
+
+      /* The signalling thread could have ended before it was cancelled.  */
+      pthread_cancel (s);
+
+      if ((ret = pthread_join (s, &thr_ret)) != 0)
+        {
+	  printf ("signaller: join failed: %s\n", strerror (ret));
+	  goto out;
+	}
+
+      if (thr_ret != NULL && thr_ret != PTHREAD_CANCELED)
+        {
+          ret = 1;
+          goto out;
+        }
+    }
+
+out:
+  return ret;
+}
+
+int
+do_test (int argc, char **argv)
+{
+  int ret = do_test_wait (waiter);
+
+  if (ret)
+    return ret;
+
+  return do_test_wait (timed_waiter);
+}
+
+#define TIMEOUT 5
+#include "../test-skeleton.c"

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]