deadlock on console mutex in gdb

David McFarland corngood@gmail.com
Thu Dec 23 19:28:58 GMT 2021


I've also been hitting another console related deadlock, even when
there's no debugger involved:

#0  0x00007ffca30ecdf4 in ntdll!ZwWaitForSingleObject () from /cygdrive/c/WINDOWS/SYSTEM32/ntdll.dll
#1  0x00007ffca0d91a5e in WaitForSingleObjectEx () from /cygdrive/c/WINDOWS/System32/KERNELBASE.dll
#2  0x0000000180075f5c in acquire_attach_mutex (t=4294967295) at ../../../../winsup/cygwin/fhandler_console.cc:66
#3  fhandler_console::write (this=0x180375e50, vsrc=0x804f229c8, len=94) at ../../../../winsup/cygwin/fhandler_console.cc:3112
#4  0x0000000180145c68 in write (fd=2, ptr=0x804f229c8, len=94) at ../../../../winsup/cygwin/syscalls.cc:1360
#5  0x000000018019442b in _sigfe () at sigfe.s:37
#6  0x000000046b6c24eb in nix::writeFull (fd=2, s="waiting for lock on \033[35;1m'/nix/store/lqcj88cy92w6mjsqicjmgbfj0dnggvdw-libiconv-1.16'\033[0m...\n", allowInterrupts=false) at src/libutil/util.cc:661
#7  0x000000046b6b43a5 in nix::writeToStderr (s="waiting for lock on \033[35;1m'/nix/store/lqcj88cy92w6mjsqicjmgbfj0dnggvdw-libiconv-1.16'\033[0m...\n") at src/libutil/logging.cc:119
#8  0x000000046b6d640d in nix::SimpleLogger::log (this=0x8000182f0, lvl=nix::lvlWarn, fs=...) at src/libutil/logging.cc:74
#9  0x000000046b6d6239 in nix::SimpleLogger::startActivity (this=0x8000182f0, act=8156142895253, lvl=nix::lvlWarn, type=nix::actBuildWaiting, s="waiting for lock on \033[35;1m'/nix/store/lqcj88cy92w6mjsqicjmgbfj0dnggvdw-libiconv-1.16'\033[0m", fields=std::vector of length 0, capacity 0, parent=0)
    at src/libutil/logging.cc:90
#10 0x000000046b6b449f in nix::Activity::Activity (this=0x804d470c0, logger=..., lvl=nix::lvlWarn, type=nix::actBuildWaiting, s="waiting for lock on \033[35;1m'/nix/store/lqcj88cy92w6mjsqicjmgbfj0dnggvdw-libiconv-1.16'\033[0m", fields=std::vector of length 0, capacity 0, parent=0) at src/libutil/logging.cc:139
#11 0x00000004f52dc061 in std::make_unique<nix::Activity, nix::Logger&, nix::Verbosity, nix::ActivityType, std::string> () at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/unique_ptr.h:962
#12 0x00000004f508fbbf in nix::DerivationGoal::tryToBuild (this=0x804e93fc0) at src/libstore/build/derivation-goal.cc:577
#13 0x00000004f508b2e7 in nix::DerivationGoal::work (this=0x804e93fc0) at src/libstore/build/derivation-goal.cc:144
#14 0x00000004f50b87e4 in nix::Worker::run (this=0xffff97f0, _topGoals=std::set with 1 element = {...}) at src/libstore/build/worker.cc:268
#15 0x00000004f5098c52 in nix::Store::buildPaths (this=0x8000ae2d8, reqs=std::vector of length 1, capacity 1 = {...}, buildMode=nix::bmNormal, evalStore=std::shared_ptr<nix::Store> (use count 5, weak count 1) = {...}) at src/libstore/build/entry-points.cc:24
#16 0x0000000100406243 in operator() (__closure=0xffffa6d0, paths0=std::vector of length 1, capacity 1 = {...}) at src/nix-build/nix-build.cc:345
#17 0x000000010040b361 in main_nix_build (argc=4, argv=0xffffcc60) at src/nix-build/nix-build.cc:574
#18 0x00000001005f5916 in std::__invoke_impl<void, void (*&)(int, char**), int, char**> (__f=@0xffffbe30: 0x1004063d7 <main_nix_build(int, char**)>) at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/invoke.h:60
#19 0x00000001005e9a83 in std::__invoke_r<void, void (*&)(int, char**), int, char**> (__fn=@0xffffbe30: 0x1004063d7 <main_nix_build(int, char**)>) at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/invoke.h:110
#20 0x000000010056b31b in std::_Function_handler<void (int, char**), void (*)(int, char**)>::_M_invoke(std::_Any_data const&, int&&, char**&&) (__functor=..., __args#0=@0xffffbde8: 4, __args#1=@0xffffbdf0: 0xffffcc60) at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/std_function.h:291
#21 0x0000000100517d8c in std::function<void (int, char**)>::operator()(int, char**) const (this=0xffffbe30, __args#0=4, __args#1=0xffffcc60) at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/std_function.h:622
#22 0x000000010044e248 in nix::mainWrapped (argc=4, argv=0xffffcc60) at src/nix/main.cc:277
#23 0x000000010044f54d in operator() (__closure=0xffffcbc0) at src/nix/main.cc:392
#24 0x00000001004508a2 in std::__invoke_impl<void, main(int, char**)::<lambda()>&>(std::__invoke_other, struct {...} &) (__f=...) at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/invoke.h:60
#25 0x00000001004503d7 in std::__invoke_r<void, main(int, char**)::<lambda()>&>(struct {...} &) (__fn=...) at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/invoke.h:110
#26 0x000000010044fdc0 in std::_Function_handler<void(), main(int, char**)::<lambda()> >::_M_invoke(const std::_Any_data &) (__functor=...) at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/std_function.h:291
#27 0x00000004473816a2 in std::function<void ()>::operator()() const (this=0xffffcbc0) at /usr/lib/gcc/x86_64-pc-cygwin/10/include/c++/bits/std_function.h:622
#28 0x000000044736a686 in nix::handleExceptions(std::string const&, std::function<void ()>) (programName="test/bin/nix-build", fun=...) at src/libmain/shared.cc:373
#29 0x000000010044f5d3 in main (argc=4, argv=0xffffcc60) at src/nix/main.cc:391


This one is much harder to reproduce, but I think what's happening is:

- PT is opened with posix_openpt()

- attach_mutex is created:
  if (InterlockedIncrement (&master_cnt) == 1)
    attach_mutex = CreateMutex (&sa, FALSE, NULL);

- PT is closed by close()

- attach_mutex is destroyed:
  if (InterlockedDecrement (&master_cnt) == 0)
    CloseHandle (attach_mutex);

- the handle in attach_mutex is reused for another object

- fhandler::write is called on stdout/err

- WaitForSingleObject is called on attach_mutex, but it's pointing to
  something else, which deadlocks

While I don't have a solid repro for the deadlock, if I apply a patch
like this:


>From 926d05a1211fa8e5a7fc1cccdbe89e5c980dfad1 Mon Sep 17 00:00:00 2001
From: David McFarland <corngood@gmail.com>
Date: Thu, 23 Dec 2021 15:15:11 -0400
Subject: [PATCH] tty: add helper for attach_mutex

---
 winsup/cygwin/fhandler.h          |  9 +++++++++
 winsup/cygwin/fhandler_console.cc | 17 -----------------
 winsup/cygwin/fhandler_tty.cc     | 30 +++++++++++++++++++++++++-----
 winsup/cygwin/select.cc           | 16 ----------------
 4 files changed, 34 insertions(+), 38 deletions(-)

diff --git a/winsup/cygwin/fhandler.h b/winsup/cygwin/fhandler.h
index 4f70c4c0b..48d7107e5 100644
--- a/winsup/cygwin/fhandler.h
+++ b/winsup/cygwin/fhandler.h
@@ -1881,6 +1881,15 @@ class fhandler_serial: public fhandler_base
 #define release_output_mutex() \
   __release_output_mutex (__PRETTY_FUNCTION__, __LINE__)
 
+bool __acquire_attach_mutex (const char *fn, int ln, DWORD t);
+void __release_attach_mutex (const char *fn, int ln);
+
+#define acquire_attach_mutex(ms)                             \
+  __acquire_attach_mutex (__PRETTY_FUNCTION__, __LINE__, ms)
+
+#define release_attach_mutex()                           \
+  __release_attach_mutex (__PRETTY_FUNCTION__, __LINE__)
+
 class tty;
 class tty_min;
 class fhandler_termios: public fhandler_base
diff --git a/winsup/cygwin/fhandler_console.cc b/winsup/cygwin/fhandler_console.cc
index 4c98b5355..ca2d5c74e 100644
--- a/winsup/cygwin/fhandler_console.cc
+++ b/winsup/cygwin/fhandler_console.cc
@@ -56,23 +56,6 @@ fhandler_console::console_state NO_COPY *fhandler_console::shared_console_info;
 
 bool NO_COPY fhandler_console::invisible_console;
 
-/* Mutex for AttachConsole()/FreeConsole() in fhandler_tty.cc */
-HANDLE attach_mutex;
-
-static inline void
-acquire_attach_mutex (DWORD t)
-{
-  if (attach_mutex)
-    WaitForSingleObject (attach_mutex, t);
-}
-
-static inline void
-release_attach_mutex ()
-{
-  if (attach_mutex)
-    ReleaseMutex (attach_mutex);
-}
-
 /* con_ra is shared in the same process.
    Only one console can exist in a process, therefore, static is suitable. */
 static struct fhandler_base::rabuf_t con_ra;
diff --git a/winsup/cygwin/fhandler_tty.cc b/winsup/cygwin/fhandler_tty.cc
index c8ad53cb7..c425c158b 100644
--- a/winsup/cygwin/fhandler_tty.cc
+++ b/winsup/cygwin/fhandler_tty.cc
@@ -28,6 +28,7 @@ details. */
 #include "registry.h"
 #include "tls_pbuf.h"
 #include "winf.h"
+#include <assert.h>
 
 #ifndef PROC_THREAD_ATTRIBUTE_PSEUDOCONSOLE
 #define PROC_THREAD_ATTRIBUTE_PSEUDOCONSOLE 0x00020016
@@ -56,7 +57,26 @@ struct pipe_reply {
   DWORD error;
 };
 
-extern HANDLE attach_mutex; /* Defined in fhandler_console.cc */
+/* Mutex for AttachConsole()/FreeConsole() */
+HANDLE attach_mutex;
+
+bool __acquire_attach_mutex (const char *fn, int ln, DWORD t)
+{
+  if (!attach_mutex)
+    return false;
+  DWORD res = WaitForSingleObject (attach_mutex, t);
+  assert(res == WAIT_TIMEOUT || res == WAIT_OBJECT_0);
+  return res == WAIT_OBJECT_0;
+}
+
+void __release_attach_mutex (const char *fn, int ln)
+{
+  if (!attach_mutex)
+    return;
+  BOOL r = ReleaseMutex (attach_mutex);
+  assert(r);
+}
+
 static LONG master_cnt = 0;
 
 inline static bool pcon_pid_alive (DWORD pid);
@@ -519,13 +539,13 @@ fhandler_pty_master::accept_input ()
 	{
 	  /* Slave attaches to a different console than master.
 	     Therefore reattach here. */
-	  WaitForSingleObject (attach_mutex, INFINITE);
+    acquire_attach_mutex (INFINITE);
 	  FreeConsole ();
 	  AttachConsole (target_pid);
 	  cp_to = GetConsoleCP ();
 	  FreeConsole ();
 	  AttachConsole (resume_pid);
-	  ReleaseMutex (attach_mutex);
+    release_attach_mutex ();
 	}
       else
 	cp_to = GetConsoleCP ();
@@ -2827,13 +2847,13 @@ fhandler_pty_master::pty_master_fwd_thread (const master_fwd_thread_param_t *p)
 	{
 	  /* Slave attaches to a different console than master.
 	     Therefore reattach here. */
-	  WaitForSingleObject (attach_mutex, INFINITE);
+    acquire_attach_mutex (INFINITE);
 	  FreeConsole ();
 	  AttachConsole (target_pid);
 	  cp_from = GetConsoleOutputCP ();
 	  FreeConsole ();
 	  AttachConsole (resume_pid);
-	  ReleaseMutex (attach_mutex);
+    release_attach_mutex ();
 	}
       else
 	cp_from = GetConsoleOutputCP ();
diff --git a/winsup/cygwin/select.cc b/winsup/cygwin/select.cc
index a2868abd0..95edc10fe 100644
--- a/winsup/cygwin/select.cc
+++ b/winsup/cygwin/select.cc
@@ -1095,22 +1095,6 @@ fhandler_fifo::select_except (select_stuff *ss)
   return s;
 }
 
-extern HANDLE attach_mutex; /* Defined in fhandler_console.cc */
-
-static inline void
-acquire_attach_mutex (DWORD t)
-{
-  if (attach_mutex)
-    WaitForSingleObject (attach_mutex, t);
-}
-
-static inline void
-release_attach_mutex ()
-{
-  if (attach_mutex)
-    ReleaseMutex (attach_mutex);
-}
-
 static int
 peek_console (select_record *me, bool)
 {
-- 
2.32.0


I can use this program:

=====test-pt.c
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>

void *thread(void* p) {
        for (int i = 0;; ++i) {
                int fd = posix_openpt(O_RDWR | O_NOCTTY);
                close(fd);
                fprintf(stderr, "test %i\n", i);
        }
        return 0;
}

int main() {
        pthread_t ids[3];
        for (int i = 0; i < 3; ++i) {
                pthread_create(&ids[i], 0, &thread, &ids[i]);
        }
        for (int i = 0; i < 3; ++i) {
                pthread_join(ids[i], 0);
        }
        return 0;
}
=====

compiled with:

$ gcc -D_GNU_SOURCE test-pt.c -o test-pt

To hit those asserts.


Perhaps the mutex should never be closed? It's also worrying that
acquiring the mutex just silently fails before a PT is opened.
There could be a thread that is acting as if it has a lock a the time
that the mutex is created, and it will even try to release the lock.

It might just be safest to create it at process start, but it could have
a significant performance cost.


More information about the Cygwin-developers mailing list