This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Workaround for glibc/BZ5983: following a child fork shows stale parent threads in the child.


The glibc/BZ5983 (*) bug is very unfortunate for multi-process, and
although it has since been fixed in glibc, many current installations
are still affected by it --- my laptop included :-) --- so it
is worthwhile to have a workaround in GDB.

(*) - http://sourceware.org/bugzilla/show_bug.cgi?id=5983

Here's the symptom, in current GDB:

 (gdb) set follow-fork-mode child
 (gdb) r
 Starting program: /home/pedro/gdb/sspaces/build/gdb/testsuite/gdb.threads/fork-thread-pending
 [New Thread 0x7f9c18b4b6e0 (LWP 3651)]
 [Thread debugging using libthread_db enabled]
 [New Thread 0x40800950 (LWP 3661)]
 Thread <0> executing
 [New Thread 0x41001950 (LWP 3662)]
 Thread <1> executing
 [New Thread 0x41802950 (LWP 3663)]
 Thread <2> executing
 [New Thread 0x42003950 (LWP 3664)]
 Thread <3> executing
 [New Thread 0x42804950 (LWP 3665)]
 Thread <4> executing
 [New Thread 0x43005950 (LWP 3666)]
 Thread <5> executing
 [New Thread 0x43806950 (LWP 3667)]
 Thread <6> executing
 [New Thread 0x44007950 (LWP 3668)]
 Thread <7> executing
 [New Thread 0x44808950 (LWP 3669)]
 Thread <8> executing
 [New Thread 0x45009950 (LWP 3670)]
 Thread forker <9> executing
 [Thread debugging using libthread_db enabled]
 [New Thread 0x7ffff7fd66e0 (LWP 3658)]
 [New Thread 0x44808950 (LWP 3669)]
 [New Thread 0x44007950 (LWP 3668)]
 [New Thread 0x43806950 (LWP 3667)]
 [New Thread 0x43005950 (LWP 3666)]
 [New Thread 0x42804950 (LWP 3665)]
 [New Thread 0x42003950 (LWP 3664)]
 [New Thread 0x41802950 (LWP 3663)]
 [New Thread 0x41001950 (LWP 3662)]
 [New Thread 0x40800950 (LWP 3661)]
 [New Thread 0x44808950 (LWP 3672)]
 
 Program received signal SIGINT, Interrupt.
 [Switching to Thread 0x7ffff7fd66e0 (LWP 3658)]
 0x00007ffff7bcb796 in pthread_join () from /lib/libpthread.so.0
 (gdb) info threads
 During symbol reading, incomplete CFI data; unspecified registers (e.g., rax) at 0x7ffff7bcb6a8.
   12 Thread 0x44808950 (LWP 3672)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   11 Thread 0x40800950 (LWP 3661)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   10 Thread 0x41001950 (LWP 3662)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   9 Thread 0x41802950 (LWP 3663)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   8 Thread 0x42003950 (LWP 3664)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   7 Thread 0x42804950 (LWP 3665)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   6 Thread 0x43005950 (LWP 3666)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   5 Thread 0x43806950 (LWP 3667)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   4 Thread 0x44007950 (LWP 3668)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
   3 Thread 0x44808950 (LWP 3669)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
 * 2 Thread 0x7ffff7fd66e0 (LWP 3658)  0x00007ffff7bcb796 in pthread_join () from /lib/libpthread.so.0
   1 Thread 0x45009950 (LWP 3671)  0x00007ffff7bcb796 in pthread_join () from /lib/libpthread.so.0
 (gdb)             
 
We've followed the child, and in this test case, there should only
be 2 threads in the child, but GDB shows a bunch more.  If you
look closely, you'll see that most of the LWP ids are the same
as the LWP ids of the fork parent's threads.  This is just bogus.

With the workaround patch applied, one gets:

 Program received signal SIGINT, Interrupt.
 [Switching to Thread 0x45009950 (LWP 3220)]
 0x00007ffff7bcb796 in pthread_join () from /lib/libpthread.so.0
 (gdb) info threads
 During symbol reading, incomplete CFI data; unspecified registers (e.g., rax) at 0x7ffff7bcb6a8.
   2 Thread 0x44808950 (LWP 3221)  0x00007ffff767eb81 in nanosleep () from /lib/libc.so.6
 * 1 Thread 0x45009950 (LWP 3220)  0x00007ffff7bcb796 in pthread_join () from /lib/libpthread.so.0
 (gdb)                                                                                              

Here's the test's source.  I haven't converted it to a dejagnu
test yet, but it will end up being one soon.

/* This testcase is part of GDB, the GNU debugger.

   Copyright 2008, 2009 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include <pthread.h>
#include <assert.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

/* Total number of threads main creates: NUMTHREADS - 1 plain workers
   plus one thread that forks.  */
#define NUMTHREADS 10

/* Polled by the worker threads and by the thread started in the fork
   child; set to 1 by the forking thread in the parent once wait
   returns.  */
volatile int done = 0;

/* Entry point of the thread created in the fork child.  Polls `done'
   every 100 microseconds; should the flag ever be seen set, the
   assertion below fires.  Hands ARG back to the caller.  */

static void *
start (void *arg)
{
  for (;;)
    {
      if (done)
        break;

      usleep (100);
    }

  /* Leaving the polling loop is treated as a failure.  */
  assert (0);
  return arg;
}

/* Body of the plain worker threads.  ARG points to an int holding the
   thread's index, which is announced on stdout; afterwards the thread
   polls `done' every 100 microseconds until it becomes nonzero.
   Always returns NULL.  */

void *
thread_function (void *arg)
{
  const int *index = arg;

  printf ("Thread <%d> executing\n", *index);

  while (done == 0)
    usleep (100);

  return NULL;
}

/* Body of the forking thread.  ARG points to an int holding the
   thread's index, which is announced on stdout before calling fork.

   In the child, a new thread running `start' is created and joined;
   both calls must succeed, and getting past the join is treated as a
   failure.  In the parent, a failed fork trips an assertion;
   otherwise the thread waits for a child to change state and then
   sets `done' so the polling threads can finish.

   Always returns NULL.  */

void *
thread_forker (void *arg)
{
  const int *index = arg;
  pthread_t child_thread;
  pid_t child;
  int status;
  int err;

  printf ("Thread forker <%d> executing\n", *index);

  child = fork ();
  if (child == 0)
    {
      /* Fork child.  */
      err = pthread_create (&child_thread, NULL, start, NULL);
      assert (err == 0);
      err = pthread_join (child_thread, NULL);
      assert (err == 0);

      assert (0);
    }
  else
    {
      /* fork failed.  */
      if (child == -1)
        assert (0);

      /* Fork parent.  */
      wait (&status);
      done = 1;
    }

  return NULL;
}

/* Create NUMTHREADS - 1 worker threads running thread_function and
   one thread running thread_forker, then join all of them.  Each
   thread is handed a pointer to its own slot in ARGS, holding its
   index.

   Every pthread_create and pthread_join result is asserted to be 0,
   in the same way thread_forker checks its own calls, so that a
   failed creation is caught instead of later joining an uninitialized
   pthread_t.

   Returns 0 once all threads have been joined.  */

int
main (void)
{
  pthread_t threads[NUMTHREADS];
  int args[NUMTHREADS];
  int i, j;

  /* Create a few threads that do mostly nothing, and then one that
     forks.  */
  for (j = 0; j < NUMTHREADS - 1; ++j)
    {
      args[j] = j;
      i = pthread_create (&threads[j], NULL, thread_function, &args[j]);
      assert (i == 0);
    }

  /* J is now NUMTHREADS - 1, the last slot, used for the forker.  */
  args[j] = j;
  i = pthread_create (&threads[j], NULL, thread_forker, &args[j]);
  assert (i == 0);

  for (j = 0; j < NUMTHREADS; ++j)
    {
      i = pthread_join (threads[j], NULL);
      assert (i == 0);
    }

  return 0;
}

Below's the patch.  I've tested it on x86-64-linux, found no regressions,
and checked it in.

-- 
Pedro Alves

2009-05-18  Pedro Alves  <pedro@codesourcery.com>

	* linux-nat.h (linux_proc_get_tgid): Declare.
	* linux-nat.c (linux_proc_get_tgid): New.
	* linux-thread-db.c (struct thread_db_info): New field
	`need_stale_parent_threads_check'.
	(add_thread_db_info): Set it.
	(find_new_threads_callback): Ignore stale fork parent threads.
	(thread_db_resume): New.
	(init_thread_db_ops): Install thread_db_resume.

---
 gdb/linux-nat.c       |   28 +++++++++++++++++++++++++++
 gdb/linux-nat.h       |    4 +++
 gdb/linux-thread-db.c |   51 ++++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 79 insertions(+), 4 deletions(-)

Index: src/gdb/linux-nat.h
===================================================================
--- src.orig/gdb/linux-nat.h	2009-05-18 18:08:03.000000000 +0100
+++ src/gdb/linux-nat.h	2009-05-18 18:08:04.000000000 +0100
@@ -99,6 +99,10 @@ int thread_db_attach_lwp (ptid_t ptid);
 /* Find process PID's pending signal set from /proc/pid/status.  */
 void linux_proc_pending_signals (int pid, sigset_t *pending, sigset_t *blocked, sigset_t *ignored);
 
+/* Return the TGID of LWPID from /proc/pid/status.  Returns -1 if not
+   found.  */
+extern int linux_proc_get_tgid (int lwpid);
+
 /* linux-nat functions for handling fork events.  */
 extern void linux_enable_event_reporting (ptid_t ptid);
 
Index: src/gdb/linux-nat.c
===================================================================
--- src.orig/gdb/linux-nat.c	2009-05-18 18:08:03.000000000 +0100
+++ src/gdb/linux-nat.c	2009-05-18 18:08:04.000000000 +0100
@@ -1129,6 +1129,34 @@ exit_lwp (struct lwp_info *lp)
   delete_lwp (lp->ptid);
 }
 
+/* Return an lwp's tgid, found in `/proc/PID/status'.  */
+
+int
+linux_proc_get_tgid (int lwpid)
+{
+  FILE *status_file;
+  char buf[100];
+  int tgid = -1;
+
+  snprintf (buf, sizeof (buf), "/proc/%d/status", (int) lwpid);
+  status_file = fopen (buf, "r");
+  if (status_file != NULL)
+    {
+      while (fgets (buf, sizeof (buf), status_file))
+	{
+	  if (strncmp (buf, "Tgid:", 5) == 0)
+	    {
+	      tgid = strtoul (buf + strlen ("Tgid:"), NULL, 10);
+	      break;
+	    }
+	}
+
+      fclose (status_file);
+    }
+
+  return tgid;
+}
+
 /* Detect `T (stopped)' in `/proc/PID/status'.
    Other states including `T (tracing stop)' are reported as false.  */
 
Index: src/gdb/linux-thread-db.c
===================================================================
--- src.orig/gdb/linux-thread-db.c	2009-05-18 18:08:03.000000000 +0100
+++ src/gdb/linux-thread-db.c	2009-05-18 18:08:04.000000000 +0100
@@ -104,6 +104,13 @@ struct thread_db_info
   /* Connection to the libthread_db library.  */
   td_thragent_t *thread_agent;
 
+  /* True if we need to apply the workaround for glibc/BZ5983.  When
+     we catch a PTRACE_O_TRACEFORK, and go query the child's thread
+     list, nptl_db returns the parent's threads in addition to the new
+     (single) child thread.  If this flag is set, we do extra work to
+     be able to ignore such stale entries.  */
+  int need_stale_parent_threads_check;
+
   /* Location of the thread creation event breakpoint.  The code at
      this location in the child process will be called by the pthread
      library whenever a new thread is created.  By setting a special
@@ -168,6 +175,7 @@ add_thread_db_info (void *handle)
   info = xcalloc (1, sizeof (*info));
   info->pid = ptid_get_pid (inferior_ptid);
   info->handle = handle;
+  info->need_stale_parent_threads_check = 1;
 
   info->next = thread_db_list;
   thread_db_list = info;
@@ -1269,8 +1277,6 @@ find_new_threads_callback (const td_thrh
   if (ti.ti_state == TD_THR_UNKNOWN || ti.ti_state == TD_THR_ZOMBIE)
     return 0;			/* A zombie -- ignore.  */
 
-  ptid = ptid_build (info->pid, ti.ti_lid, 0);
-
   if (ti.ti_tid == 0)
     {
       /* A thread ID of zero means that this is the main thread, but
@@ -1279,14 +1285,29 @@ find_new_threads_callback (const td_thrh
 	 be yet.  Just enable event reporting and otherwise ignore
 	 it.  */
 
+      /* In that case, we're not stopped in a fork syscall and don't
+	 need this glibc bug workaround.  */
+      info->need_stale_parent_threads_check = 0;
+
       err = info->td_thr_event_enable_p (th_p, 1);
       if (err != TD_OK)
-	error (_("Cannot enable thread event reporting for %s: %s"),
-	       target_pid_to_str (ptid), thread_db_err_str (err));
+	error (_("Cannot enable thread event reporting for LWP %d: %s"),
+	       (int) ti.ti_lid, thread_db_err_str (err));
 
       return 0;
     }
 
+  /* Ignore stale parent threads, caused by glibc/BZ5983.  This is a
+     bit expensive, as it needs to open /proc/pid/status, so try to
+     avoid doing the work if we know we don't have to.  */
+  if (info->need_stale_parent_threads_check)
+    {
+      int tgid = linux_proc_get_tgid (ti.ti_lid);
+      if (tgid != -1 && tgid != info->pid)
+	return 0;
+    }
+
+  ptid = ptid_build (info->pid, ti.ti_lid, 0);
   tp = find_thread_pid (ptid);
   if (tp == NULL || tp->private == NULL)
     attach_thread (ptid, th_p, &ti);
@@ -1480,6 +1501,27 @@ thread_db_get_ada_task_ptid (long lwp, l
 }
 
 static void
+thread_db_resume (struct target_ops *ops,
+		  ptid_t ptid, int step, enum target_signal signo)
+{
+  struct target_ops *beneath = find_target_beneath (ops);
+  struct thread_db_info *info;
+
+  if (ptid_equal (ptid, minus_one_ptid))
+    info = get_thread_db_info (GET_PID (inferior_ptid));
+  else
+    info = get_thread_db_info (GET_PID (ptid));
+
+  /* This workaround is only needed for child fork lwps stopped in a
+     PTRACE_O_TRACEFORK event.  When the inferior is resumed, the
+     workaround can be disabled.  */
+  if (info)
+    info->need_stale_parent_threads_check = 0;
+
+  beneath->to_resume (beneath, ptid, step, signo);
+}
+
+static void
 init_thread_db_ops (void)
 {
   thread_db_ops.to_shortname = "multi-thread";
@@ -1487,6 +1529,7 @@ init_thread_db_ops (void)
   thread_db_ops.to_doc = "Threads and pthreads support.";
   thread_db_ops.to_detach = thread_db_detach;
   thread_db_ops.to_wait = thread_db_wait;
+  thread_db_ops.to_resume = thread_db_resume;
   thread_db_ops.to_mourn_inferior = thread_db_mourn_inferior;
   thread_db_ops.to_find_new_threads = thread_db_find_new_threads;
   thread_db_ops.to_pid_to_str = thread_db_pid_to_str;


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]