This is the mail archive of the gdb-patches@sourceware.org mailing list for the GDB project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[non-stop] 08/10 linux native support


This adds the linux native target non-stop mode support:

- Do not stop all threads when a thread stops.

- Be sure we're not reading registers and memory from
  running threads.

- Add threads to the thread table as soon as we detect
  them.

- Avoid using ptrace on running threads.

- Implement target_stop_ptid to interrupt only
  one thread

- Getting the last pending event of a thread is
  different in non-stop mode, because stop_signal
  is per-thread there.

-- 
Pedro Alves
2008-06-15  Pedro Alves  <pedro@codesourcery.com>

	* linux-fork.c (linux_fork_killall): Use SIGKILL instead of
	PTRACE_KILL.

	* linux-nat.c (find_lwp_pid): Make public.
	(get_pending_status): Implement non-stop mode.
	(sigint_clear_callback): New.
	(linux_nat_resume): In non-stop mode, always resume only a single
	PTID.  Clear the sigint flag.
	(linux_handle_extended_wait): On a clone event, add new lwp to
	GDB's thread table, and mark as running, executing and stopped
	appropriately.
	(linux_nat_filter_event): Don't assume there are other running
	threads when a thread exits.
	(linux_nat_wait): Mark the main thread as running and executing.
	In non-stop mode, don't stop all lwps.
	(kill_callback): If lwp is not stopped, use SIGKILL.
	(linux_nat_thread_alive): Use signal 0 to detect if a thread is
	alive.
	(send_sigint_callback): New.
	(linux_nat_stop_ptid): New.
	(linux_nat_add_target): Set to_stop_ptid to linux_nat_stop_ptid.

	* linux-nat.h (struct lwp_info): Add sigint field.
	(find_lwp_pid): Declare.

	* linux-thread-db.c (thread_from_lwp, enable_thread_event)
	(check_event): Set proc_handle.pid to the stopped lwp.
	(thread_db_find_new_threads): If current lwp is executing, don't
	try to read from it.

---
 gdb/linux-fork.c      |    4 
 gdb/linux-nat.c       |  256 ++++++++++++++++++++++++++++++++++++++++----------
 gdb/linux-nat.h       |    6 +
 gdb/linux-thread-db.c |   15 ++
 4 files changed, 233 insertions(+), 48 deletions(-)

Index: src/gdb/linux-fork.c
===================================================================
--- src.orig/gdb/linux-fork.c	2008-06-15 20:25:42.000000000 +0100
+++ src/gdb/linux-fork.c	2008-06-15 20:56:56.000000000 +0100
@@ -337,7 +337,9 @@ linux_fork_killall (void)
     {
       pid = PIDGET (fp->ptid);
       do {
-	ptrace (PT_KILL, pid, 0, 0);
+	/* Use SIGKILL instead of PTRACE_KILL because the former works even
+	   if the thread is running, while the latter doesn't.  */
+	kill (pid, SIGKILL);
 	ret = waitpid (pid, &status, 0);
 	/* We might get a SIGCHLD instead of an exit status.  This is
 	 aggravated by the first kill above - a child has just
Index: src/gdb/linux-nat.c
===================================================================
--- src.orig/gdb/linux-nat.c	2008-06-15 20:25:42.000000000 +0100
+++ src/gdb/linux-nat.c	2008-06-15 20:57:27.000000000 +0100
@@ -212,6 +212,10 @@ static void linux_nat_async (void (*call
 static int linux_nat_async_mask (int mask);
 static int kill_lwp (int lwpid, int signo);
 
+static int send_sigint_callback (struct lwp_info *lp, void *data);
+
+static int stop_callback (struct lwp_info *lp, void *data);
+
 /* Captures the result of a successful waitpid call, along with the
    options used in that call.  */
 struct waitpid_result
@@ -415,6 +419,8 @@ linux_test_for_tracefork (int original_p
   int child_pid, ret, status;
   long second_pid;
 
+  int events_enabled = linux_nat_async_events (0);
+
   linux_supports_tracefork_flag = 0;
   linux_supports_tracevforkdone_flag = 0;
 
@@ -454,6 +460,7 @@ linux_test_for_tracefork (int original_p
 	warning (_("linux_test_for_tracefork: unexpected wait status 0x%x from "
 		 "killed child"), status);
 
+      linux_nat_async_events (events_enabled);
       return;
     }
 
@@ -493,6 +500,8 @@ linux_test_for_tracefork (int original_p
   if (ret != 0)
     warning (_("linux_test_for_tracefork: failed to kill child"));
   my_waitpid (child_pid, &status, 0);
+
+  linux_nat_async_events (events_enabled);
 }
 
 /* Return non-zero iff we have tracefork functionality available.
@@ -920,7 +929,7 @@ delete_lwp (ptid_t ptid)
 /* Return a pointer to the structure describing the LWP corresponding
    to PID.  If no corresponding LWP could be found, return NULL.  */
 
-static struct lwp_info *
+struct lwp_info *
 find_lwp_pid (ptid_t ptid)
 {
   struct lwp_info *lp;
@@ -1306,16 +1315,76 @@ get_pending_status (struct lwp_info *lp,
      events are always cached in waitpid_queue.  */
 
   *status = 0;
-  if (GET_LWP (lp->ptid) == GET_LWP (last_ptid))
+
+  if (non_stop)
     {
-      if (stop_signal != TARGET_SIGNAL_0
-	  && signal_pass_state (stop_signal))
-	*status = W_STOPCODE (target_signal_to_host (stop_signal));
+      enum target_signal signo = TARGET_SIGNAL_0;
+
+      if (is_executing (lp->ptid))
+	{
+	  /* If the core thought this lwp was executing, we can only
+	     have pending events in the local queue.  */
+	  if (queued_waitpid (GET_LWP (lp->ptid), status, __WALL) != -1)
+	    {
+	      if (WIFSTOPPED (status))
+		signo = target_signal_from_host (WSTOPSIG (status));
+
+	      /* If not stopped, then the lwp is gone, no use in
+		 resending a signal.  */
+	    }
+	}
+      else
+	{
+	  /* If the core knows the thread is not executing, then we
+	     have the last signal recorded in
+	     thread_info->stop_signal, unless this is inferior_ptid,
+	     in which case, it's in the global stop_signal, due to
+	     context switching.  */
+
+	  if (ptid_equal (lp->ptid, inferior_ptid))
+	    signo = stop_signal;
+	  else
+	    {
+	      struct thread_info *tp = find_thread_pid (lp->ptid);
+	      gdb_assert (tp);
+	      signo = tp->stop_signal;
+	    }
+	}
+
+      if (signo != TARGET_SIGNAL_0
+	  && !signal_pass_state (signo))
+	{
+	  if (debug_linux_nat)
+	    fprintf_unfiltered (gdb_stdlog, "\
+GPT: lwp %s had signal %s, but it is in no pass state\n",
+				target_pid_to_str (lp->ptid),
+				target_signal_to_string (signo));
+	}
+      else
+	{
+	  if (signo != TARGET_SIGNAL_0)
+	    *status = W_STOPCODE (target_signal_to_host (signo));
+
+	  if (debug_linux_nat)
+	    fprintf_unfiltered (gdb_stdlog,
+				"GPT: lwp %s as pending signal %s\n",
+				target_pid_to_str (lp->ptid),
+				target_signal_to_string (signo));
+	}
     }
-  else if (target_can_async_p ())
-    queued_waitpid (GET_LWP (lp->ptid), status, __WALL);
   else
-    *status = lp->status;
+    {
+      if (GET_LWP (lp->ptid) == GET_LWP (last_ptid))
+	{
+	  if (stop_signal != TARGET_SIGNAL_0
+	      && signal_pass_state (stop_signal))
+	    *status = W_STOPCODE (target_signal_to_host (stop_signal));
+	}
+      else if (target_can_async_p ())
+	queued_waitpid (GET_LWP (lp->ptid), status, __WALL);
+      else
+	*status = lp->status;
+    }
 
   return 0;
 }
@@ -1379,6 +1448,13 @@ linux_nat_detach (char *args, int from_t
   if (target_can_async_p ())
     linux_nat_async (NULL, 0);
 
+  /* Stop all threads before detaching.  ptrace requires that the
+     thread is stopped to successfully detach.  */
+  iterate_over_lwps (stop_callback, NULL);
+  /* ... and wait until all of them have reported back that
+     they're no longer running.  */
+  iterate_over_lwps (stop_wait_callback, NULL);
+
   iterate_over_lwps (detach_callback, NULL);
 
   /* Only the initial process should be left right now.  */
@@ -1445,6 +1521,13 @@ resume_set_callback (struct lwp_info *lp
   return 0;
 }
 
+static int
+sigint_clear_callback (struct lwp_info *lp, void *data)
+{
+  lp->sigint = 0;
+  return 0;
+}
+
 static void
 linux_nat_resume (ptid_t ptid, int step, enum target_signal signo)
 {
@@ -1468,10 +1551,17 @@ linux_nat_resume (ptid_t ptid, int step,
   /* A specific PTID means `step only this process id'.  */
   resume_all = (PIDGET (ptid) == -1);
 
-  if (resume_all)
-    iterate_over_lwps (resume_set_callback, NULL);
-  else
-    iterate_over_lwps (resume_clear_callback, NULL);
+  if (non_stop && resume_all)
+    internal_error (__FILE__, __LINE__,
+		    "can't resume all in non-stop mode");
+
+  if (!non_stop)
+    {
+      if (resume_all)
+	iterate_over_lwps (resume_set_callback, NULL);
+      else
+	iterate_over_lwps (resume_clear_callback, NULL);
+    }
 
   /* If PID is -1, it's the current inferior that should be
      handled specially.  */
@@ -1481,6 +1571,7 @@ linux_nat_resume (ptid_t ptid, int step,
   lp = find_lwp_pid (ptid);
   gdb_assert (lp != NULL);
 
+  /* Convert to something the lower layer understands.  */
   ptid = pid_to_ptid (GET_LWP (lp->ptid));
 
   /* Remember if we're stepping.  */
@@ -1489,6 +1580,9 @@ linux_nat_resume (ptid_t ptid, int step,
   /* Mark this LWP as resumed.  */
   lp->resumed = 1;
 
+  /* Remove the SIGINT mark.  Used in non-stop mode.  */
+  lp->sigint = 0;
+
   /* If we have a pending wait status for this thread, there is no
      point in resuming the process.  But first make sure that
      linux_nat_wait won't preemptively handle the event - we
@@ -1631,6 +1725,8 @@ linux_handle_extended_wait (struct lwp_i
 	ourstatus->kind = TARGET_WAITKIND_VFORKED;
       else
 	{
+	  struct cleanup *old_chain;
+
 	  ourstatus->kind = TARGET_WAITKIND_IGNORE;
 	  new_lp = add_lwp (BUILD_LWP (new_pid, GET_PID (inferior_ptid)));
 	  new_lp->cloned = 1;
@@ -1650,20 +1746,43 @@ linux_handle_extended_wait (struct lwp_i
 	  else
 	    status = 0;
 
+	  /* Make thread_db aware of this thread.  We do this
+	     early, so in non-stop mode, threads show up as they're
+	     created, instead of on next stop.  thread_db needs a
+	     stopped inferior_ptid --- since we know LP is stopped,
+	     use it this time.  */
+	  old_chain = save_inferior_ptid ();
+	  inferior_ptid = lp->ptid;
+	  lp->stopped = 1;
+	  target_find_new_threads ();
+	  do_cleanups (old_chain);
+	  if (!in_thread_list (new_lp->ptid))
+	    {
+	      /* We're not using thread_db.  Attach and add it to
+		 GDB's list.  */
+	      lin_lwp_attach_lwp (new_lp->ptid);
+	      target_post_attach (GET_LWP (new_lp->ptid));
+	      add_thread (new_lp->ptid);
+	    }
+
 	  if (stopping)
 	    new_lp->stopped = 1;
 	  else
 	    {
+ 	      new_lp->stopped = 0;
 	      new_lp->resumed = 1;
 	      ptrace (PTRACE_CONT,
 		      PIDGET (lp->waitstatus.value.related_pid), 0,
 		      status ? WSTOPSIG (status) : 0);
+	      set_running (new_lp->ptid, 1);
+	      set_executing (new_lp->ptid, 1);
 	    }
 
 	  if (debug_linux_nat)
 	    fprintf_unfiltered (gdb_stdlog,
 				"LHEW: Got clone event from LWP %ld, resuming\n",
 				GET_LWP (lp->ptid));
+	  lp->stopped = 0;
 	  ptrace (PTRACE_CONT, GET_LWP (lp->ptid), 0, 0);
 
 	  return 1;
@@ -2383,13 +2502,7 @@ linux_nat_filter_event (int lwpid, int s
 	 not the end of the debugged application and should be
 	 ignored.  */
       if (num_lwps > 0)
-	{
-	  /* Make sure there is at least one thread running.  */
-	  gdb_assert (iterate_over_lwps (running_callback, NULL));
-
-	  /* Discard the event.  */
-	  return NULL;
-	}
+	return NULL;
     }
 
   /* Check if the current LWP has previously exited.  In the nptl
@@ -2519,6 +2632,8 @@ linux_nat_wait (ptid_t ptid, struct targ
       lp->resumed = 1;
       /* Add the main thread to GDB's thread list.  */
       add_thread_silent (lp->ptid);
+      set_running (lp->ptid, 1);
+      set_executing (lp->ptid, 1);
     }
 
   sigemptyset (&flush_mask);
@@ -2747,19 +2862,23 @@ retry:
     fprintf_unfiltered (gdb_stdlog, "LLW: Candidate event %s in %s.\n",
 			status_to_str (status), target_pid_to_str (lp->ptid));
 
-  /* Now stop all other LWP's ...  */
-  iterate_over_lwps (stop_callback, NULL);
+  if (!non_stop)
+    {
+      /* Now stop all other LWP's ...  */
+      iterate_over_lwps (stop_callback, NULL);
 
-  /* ... and wait until all of them have reported back that they're no
-     longer running.  */
-  iterate_over_lwps (stop_wait_callback, &flush_mask);
-  iterate_over_lwps (flush_callback, &flush_mask);
-
-  /* If we're not waiting for a specific LWP, choose an event LWP from
-     among those that have had events.  Giving equal priority to all
-     LWPs that have had events helps prevent starvation.  */
-  if (pid == -1)
-    select_event_lwp (&lp, &status);
+      /* ... and wait until all of them have reported back that
+	 they're no longer running.  */
+      iterate_over_lwps (stop_wait_callback, &flush_mask);
+      iterate_over_lwps (flush_callback, &flush_mask);
+
+      /* If we're not waiting for a specific LWP, choose an event LWP
+	 from among those that have had events.  Giving equal priority
+	 to all LWPs that have had events helps prevent
+	 starvation.  */
+      if (pid == -1)
+	select_event_lwp (&lp, &status);
+    }
 
   /* Now that we've selected our final event LWP, cancel any
      breakpoints in other LWPs that have hit a GDB breakpoint.  See
@@ -2796,13 +2915,26 @@ static int
 kill_callback (struct lwp_info *lp, void *data)
 {
   errno = 0;
-  ptrace (PTRACE_KILL, GET_LWP (lp->ptid), 0, 0);
-  if (debug_linux_nat)
-    fprintf_unfiltered (gdb_stdlog,
-			"KC:  PTRACE_KILL %s, 0, 0 (%s)\n",
-			target_pid_to_str (lp->ptid),
-			errno ? safe_strerror (errno) : "OK");
 
+  /* PTRACE_KILL doesn't work when the thread is running.  */
+  if (!lp->stopped)
+    {
+      kill_lwp (GET_LWP (lp->ptid), SIGKILL);
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "KC:  kill_lwp (SIGKILL) %s (%s)\n",
+			    target_pid_to_str (lp->ptid),
+			    errno ? safe_strerror (errno) : "OK");
+    }
+  else
+    {
+      ptrace (PTRACE_KILL, GET_LWP (lp->ptid), 0, 0);
+      if (debug_linux_nat)
+	fprintf_unfiltered (gdb_stdlog,
+			    "KC:  PTRACE_KILL %s, 0, 0 (%s)\n",
+			    target_pid_to_str (lp->ptid),
+			    errno ? safe_strerror (errno) : "OK");
+    }
   return 0;
 }
 
@@ -2943,22 +3075,22 @@ linux_nat_xfer_partial (struct target_op
 static int
 linux_nat_thread_alive (ptid_t ptid)
 {
+  int err;
+
   gdb_assert (is_lwp (ptid));
 
-  errno = 0;
-  ptrace (PTRACE_PEEKUSER, GET_LWP (ptid), 0, 0);
+  /* Send signal 0 instead of anything ptrace, because ptracing a
+     running thread errors out claiming that the thread doesn't
+     exist.  */
+  err = kill_lwp (GET_LWP (ptid), 0);
+
   if (debug_linux_nat)
     fprintf_unfiltered (gdb_stdlog,
-			"LLTA: PTRACE_PEEKUSER %s, 0, 0 (%s)\n",
+			"LLTA: KILL(SIG0) %s (%s)\n",
 			target_pid_to_str (ptid),
-			errno ? safe_strerror (errno) : "OK");
+			err ? safe_strerror (err) : "OK");
 
-  /* Not every Linux kernel implements PTRACE_PEEKUSER.  But we can
-     handle that case gracefully since ptrace will first do a lookup
-     for the process based upon the passed-in pid.  If that fails we
-     will get either -ESRCH or -EPERM, otherwise the child exists and
-     is alive.  */
-  if (errno == ESRCH || errno == EPERM)
+  if (err != 0)
     return 0;
 
   return 1;
@@ -4140,6 +4272,33 @@ linux_nat_set_async_mode (int on)
   linux_nat_async_enabled = on;
 }
 
+static int
+send_sigint_callback (struct lwp_info *lp, void *data)
+{
+  /* Use is_stopped instead of lp->stopped, because the lwp may be
+     stopped due to an internal event, and we want to interrupt it in
+     that case too.  What we want is to check if the thread is stopped
+     from the point of view of the user.  */
+  if (!is_stopped (lp->ptid) && !lp->sigint)
+    {
+      kill_lwp (GET_LWP (lp->ptid), SIGINT);
+      lp->sigint = 1;
+    }
+  return 0;
+}
+
+static void
+linux_nat_stop_ptid (ptid_t ptid)
+{
+  if (ptid_equal (ptid, minus_one_ptid))
+    iterate_over_lwps (send_sigint_callback, &ptid);
+  else
+    {
+      struct lwp_info *lp = find_lwp_pid (ptid);
+      send_sigint_callback (lp, NULL);
+    }
+}
+
 void
 linux_nat_add_target (struct target_ops *t)
 {
@@ -4170,6 +4329,9 @@ linux_nat_add_target (struct target_ops 
   t->to_terminal_inferior = linux_nat_terminal_inferior;
   t->to_terminal_ours = linux_nat_terminal_ours;
 
+  /* Methods for non-stop support.  */
+  t->to_stop_ptid = linux_nat_stop_ptid;
+
   /* We don't change the stratum; this target will sit at
      process_stratum and thread_db will set at thread_stratum.  This
      is a little strange, since this is a multi-threaded-capable
Index: src/gdb/linux-nat.h
===================================================================
--- src.orig/gdb/linux-nat.h	2008-06-15 20:25:42.000000000 +0100
+++ src/gdb/linux-nat.h	2008-06-15 20:27:15.000000000 +0100
@@ -37,6 +37,10 @@ struct lwp_info
      SIGCHLD.  */
   int cloned;
 
+  /* Non-zero if we sent this LWP a SIGINT (but the LWP didn't report
+     it back yet).  */
+  int sigint;
+
   /* Non-zero if we sent this LWP a SIGSTOP (but the LWP didn't report
      it back yet).  */
   int signalled;
@@ -88,6 +92,8 @@ extern struct lwp_info *lwp_list;
 #define is_lwp(ptid)		(GET_LWP (ptid) != 0)
 #define BUILD_LWP(lwp, pid)	ptid_build (pid, lwp, 0)
 
+struct lwp_info *find_lwp_pid (ptid_t ptid);
+
 /* Attempt to initialize libthread_db.  */
 void check_for_thread_db (void);
 
Index: src/gdb/linux-thread-db.c
===================================================================
--- src.orig/gdb/linux-thread-db.c	2008-06-15 20:25:41.000000000 +0100
+++ src/gdb/linux-thread-db.c	2008-06-15 20:27:15.000000000 +0100
@@ -308,6 +308,8 @@ thread_from_lwp (ptid_t ptid)
      LWP.  */
   gdb_assert (GET_LWP (ptid) != 0);
 
+  /* Access an lwp we know is stopped.  */
+  proc_handle.pid = GET_LWP (ptid);
   err = td_ta_map_lwp2thr_p (thread_agent, GET_LWP (ptid), &th);
   if (err != TD_OK)
     error (_("Cannot find user-level thread for LWP %ld: %s"),
@@ -418,6 +420,9 @@ enable_thread_event (td_thragent_t *thre
   td_notify_t notify;
   td_err_e err;
 
+  /* Access an lwp we know is stopped.  */
+  proc_handle.pid = GET_LWP (inferior_ptid);
+
   /* Get the breakpoint address for thread EVENT.  */
   err = td_ta_event_addr_p (thread_agent, event, &notify);
   if (err != TD_OK)
@@ -761,6 +766,9 @@ check_event (ptid_t ptid)
   if (stop_pc != td_create_bp_addr && stop_pc != td_death_bp_addr)
     return;
 
+  /* Access an lwp we know is stopped.  */
+  proc_handle.pid = GET_LWP (ptid);
+
   /* If we are at a create breakpoint, we do not know what new lwp
      was created and cannot specifically locate the event message for it.
      We have to call td_ta_event_getmsg() to get
@@ -955,7 +963,14 @@ static void
 thread_db_find_new_threads (void)
 {
   td_err_e err;
+  struct lwp_info *lp = find_lwp_pid (inferior_ptid);
+
+  if (!lp || !lp->stopped)
+    /* In linux, we can only read memory through a stopped lwp.  */
+    return;
 
+  /* Access an lwp we know is stopped.  */
+  proc_handle.pid = GET_LWP (inferior_ptid);
   /* Iterate over all user-space threads to discover new threads.  */
   err = td_ta_thr_iter_p (thread_agent, find_new_threads_callback, NULL,
 			  TD_THR_ANY_STATE, TD_THR_LOWEST_PRIORITY,

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]