gprof profiling of multi-threaded Cygwin programs

Mark Geisert mark@maxrnd.com
Wed Feb 17 05:28:00 GMT 2016


I've attached a patch set modifying Cygwin's profiling support to sample the PC
values of all of an application's threads, not just the main thread.  There is no
change to how profiling is requested: just compile and link the app with "-pg" 
as usual.  The profiling info is dumped into file gmon.out as usual.

There is a behavioral change that ought to be documented somewhere:  If a
gmon.out file already exists when a profiled application exits, the app will now
dump its profiling info into a separate file, gmon.outXXXXXX, where mkstemp()
replaces the Xs with random alphanumeric characters.  I added this functionality
so that a profiled program can fork() and still retain profiling info for both
parent and child.  The old behavior was to simply overwrite any existing
gmon.out file.

There is no change to the normal Cygwin execution paths if profiling is not
enabled.  When it is enabled, only the single profiling thread in each profiled
app does more work than it used to.

Here's a change log of the modifications; all files are in winsup/cygwin:

         * common.din (cygheap_profthr_all): Export.
         * cygheap.cc (cygheap_profthr_all): Implement a C-callable function
         that walks cygheap's threadlist, handing each pthread's thread handle
         in turn to profthr_byhandle().
         * gmon.c (_mcleanup): Added support for multiple simultaneous
         gmon.out* files created when necessary using mkstemp(). Added
         #include <errno.h>, added extern decl for _setmode().
         * gmon.h (struct gmonparam): Made state decl volatile.
         * mcount.c (_MCOUNT_DECL): Changed stores into gmonparam.state to use
         Interlocked operations. Added #include "winsup.h", updated commentary.
         * profil.c (profthr_byhandle): New function abstracting out the
         updating of profile counters based on a thread handle.
         (profthr_func): Updated to call profthr_byhandle() to sample the main
         thread then call cygheap_profthr_all() to sample all other pthreads.

Thanks for reading,

..mark
-------------- next part --------------
diff --git a/winsup/cygwin/common.din b/winsup/cygwin/common.din
index 9584d09..243fd01 100644
--- a/winsup/cygwin/common.din
+++ b/winsup/cygwin/common.din
@@ -269,6 +269,7 @@ ctime SIGFE
 ctime_r SIGFE
 cuserid NOSIGFE
 cwait SIGFE
+cygheap_profthr_all NOSIGFE
 cygwin_attach_handle_to_fd SIGFE
 cygwin_conv_path SIGFE
 cygwin_conv_path_list SIGFE
diff --git a/winsup/cygwin/cygheap.cc b/winsup/cygwin/cygheap.cc
index 6493485..5159d30 100644
--- a/winsup/cygwin/cygheap.cc
+++ b/winsup/cygwin/cygheap.cc
@@ -744,3 +744,16 @@ init_cygheap::find_tls (int sig, bool& issig_wait)
     WaitForSingleObject (t->mutex, INFINITE);
   return t;
 }
+
+/* Called from profil.c to sample all non-main thread PC values for profiling */
+extern "C" void
+cygheap_profthr_all (void (*profthr_byhandle) (HANDLE))
+{
+  int ix = -1;
+  while (++ix < (int) nthreads)
+    {
+      _cygtls *tls = cygheap->threadlist[ix].thread;
+      if (tls->tid)
+	profthr_byhandle (tls->tid->win32_obj_id);
+    }
+}
diff --git a/winsup/cygwin/gmon.c b/winsup/cygwin/gmon.c
index 96b1189..0b7ecc0 100644
--- a/winsup/cygwin/gmon.c
+++ b/winsup/cygwin/gmon.c
@@ -36,6 +36,7 @@ static char rcsid[] = "$OpenBSD: gmon.c,v 1.8 1997/07/23 21:11:27 kstailey Exp $
  * The differences should be within __MINGW32__ guard.
  */
 
+#include <errno.h>
 #include <fcntl.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -49,6 +50,7 @@ static char rcsid[] = "$OpenBSD: gmon.c,v 1.8 1997/07/23 21:11:27 kstailey Exp $
 
 /* XXX needed? */
 //extern char *minbrk __asm ("minbrk");
+extern int _setmode(int, int);
 
 #ifdef _WIN64
 #define MINUS_ONE_P (-1LL)
@@ -152,6 +154,7 @@ void
 _mcleanup(void)
 {
 	static char gmon_out[] = "gmon.out";
+	static char gmon_template[] = "gmon.outXXXXXX";
 	int fd;
 	int hz;
 	int fromindex;
@@ -222,7 +225,14 @@ _mcleanup(void)
 	proffile = gmon_out;
 #endif
 
-	fd = open(proffile , O_CREAT|O_TRUNC|O_WRONLY|O_BINARY, 0666);
+	fd = open(proffile, O_CREAT|O_EXCL|O_TRUNC|O_WRONLY|O_BINARY, 0666);
+	if (fd < 0 && errno == EEXIST) {
+		fd = mkstemp(gmon_template);
+		if (fd >= 0) {
+			_setmode(fd, O_BINARY);
+			fchmod(fd, 0644);
+		}
+	}
 	if (fd < 0) {
 		perror( proffile );
 		return;
diff --git a/winsup/cygwin/gmon.h b/winsup/cygwin/gmon.h
index 0932ed9..b0fb479 100644
--- a/winsup/cygwin/gmon.h
+++ b/winsup/cygwin/gmon.h
@@ -153,7 +153,7 @@ struct rawarc {
  * The profiling data structures are housed in this structure.
  */
 struct gmonparam {
-	int		state;
+	volatile int	state;
 	u_short		*kcount;
 	size_t		kcountsize;
 	u_short		*froms;
diff --git a/winsup/cygwin/mcount.c b/winsup/cygwin/mcount.c
index fad6728..6111b35 100644
--- a/winsup/cygwin/mcount.c
+++ b/winsup/cygwin/mcount.c
@@ -41,6 +41,7 @@ static char rcsid[] = "$OpenBSD: mcount.c,v 1.6 1997/07/23 21:11:27 kstailey Exp
 #endif
 #include <sys/types.h>
 #include "gmon.h"
+#include "winsup.h"
 
 /*
  * mcount is called on entry to each function compiled with the profiling
@@ -70,11 +71,12 @@ _MCOUNT_DECL (size_t frompc, size_t selfpc)
 	p = &_gmonparam;
 	/*
 	 * check that we are profiling
-	 * and that we aren't recursively invoked.
+	 * and that we aren't recursively invoked by this thread
+	 * or entered anew by any other thread.
 	 */
-	if (p->state != GMON_PROF_ON)
+	if (InterlockedCompareExchange (
+		    &p->state, GMON_PROF_BUSY, GMON_PROF_ON) != GMON_PROF_ON)
 		return;
-	p->state = GMON_PROF_BUSY;
 	/*
 	 * check that frompcindex is a reasonable pc value.
 	 * for example:	signal catchers get called from the stack,
@@ -162,10 +164,10 @@ _MCOUNT_DECL (size_t frompc, size_t selfpc)
 		}
 	}
 done:
-	p->state = GMON_PROF_ON;
+	InterlockedExchange (&p->state, GMON_PROF_ON);
 	return;
 overflow:
-	p->state = GMON_PROF_ERROR;
+	InterlockedExchange (&p->state, GMON_PROF_ERROR);
 	return;
 }
 
diff --git a/winsup/cygwin/profil.c b/winsup/cygwin/profil.c
index eb41c08..f76fda5 100644
--- a/winsup/cygwin/profil.c
+++ b/winsup/cygwin/profil.c
@@ -28,6 +28,8 @@
 /* global profinfo for profil() call */
 static struct profinfo prof;
 
+extern void cygheap_profthr_all (void (*) (HANDLE));
+
 /* Get the pc for thread THR */
 
 static size_t
@@ -65,25 +67,36 @@ print_prof (struct profinfo *p)
 }
 #endif
 
-/* Everytime we wake up use the main thread pc to hash into the cell in the
-   profile buffer ARG. */
+/* Every time we wake up sample the main thread's pc to hash into the cell
+   in the profile buffer ARG.  Then all other pthreads' pc's are sampled.  */
 
-static void CALLBACK profthr_func (LPVOID);
+static void
+profthr_byhandle (HANDLE thr)
+{
+  size_t idx;
+  size_t pc = (size_t) get_thrpc (thr);
+
+  // code assumes there is only one profinfo in play: the static prof up top
+  if (pc >= prof.lowpc && pc < prof.highpc)
+    {
+      idx = PROFIDX (pc, prof.lowpc, prof.scale);
+      prof.counter[idx]++;
+    }
+}
 
 static void CALLBACK
 profthr_func (LPVOID arg)
 {
   struct profinfo *p = (struct profinfo *) arg;
-  size_t pc, idx;
 
   for (;;)
     {
-      pc = (size_t) get_thrpc (p->targthr);
-      if (pc >= p->lowpc && pc < p->highpc)
-	{
-	  idx = PROFIDX (pc, p->lowpc, p->scale);
-	  p->counter[idx]++;
-	}
+      // record profiling sample for main thread
+      profthr_byhandle (p->targthr);
+
+      // record profiling samples for other pthreads, if any
+      cygheap_profthr_all (profthr_byhandle);
+
 #if 0
       print_prof (p);
 #endif


More information about the Cygwin-patches mailing list