This is the mail archive of the libc-alpha@sources.redhat.com mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]

RFC: Profiling multiple DSOs


Here is a patch to support profiling multiple DSOs. It works with

# LD_PROFILE_ALL=1 ./a.out
# LD_PROFILE=libc.so.6:foo.so:bar.so ./a.out

Any comments?


H.J.
2001-09-25  H.J. Lu  <hjl@gnu.org>

	* elf/Versions (ld): Add _dl_stop_profile to GLIBC_2.2.5.

	* elf/dl-close.c (_dl_close): Call _dl_stop_profile if
	_dl_profile != NULL.

	* elf/dl-open.c (dl_open_worker): Call _dl_start_profile if
	_dl_profile != NULL.

	* elf/dl-profile.c: Updated to support profiling multiple
	DSOs.

	* elf/dl-profstub.c (_dl_profile_map): Removed.
	(_dl_mcount_wrapper): Check _dl_profile instead of
	_dl_profile_map.

	* elf/dl-support.c (_dl_profile_map): Removed.
	* elf/rtld.c: Likewise.
	* sysdeps/generic/ldsodefs.h: Likewise.

	* elf/rtld.c (dl_main): Call _dl_init_profile if necessary.
	Call _dl_start_profile after relocating an object.
	(process_envvars): Support LD_PROFILE_ALL.

	* sysdeps/generic/ldsodefs.h (_dl_init_profile): New.
	(_dl_stop_profile): Likewise.

	* sysdeps/i386/dl-machine.h (elf_machine_runtime_setup): Don't
	set _dl_profile_map.

	* sysdeps/posix/dl-profil.h: New.

--- libc/elf/Versions.prof-dso	Fri Jul 27 11:01:30 2001
+++ libc/elf/Versions	Tue Sep 25 17:00:29 2001
@@ -66,4 +66,7 @@ ld {
   GLIBC_2.2.3 {
     _dl_debug_mask; _dl_debug_printf;
   }
+  GLIBC_2.2.5 {
+    _dl_stop_profile;
+  }
 }
--- libc/elf/dl-close.c.prof-dso	Tue Sep 11 07:42:39 2001
+++ libc/elf/dl-close.c	Tue Sep 25 17:00:29 2001
@@ -206,6 +206,9 @@ _dl_close (void *_map)
 	      --_dl_main_searchlist->r_nlist;
 	    }
 
+	  if (__builtin_expect (_dl_profile != NULL, 0))
+	    _dl_stop_profile (imap);
+
 	  /* We can unmap all the maps at once.  We determined the
 	     start address and length when we loaded the object and
 	     the `munmap' call does the rest.  */
--- libc/elf/dl-open.c.prof-dso	Tue Sep 25 16:53:56 2001
+++ libc/elf/dl-open.c	Tue Sep 25 17:24:55 2001
@@ -277,17 +277,12 @@ dl_open_worker (void *a)
 	  if (_dl_profile != NULL)
 	    {
 	      /* If this here is the shared object which we want to profile
-		 make sure the profile is started.  We can find out whether
-	         this is necessary or not by observing the `_dl_profile_map'
-	         variable.  If was NULL but is not NULL afterwars we must
-		 start the profiling.  */
-	      struct link_map *old_profile_map = _dl_profile_map;
-
+		 make sure the profile is started.  */
 	      _dl_relocate_object (l, l->l_scope, 1, 1);
 
-	      if (old_profile_map == NULL && _dl_profile_map != NULL)
+	      if (_dl_profile != NULL)
 		/* We must prepare the profiling.  */
-		_dl_start_profile (_dl_profile_map, _dl_profile_output);
+		_dl_start_profile (l, _dl_profile_output);
 	    }
 	  else
 #endif
--- libc/elf/dl-profile.c.prof-dso	Sat Sep 22 23:28:25 2001
+++ libc/elf/dl-profile.c	Tue Sep 25 17:39:49 2001
@@ -34,6 +34,11 @@
 #include <sys/param.h>
 #include <sys/stat.h>
 #include <atomicity.h>
+#ifdef _LIBC_REENTRANT
+# include <pt-machine.h>
+# include <signal.h>
+# include <time.h>
+#endif
 
 /* The LD_PROFILE feature has to be implemented different to the
    normal profiling using the gmon/ functions.  The problem is that an
@@ -132,40 +137,271 @@ struct here_cg_arc_record
     uint32_t count;
   } __attribute__ ((packed));
 
-static struct here_cg_arc_record *data;
-
-/* This is the number of entry which have been incorporated in the toset.  */
-static uint32_t narcs;
-/* This is a pointer to the object representing the number of entries
-   currently in the mmaped file.  At no point of time this has to be the
-   same as NARCS.  If it is equal all entries from the file are in our
-   lists.  */
-static volatile uint32_t *narcsp;
-
-/* Description of the currently profiled object.  */
-static long int state = GMON_PROF_OFF;
-
-static volatile uint16_t *kcount;
-static size_t kcountsize;
-
 struct here_fromstruct
   {
     struct here_cg_arc_record volatile *here;
     uint16_t link;
   };
 
-static volatile uint16_t *tos;
+struct dl_profile_record
+{
+  struct link_map *map;
+
+  struct here_cg_arc_record *data;
+
+  /* This is the number of entry which have been incorporated in the
+      toset.  */
+  uint32_t narcs;
+  /* This is a pointer to the object representing the number of entries
+     currently in the mmaped file.  At no point of time this has to be
+     the same as NARCS.  If it is equal all entries from the file are
+     in our lists.  */
+  volatile uint32_t *narcsp;
+
+  /* Description of the currently profiled object.  */
+  long int state;
+
+  volatile uint16_t *kcount;
+  size_t kcountsize;
+  int scale;
+
+  volatile uint16_t *tos;
+
+  struct here_fromstruct *froms;
+  uint32_t fromlimit;
+  volatile uint32_t fromidx;
+
+  uintptr_t lowpc;
+  uintptr_t highpc;
+  size_t textsize;
+  unsigned int hashfraction;
+  unsigned int log_hashfraction;
+
+  struct dl_profile_record *next;
+};
+
+struct dl_profile_record_table
+{
+  struct dl_profile_record_table *next;
+  /* Number of entries in the RECORD array.  */
+  unsigned int len;
+  /* Index of the first entry of RECORD not yet handed out.  */
+  volatile long int first_unused;
+  struct dl_profile_record record [0];
+};
+
+static
+struct local
+{
+  struct dl_profile_record_table *root;	/* Most recently mapped table.  */
+  struct dl_profile_record *list;	/* Records in use, by load address.  */
+  struct dl_profile_record *free_list;	/* Released records for reuse.  */
+  int npages;				/* Pages per new table; 0 = off.  */
+#ifdef _LIBC_REENTRANT
+  volatile int lock;			/* Flag spun on by lock ().  */
+  sigset_t full_sigset;			/* Signals blocked by lock ().  */
+#endif
+} local;
+
+/* Locking is tricky: a signal handler (e.g. the SIGPROF handler, via
+   find_profile_record) may call into this file again while the lock
+   is held.  Hence lock () blocks all signals *before* taking the lock
+   and unlock () releases the lock *before* restoring the mask.  Also,
+   a real-time process spinning on the lock could starve a non-realtime
+   holder, so after prolonged contention the spinner sleeps with
+   nanosleep () between attempts.  */
+#ifdef _LIBC_REENTRANT
+# define lock(l)						\
+{								\
+  sigset_t _saved_set;						\
+  int i = 10000;						\
+  if (!__sigismember (&(l)->full_sigset, SIGINT))		\
+    __sigfillset (&(l)->full_sigset);				\
+  /* Block signals first so no handler runs with the lock held.  */ \
+  __sigprocmask (SIG_BLOCK, &(l)->full_sigset, &_saved_set);	\
+  while (testandset ((int *) &(l)->lock))			\
+    {								\
+      struct timespec ts;					\
+      if (i > 0)						\
+	{							\
+	  --i;							\
+	  continue;						\
+	}							\
+      ts.tv_sec = 0;						\
+      ts.tv_nsec = 1*1000*1000;					\
+      __nanosleep (&ts, NULL);					\
+    }
+# define unlock(l)						\
+  (l)->lock = 0;						\
+  __sigprocmask (SIG_SETMASK, &_saved_set, NULL);		\
+}
+#else
+# define lock(l)
+# define unlock(l)
+#endif
+
+#ifdef MAP_ANON
+#define ANONFD (-1)
+#else
+extern int _dl_zerofd;
+#define ANONFD (_dl_zerofd)
+#endif
+
+static const char *profile_list;
+static int profile_all;
+
+static struct dl_profile_record_table *
+internal_function
+new_profile_record_table (struct local *l)
+{
+  size_t size = l->npages * _dl_pagesize;
+  struct dl_profile_record_table *new_table;
+
+  new_table = __mmap (0, size, PROT_READ | PROT_WRITE,
+		      MAP_ANON | MAP_PRIVATE, ANONFD, 0);
+  if (new_table == MAP_FAILED)
+    {
+      char buf[400];
+      int errnum = errno;
+      _dl_error_printf ("Cannot map pages for profiling record table: %s\n",
+			__strerror_r (errnum, buf, sizeof buf));
+      return NULL;
+    }
+
+  new_table->len = (size - sizeof (*new_table))
+		   / sizeof (struct dl_profile_record);
+  new_table->first_unused = 0;
+  new_table->next = l->root;
+  l->root = new_table;
+  return new_table;
+}
+
+/* Map a fresh record table for L and hand out its first record.
+   Returns NULL if the table cannot be mapped.  */
+static struct dl_profile_record *
+internal_function
+new_profile_record (struct local *l)
+{
+  struct dl_profile_record_table *new_table = new_profile_record_table (l);
+
+  if (new_table == NULL)
+    return NULL;
+
+  /* Double the page count so the next table is twice as large.  */
+  l->npages += l->npages;
+  return &new_table->record [new_table->first_unused++];
+}
+
+static struct dl_profile_record *
+internal_function
+make_profile_record (void)
+{
+  struct dl_profile_record *record = NULL;
+  struct dl_profile_record_table *t;
+  unsigned int old;
+  struct local *l = &local;
+
+  t = l->root;
+  while (l)
+    {
+      old = t->first_unused;
+      if (old >= t->len)
+	break;
+      else if (compare_and_swap (&t->first_unused, old, old + 1))
+	{
+	  record = &t->record [old];
+	  goto done;
+	}
+    }
+
+  if (l->free_list)
+    {
+      record = l->free_list;		/* get it from free-list */
+      l->free_list = record->next;
+    }
+  else
+    record = new_profile_record (l);	/* create new record */
+
+done:
+  return record;
+}
+
+static struct dl_profile_record *
+internal_function
+get_profile_record (struct link_map *map)
+{
+  struct dl_profile_record *record, **loc;
+  struct local *l = &local;
+
+  lock (l);
+
+  loc = &l->list;
+  record = *loc;
+  while (record && record->map->l_addr <= map->l_addr)
+    {
+      if (record->map == map)
+        {
+	/* We already have it. Do nothing.  */
+	  record = NULL;
+	  goto done;
+	}
+      loc = &record->next;
+      record = *loc;
+    }
+
+  record = make_profile_record ();
+  if (record)
+    {
+      record->map = map;
+      record->next = *loc;
+      *loc = record;
+    }
+
+done:
+  unlock (l);
+
+  return record;
+}
+
+static inline struct dl_profile_record *
+internal_function
+find_profile_record (ElfW(Addr) selfpc)
+{
+  struct dl_profile_record *record, *found;
+  struct local *l = &local;
+
+  lock (l);
+
+  found = NULL;
+  for (record = l->list; record && record->lowpc <= selfpc;
+       record = record->next)
+    if (selfpc < record->highpc)
+      {
+	found = record;
+	break;
+      }
+
+  unlock (l);
 
-static struct here_fromstruct *froms;
-static uint32_t fromlimit;
-static volatile uint32_t fromidx;
-
-static uintptr_t lowpc;
-static size_t textsize;
-static unsigned int hashfraction;
-static unsigned int log_hashfraction;
+  return found;
+}
 
+#include <dl-profil.h>
 
+void
+internal_function
+_dl_init_profile (const char *list, int all)
+{
+  local.npages = 1;
+  if (new_profile_record_table (&local) == NULL)
+    /* Turn off the profiling.  */
+    local.npages = 0;
+  profile_list = list;
+  profile_all = all;
+#ifdef DL_INIT_PROFILE
+  DL_INIT_PROFILE ();
+#endif
+}
 
 /* Set up profiling data to profile object desribed by MAP.  The output
    file is found (or created) in OUTPUT_DIR.  */
@@ -185,13 +421,49 @@ _dl_start_profile (struct link_map *map,
   size_t idx;
   size_t tossize;
   size_t fromssize;
-  uintptr_t highpc;
   struct gmon_hdr *addr = NULL;
   off_t expected_size;
   /* See profil(2) where this is described.  */
-  int s_scale;
+  const char *soname;
+  const char *match, *end;
+  size_t i, l;
+  struct dl_profile_record *record;
 #define SCALE_1_TO_1	0x10000L
 
+  if (local.npages == 0 || map->l_type == lt_executable)
+    return;
+
+  if (map->l_info[DT_SONAME] != NULL)
+    soname = ((const char *) D_PTR (map, l_info[DT_STRTAB])
+	      + map->l_info[DT_SONAME]->d_un.d_val);
+  else
+    {
+      if (map->l_name [0])
+	soname = basename (map->l_name);
+      else
+	soname = _dl_argv [0];
+    }
+
+  if (profile_all == 0)
+    {
+      match = strstr (profile_list, soname);
+      if (match == NULL)
+	return;
+
+      l = strlen (soname);
+      end = strchr (match, ':');
+      if (end)
+	i = end - match;
+      else
+	i = strlen (match);
+      if (i != l)
+	return;
+    }
+
+  record = get_profile_record (map);
+  if (record == NULL)
+    return;
+
   /* Compute the size of the sections which contain program code.  */
   for (ph = map->l_phdr; ph < &map->l_phdr[map->l_phnum]; ++ph)
     if (ph->p_type == PT_LOAD && (ph->p_flags & PF_X))
@@ -208,31 +480,34 @@ _dl_start_profile (struct link_map *map,
 
   /* Now we can compute the size of the profiling data.  This is done
      with the same formulars as in `monstartup' (see gmon.c).  */
-  state = GMON_PROF_OFF;
-  lowpc = ROUNDDOWN (mapstart + map->l_addr,
-		     HISTFRACTION * sizeof (HISTCOUNTER));
-  highpc = ROUNDUP (mapend + map->l_addr,
-		    HISTFRACTION * sizeof (HISTCOUNTER));
-  textsize = highpc - lowpc;
-  kcountsize = textsize / HISTFRACTION;
-  hashfraction = HASHFRACTION;
+  record->state = GMON_PROF_OFF;
+  record->lowpc = ROUNDDOWN (mapstart + map->l_addr,
+			     HISTFRACTION * sizeof (HISTCOUNTER));
+  record->highpc = ROUNDUP (mapend + map->l_addr,
+			    HISTFRACTION * sizeof (HISTCOUNTER));
+  record->textsize = record->highpc - record->lowpc;
+  record->kcountsize = record->textsize / HISTFRACTION;
+  record->hashfraction = HASHFRACTION;
   if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
     /* If HASHFRACTION is a power of two, mcount can use shifting
        instead of integer division.  Precompute shift amount.  */
-    log_hashfraction = __ffs (hashfraction * sizeof (*froms)) - 1;
+    record->log_hashfraction = __ffs (record->hashfraction
+				      * sizeof (*record->froms)) - 1;
   else
-    log_hashfraction = -1;
-  tossize = textsize / HASHFRACTION;
-  fromlimit = textsize * ARCDENSITY / 100;
-  if (fromlimit < MINARCS)
-    fromlimit = MINARCS;
-  if (fromlimit > MAXARCS)
-    fromlimit = MAXARCS;
-  fromssize = fromlimit * sizeof (struct here_fromstruct);
+    record->log_hashfraction = -1;
+  tossize = record->textsize / HASHFRACTION;
+  record->fromlimit = record->textsize * ARCDENSITY / 100;
+  if (record->fromlimit < MINARCS)
+    record->fromlimit = MINARCS;
+  if (record->fromlimit > MAXARCS)
+    record->fromlimit = MAXARCS;
+  fromssize = record->fromlimit
+		      * sizeof (struct here_fromstruct);
 
   expected_size = (sizeof (struct gmon_hdr)
-		   + 4 + sizeof (struct gmon_hist_hdr) + kcountsize
-		   + 4 + 4 + fromssize * sizeof (struct here_cg_arc_record));
+		   + 4 + sizeof (struct gmon_hist_hdr)
+		   + record->kcountsize + 4 + 4
+		   + fromssize * sizeof (struct here_cg_arc_record));
 
   /* Create the gmon_hdr we expect or write.  */
   memset (&gmon_hdr, '\0', sizeof (struct gmon_hdr));
@@ -242,7 +517,8 @@ _dl_start_profile (struct link_map *map,
   /* Create the hist_hdr we expect or write.  */
   *(char **) hist_hdr.low_pc = (char *) mapstart;
   *(char **) hist_hdr.high_pc = (char *) mapend;
-  *(int32_t *) hist_hdr.hist_size = kcountsize / sizeof (HISTCOUNTER);
+  *(int32_t *) hist_hdr.hist_size 
+    = record->kcountsize / sizeof (HISTCOUNTER);
   *(int32_t *) hist_hdr.prof_rate = __profile_frequency ();
   strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
   hist_hdr.dimen_abbrev = 's';
@@ -250,11 +526,11 @@ _dl_start_profile (struct link_map *map,
   /* First determine the output name.  We write in the directory
      OUTPUT_DIR and the name is composed from the shared objects
      soname (or the file name) and the ending ".profile".  */
-  filename = (char *) alloca (strlen (output_dir) + 1 + strlen (_dl_profile)
-			      + sizeof ".profile");
+  filename = (char *) alloca (strlen (output_dir) + 1
+			      + strlen (soname) + sizeof ".profile");
   cp = __stpcpy (filename, output_dir);
   *cp++ = '/';
-  __stpcpy (__stpcpy (cp, _dl_profile), ".profile");
+  __stpcpy (__stpcpy (cp, soname), ".profile");
 
 #ifdef O_NOFOLLOW
 # define EXTRA_FLAGS | O_NOFOLLOW
@@ -317,7 +593,7 @@ _dl_start_profile (struct link_map *map,
 	__munmap ((void *) addr, expected_size);
 
       _dl_error_printf ("%s: file is no correct profile data file for `%s'\n",
-			filename, _dl_profile);
+			filename, soname);
       return;
     }
 
@@ -338,12 +614,15 @@ _dl_start_profile (struct link_map *map,
 
   /* Pointer to data after the header.  */
   hist = (char *) (addr + 1);
-  kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
-			 + sizeof (struct gmon_hist_hdr));
+  record->kcount = (uint16_t *) ((char *) hist + sizeof (uint32_t)
+				 + sizeof (struct gmon_hist_hdr));
 
   /* Compute pointer to array of the arc information.  */
-  narcsp = (uint32_t *) ((char *) kcount + kcountsize + sizeof (uint32_t));
-  data = (struct here_cg_arc_record *) ((char *) narcsp + sizeof (uint32_t));
+  record->narcsp = (uint32_t *) ((char *) record->kcount
+				 + record->kcountsize
+				 + sizeof (uint32_t));
+  record->data = (struct here_cg_arc_record *) ((char *) record->narcsp
+						+ sizeof (uint32_t));
 
   if (st.st_size == 0)
     {
@@ -354,7 +633,7 @@ _dl_start_profile (struct link_map *map,
       memcpy (hist + sizeof (uint32_t), &hist_hdr,
 	      sizeof (struct gmon_hist_hdr));
 
-      narcsp[-1] = GMON_TAG_CG_ARC;
+      record->narcsp[-1] = GMON_TAG_CG_ARC;
     }
   else
     {
@@ -363,21 +642,22 @@ _dl_start_profile (struct link_map *map,
 	  || *(uint32_t *) hist != GMON_TAG_TIME_HIST
 	  || memcmp (hist + sizeof (uint32_t), &hist_hdr,
 		     sizeof (struct gmon_hist_hdr)) != 0
-	  || narcsp[-1] != GMON_TAG_CG_ARC)
+	  || record->narcsp[-1] != GMON_TAG_CG_ARC)
 	goto wrong_format;
     }
 
   /* Allocate memory for the froms data and the pointer to the tos records.  */
-  tos = (uint16_t *) calloc (tossize + fromssize, 1);
-  if (tos == NULL)
+  record->tos = (uint16_t *) calloc (tossize + fromssize, 1);
+  if (record->tos == NULL)
     {
       __munmap ((void *) addr, expected_size);
       _dl_fatal_printf ("Out of memory while initializing profiler\n");
       /* NOTREACHED */
     }
 
-  froms = (struct here_fromstruct *) ((char *) tos + tossize);
-  fromidx = 0;
+  record->froms = (struct here_fromstruct *) ((char *) record->tos
+					      + tossize);
+  record->fromidx = 0;
 
   /* Now we have to process all the arc count entries.  BTW: it is
      not critical whether the *NARCSP value changes meanwhile.  Before
@@ -386,47 +666,77 @@ _dl_start_profile (struct link_map *map,
 
      Loading the entries in reverse order should help to get the most
      frequently used entries at the front of the list.  */
-  for (idx = narcs = MIN (*narcsp, fromlimit); idx > 0; )
+  for (idx = record->narcs = MIN (*record->narcsp, record->fromlimit);
+       idx > 0; )
     {
       size_t to_index;
       size_t newfromidx;
       --idx;
-      to_index = (data[idx].self_pc / (hashfraction * sizeof (*tos)));
-      newfromidx = fromidx++;
-      froms[newfromidx].here = &data[idx];
-      froms[newfromidx].link = tos[to_index];
-      tos[to_index] = newfromidx;
+      to_index = (record->data[idx].self_pc
+		  / (record->hashfraction * sizeof (*record->tos)));
+      newfromidx = record->fromidx++;
+      record->froms[newfromidx].here = &record->data[idx];
+      record->froms[newfromidx].link = record->tos[to_index];
+      record->tos[to_index] = newfromidx;
     }
 
   /* Setup counting data.  */
-  if (kcountsize < highpc - lowpc)
+  if (record->kcountsize < record->highpc - record->lowpc)
     {
 #if 0
       s_scale = ((double) kcountsize / (highpc - lowpc)) * SCALE_1_TO_1;
 #else
-      size_t range = highpc - lowpc;
-      size_t quot = range / kcountsize;
+      size_t range = record->highpc - record->lowpc;
+      size_t quot = range / record->kcountsize;
 
       if (quot >= SCALE_1_TO_1)
-	s_scale = 1;
+	record->scale = 1;
       else if (quot >= SCALE_1_TO_1 / 256)
-	s_scale = SCALE_1_TO_1 / quot;
+	record->scale = SCALE_1_TO_1 / quot;
       else if (range > ULONG_MAX / 256)
-	s_scale = (SCALE_1_TO_1 * 256) / (range / (kcountsize / 256));
+	record->scale = (SCALE_1_TO_1 * 256)
+			/ (range / (record->kcountsize / 256));
       else
-	s_scale = (SCALE_1_TO_1 * 256) / ((range * 256) / kcountsize);
+	record->scale = (SCALE_1_TO_1 * 256)
+			/ ((range * 256) / record->kcountsize);
 #endif
     }
   else
-    s_scale = SCALE_1_TO_1;
-
-  /* Start the profiler.  */
-  __profil ((void *) kcount, kcountsize, lowpc, s_scale);
+    record->scale = SCALE_1_TO_1;
 
   /* Turn on profiling.  */
-  state = GMON_PROF_ON;
+  record->state = GMON_PROF_ON;
 }
 
+void
+internal_function
+_dl_stop_profile (struct link_map *map)
+{
+  struct dl_profile_record *record, **loc;
+  struct local *l = &local;
+
+  lock (l); 
+
+  loc = &l->list;
+  record = *loc;
+  while (record && record->map->l_addr <= map->l_addr)
+    {
+      if (record->map == map)
+	/* We find it.  */
+        break;
+      loc = &record->next;
+      record = *loc;
+    }
+
+  if (record && record->map == map)
+    {
+      *loc = record->next;
+      record->next = l->free_list;	/* put it on free-list */
+      l->free_list = record;
+    }
+
+  unlock (l);
+}
 
 void
 _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc)
@@ -434,13 +744,19 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
   volatile uint16_t *topcindex;
   size_t i, fromindex;
   struct here_fromstruct *fromp;
+  struct dl_profile_record *record;
+
+  /* Find the proper record.  */
+  record = find_profile_record (selfpc);
+  if (record == NULL)
+    goto done;
 
 #if 0
   /* XXX I think this is now not necessary anymore.  */
   if (! compare_and_swap (&state, GMON_PROF_ON, GMON_PROF_BUSY))
     return;
 #else
-  if (state != GMON_PROF_ON)
+  if (record->state != GMON_PROF_ON)
     return;
 #endif
 
@@ -449,12 +765,10 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
      restrict it in any way, just set to a fixed value (0) in case it
      is outside the allowed range.  These calls show up as calls from
      <external> in the gprof output.  */
-  frompc -= lowpc;
-  if (frompc >= textsize)
+  frompc -= record->lowpc;
+  if (frompc >= record->textsize)
     frompc = 0;
-  selfpc -= lowpc;
-  if (selfpc >= textsize)
-    goto done;
+  selfpc -= record->lowpc;
 
   /* Getting here we now have to find out whether the location was
      already used.  If yes we are lucky and only have to increment a
@@ -463,17 +777,17 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
 
   /* Avoid integer divide if possible.  */
   if ((HASHFRACTION & (HASHFRACTION - 1)) == 0)
-    i = selfpc >> log_hashfraction;
+    i = selfpc >> record->log_hashfraction;
   else
-    i = selfpc / (hashfraction * sizeof (*tos));
+    i = selfpc / (record->hashfraction * sizeof (*record->tos));
 
-  topcindex = &tos[i];
+  topcindex = &record->tos[i];
   fromindex = *topcindex;
 
   if (fromindex == 0)
     goto check_new_or_add;
 
-  fromp = &froms[fromindex];
+  fromp = &record->froms[fromindex];
 
   /* We have to look through the chain of arcs whether there is already
      an entry for our arc.  */
@@ -481,7 +795,7 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
     {
       if (fromp->link != 0)
 	do
-	  fromp = &froms[fromp->link];
+	  fromp = &record->froms[fromp->link];
 	while (fromp->link != 0 && fromp->here->from_pc != frompc);
 
       if (fromp->here->from_pc != frompc)
@@ -491,43 +805,45 @@ _dl_mcount (ElfW(Addr) frompc, ElfW(Addr
 	check_new_or_add:
 	  /* Our entry is not among the entries we read so far from the
 	     data file.  Now see whether we have to update the list.  */
-	  while (narcs != *narcsp && narcs < fromlimit)
+	  while (record->narcs != *record->narcsp
+		 && record->narcs < record->fromlimit)
 	    {
 	      size_t to_index;
 	      size_t newfromidx;
-	      to_index = (data[narcs].self_pc
-			  / (hashfraction * sizeof (*tos)));
-	      newfromidx = exchange_and_add (&fromidx, 1) + 1;
-	      froms[newfromidx].here = &data[narcs];
-	      froms[newfromidx].link = tos[to_index];
-	      tos[to_index] = newfromidx;
-	      atomic_add (&narcs, 1);
+	      to_index = (record->data[record->narcs].self_pc
+			  / (record->hashfraction
+			     * sizeof (*record->tos)));
+	      newfromidx = exchange_and_add (&record->fromidx, 1) + 1;
+	      record->froms[newfromidx].here = &record->data[record->narcs];
+	      record->froms[newfromidx].link = record->tos[to_index];
+	      record->tos[to_index] = newfromidx;
+	      atomic_add (&record->narcs, 1);
 	    }
 
 	  /* If we still have no entry stop searching and insert.  */
 	  if (*topcindex == 0)
 	    {
-	      uint_fast32_t newarc = exchange_and_add (narcsp, 1);
+	      uint_fast32_t newarc = exchange_and_add (record->narcsp, 1);
 
 	      /* In rare cases it could happen that all entries in FROMS are
 		 occupied.  So we cannot count this anymore.  */
-	      if (newarc >= fromlimit)
+	      if (newarc >= record->fromlimit)
 		goto done;
 
-	      *topcindex = exchange_and_add (&fromidx, 1) + 1;
-	      fromp = &froms[*topcindex];
+	      *topcindex = exchange_and_add (&record->fromidx, 1) + 1;
+	      fromp = &record->froms[*topcindex];
 
-	      fromp->here = &data[newarc];
-	      data[newarc].from_pc = frompc;
-	      data[newarc].self_pc = selfpc;
-	      data[newarc].count = 0;
+	      fromp->here = &record->data[newarc];
+	      record->data[newarc].from_pc = frompc;
+	      record->data[newarc].self_pc = selfpc;
+	      record->data[newarc].count = 0;
 	      fromp->link = 0;
-	      atomic_add (&narcs, 1);
+	      atomic_add (&record->narcs, 1);
 
 	      break;
 	    }
 
-	  fromp = &froms[*topcindex];
+	  fromp = &record->froms[*topcindex];
 	}
       else
 	/* Found in.  */
--- libc/elf/dl-profstub.c.prof-dso	Sat Jul  7 16:44:45 2001
+++ libc/elf/dl-profstub.c	Tue Sep 25 17:00:29 2001
@@ -22,10 +22,6 @@
 #include <elf.h>
 #include <ldsodefs.h>
 
-/* This is the map for the shared object we profile.  It is defined here
-   only because we test for this value being NULL or not.  */
-extern struct link_map *_dl_profile_map;
-
 
 void
 _dl_mcount_wrapper (void *selfpc)
@@ -37,6 +33,6 @@ _dl_mcount_wrapper (void *selfpc)
 void
 _dl_mcount_wrapper_check (void *selfpc)
 {
-  if (_dl_profile_map != NULL)
+  if (_dl_profile != NULL)
     _dl_mcount ((ElfW(Addr)) RETURN_ADDRESS (0), (ElfW(Addr)) selfpc);
 }
--- libc/elf/dl-support.c.prof-dso	Tue Sep 25 17:00:29 2001
+++ libc/elf/dl-support.c	Tue Sep 25 17:36:23 2001
@@ -62,9 +62,6 @@ const char *_dl_profile;
    ignored.  */
 const char *_dl_inhibit_rpath;
 
-/* The map for the object we will profile.  */
-struct link_map *_dl_profile_map;
-
 /* This is the address of the last stack address ever used.  */
 void *__libc_stack_end;
 
--- libc/elf/rtld.c.prof-dso	Tue Sep 25 17:00:29 2001
+++ libc/elf/rtld.c	Tue Sep 25 17:44:06 2001
@@ -64,9 +64,9 @@ size_t _dl_platformlen;
 unsigned long _dl_hwcap;
 fpu_control_t _dl_fpu_control = _FPU_DEFAULT;
 struct r_search_path *_dl_search_paths;
+static int profile_all;
 const char *_dl_profile;
 const char *_dl_profile_output;
-struct link_map *_dl_profile_map;
 int _dl_lazy = 1;
 /* XXX I know about at least one case where we depend on the old weak
    behavior (it has to do with librt).  Until we get DSO groups implemented
@@ -1039,6 +1039,9 @@ of this helper program; chances are you 
     while (l->l_next)
       l = l->l_next;
 
+    if (__builtin_expect (consider_profiling, 0))
+      _dl_init_profile (_dl_profile, profile_all);
+
     HP_TIMING_NOW (start);
     do
       {
@@ -1054,7 +1057,12 @@ of this helper program; chances are you 
 	  }
 
 	if (l != &_dl_rtld_map)
-	  _dl_relocate_object (l, l->l_scope, _dl_lazy, consider_profiling);
+	  {
+	    _dl_relocate_object (l, l->l_scope, _dl_lazy,
+				 consider_profiling);
+	    if (__builtin_expect (consider_profiling, 0))
+	      _dl_start_profile (l, _dl_profile_output);
+	  }
 
 	l = l->l_prev;
       }
@@ -1070,14 +1078,6 @@ of this helper program; chances are you 
        _dl_relocate_object might need to call `mprotect' for DT_TEXTREL.  */
     _dl_sysdep_start_cleanup ();
 
-    /* Now enable profiling if needed.  Like the previous call,
-       this has to go here because the calls it makes should use the
-       rtld versions of the functions (particularly calloc()), but it
-       needs to have _dl_profile_map set up by the relocator.  */
-    if (__builtin_expect (_dl_profile_map != NULL, 0))
-      /* We must prepare the profiling.  */
-      _dl_start_profile (_dl_profile_map, _dl_profile_output);
-
     if (_dl_rtld_map.l_opencount > 1)
       {
 	/* There was an explicit ref to the dynamic linker as a shared lib.
@@ -1344,7 +1344,7 @@ process_envvars (enum mode *modep)
 	      break;
 	    }
 
-	  /* Which shared object shall be profiled.  */
+	  /* Which shared objects shall be profiled.  */
 	  if (memcmp (envline, "PROFILE", 7) == 0)
 	    _dl_profile = &envline[8];
 	  break;
@@ -1378,6 +1378,13 @@ process_envvars (enum mode *modep)
 	  if (!__libc_enable_secure
 	      && memcmp (envline, "ORIGIN_PATH", 11) == 0)
 	    _dl_origin_path = &envline[12];
+
+	  /* Profile all shared objects?  */
+	  if (memcmp (envline, "PROFILE_ALL", 11) == 0)
+	    {
+	      _dl_profile = &envline[12];
+	      profile_all = 1;
+	    }
 	  break;
 
 	case 12:
--- libc/sysdeps/generic/ldsodefs.h.prof-dso	Thu Sep  6 16:17:02 2001
+++ libc/sysdeps/generic/ldsodefs.h	Tue Sep 25 17:38:52 2001
@@ -187,10 +187,8 @@ extern unsigned int _dl_osversion;
 /* File descriptor referring to the zero-fill device.  */
 extern int _dl_zerofd;
 
-/* Name of the shared object to be profiled (if any).  */
+/* Names of the shared objects to be profiled (if any).  */
 extern const char *_dl_profile;
-/* Map of shared object to be profiled.  */
-extern struct link_map *_dl_profile_map;
 /* Filename of the output file.  */
 extern const char *_dl_profile_output;
 
@@ -458,8 +456,12 @@ extern void _dl_init_paths (const char *
 
 /* Gather the information needed to install the profiling tables and start
    the timers.  */
+extern void _dl_init_profile (const char *filelist, int profile_all)
+  internal_function;
+
 extern void _dl_start_profile (struct link_map *map, const char *output_dir)
      internal_function;
+extern void _dl_stop_profile (struct link_map *map) internal_function;
 
 /* The actual functions used to keep book on the calls.  */
 extern void _dl_mcount (ElfW(Addr) frompc, ElfW(Addr) selfpc);
--- libc/sysdeps/i386/dl-machine.h.prof-dso	Fri Aug 31 09:36:57 2001
+++ libc/sysdeps/i386/dl-machine.h	Tue Sep 25 17:00:29 2001
@@ -96,14 +96,7 @@ elf_machine_runtime_setup (struct link_m
 	 don't store the address in the GOT so that all future calls also
 	 end in this function.  */
       if (__builtin_expect (profile, 0))
-	{
-	  got[2] = (Elf32_Addr) &_dl_runtime_profile;
-
-	  if (_dl_name_match_p (_dl_profile, l))
-	    /* This is the object we are looking for.  Say that we really
-	       want profiling and the timers are started.  */
-	    _dl_profile_map = l;
-	}
+	got[2] = (Elf32_Addr) &_dl_runtime_profile;
       else
 	/* This function will get called to fix up the GOT entry indicated by
 	   the offset on the stack, and then jump to the resolved address.  */
--- libc/sysdeps/posix/dl-profil.h.prof-dso	Tue Sep 25 17:00:29 2001
+++ libc/sysdeps/posix/dl-profil.h	Tue Sep 25 17:00:29 2001
@@ -0,0 +1,72 @@
+/* Low-level statistical profiling support function for dynamic
+   linker.  Mostly POSIX.1 version.
+   Copyright (C) 2001 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include <signal.h>
+#include <sys/time.h>
+
+#ifdef SIGPROF
+/* Count one SIGPROF tick for PC in the histogram of the object holding it.
+   Records that are not GMON_PROF_ON may lack a usable KCOUNT; skip them.  */
+static inline void
+profil_count (void *pc)
+{
+  struct dl_profile_record *record;
+  size_t i;
+
+  record = find_profile_record ((ElfW(Addr)) pc);
+  if (record == NULL || record->state != GMON_PROF_ON)
+    return;
+
+  i = (pc - record->lowpc - (void *) 0) / 2;
+  if (sizeof (unsigned long long int) > sizeof (size_t))
+    i = (unsigned long long int) i * record->scale / 65536;
+  else
+    i = i / 65536 * record->scale + i % 65536 * record->scale / 65536;
+  ++(record->kcount [i]);
+}
+
+/* Get the machine-dependent definition of `profil_counter', the signal
+   handler for SIGPROF.  It calls `profil_count' (above) with the PC of the
+   interrupted code.  */
+#include "profil-counter.h"
+
+/* Enable statistical profiling.  */
+static void
+internal_function
+dl_profil (void)
+{
+  struct sigaction act;
+  struct itimerval timer;
+
+  act.sa_handler = (sighandler_t) &profil_counter;
+  act.sa_flags = SA_RESTART;
+  sigfillset (&act.sa_mask);
+  if (__sigaction (SIGPROF, &act, NULL) < 0)
+    return;
+
+  timer.it_value.tv_sec = 0;
+  timer.it_value.tv_usec = 1;
+  timer.it_interval = timer.it_value;
+  __setitimer (ITIMER_PROF, &timer, NULL);
+  return;
+}
+
+#define DL_INIT_PROFILE() dl_profil ()
+#endif

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]