This is the mail archive of the libc-alpha@sourceware.org mailing list for the glibc project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH v2] Use HP_TIMING for benchmarks if available


On Mon, May 13, 2013 at 01:08:37AM -0700, David Miller wrote:
> From: Siddhesh Poyarekar <siddhesh@redhat.com>
> Date: Mon, 13 May 2013 13:26:54 +0530
> 
> > On Mon, May 13, 2013 at 09:48:19AM +0200, Andreas Schwab wrote:
> >> 
> >> Please add a comment.
> >> 
> > 
> > Done.  Updated patch.
> > 
> > Siddhesh
> > 
> > 	* benchtests/Makefile (CPPFLAGS-nonlib): Add
> > 	-DUSE_CLOCK_GETTIME if USE_CLOCK_GETTIME is defined.
> > 	(bench-deps): Add bench-timing.h.
> > 	* benchtests-bench-skeleton.c: Include bench-timing.h.
> > 	(main): Use TIMING_* macros instead of clock_gettime.
> > 	* benchtests/bench-timing.h: New file.
> 
> This looks fine to me, thanks for doing this work.

Thanks, this is what I've committed after fixing Ondrej's review
comment.

Siddhesh

commit 43fe811b73d8f585a4ae837d4a9d4c0f5d46b779
Author: Siddhesh Poyarekar <siddhesh@redhat.com>
Date:   Mon May 13 13:44:32 2013 +0530

    Use HP_TIMING for benchmarks if available
    
    HP_TIMING uses native timestamping instructions if available, thus
    greatly reducing the overhead of recording start and end times for
    function calls.  For architectures that don't have HP_TIMING
    available, we fall back to the clock_gettime bits.  One may also
    override this by invoking the benchmark as follows:
    
      make USE_CLOCK_GETTIME=1 bench
    
    and get the benchmark results using clock_gettime.  One has to do
    `make bench-clean` to ensure that the benchmark programs are rebuilt.

diff --git a/ChangeLog b/ChangeLog
index 8a12bf9..ea678c8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
 2013-05-13  Siddhesh Poyarekar  <siddhesh@redhat.com>
 
+	* benchtests/Makefile (CPPFLAGS-nonlib): Add
+	-DUSE_CLOCK_GETTIME if USE_CLOCK_GETTIME is defined.
+	(bench-deps): Add bench-timing.h.
+	* benchtests/bench-skeleton.c: Include bench-timing.h.
+	(main): Use TIMING_* macros instead of clock_gettime.
+	* benchtests/bench-timing.h: New file.
+
 	[BZ #14582]
 	* sysdeps/ieee754/s_lib_version.c (_LIB_VERSION_INTERNAL):
 	Renamed from _LIB_VERSION.
diff --git a/benchtests/Makefile b/benchtests/Makefile
index 19e1be6..8618390 100644
--- a/benchtests/Makefile
+++ b/benchtests/Makefile
@@ -86,13 +86,19 @@ endif
 
 CPPFLAGS-nonlib = -DDURATION=$(BENCH_DURATION)
 
+# Use clock_gettime to measure performance of functions.  The default is to use
+# HP_TIMING if it is available.
+ifdef USE_CLOCK_GETTIME
+CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
+endif
+
 # This makes sure CPPFLAGS-nonlib and CFLAGS-nonlib are passed
 # for all these modules.
 cpp-srcs-left := $(binaries-bench:=.c)
 lib := nonlib
 include $(patsubst %,$(..)cppflags-iterator.mk,$(cpp-srcs-left))
 
-bench-deps := bench-skeleton.c Makefile
+bench-deps := bench-skeleton.c bench-timing.h Makefile
 
 run-bench = $(test-wrapper-env) \
 	    GCONV_PATH=$(common-objpfx)iconvdata LC_ALL=C \
diff --git a/benchtests/bench-skeleton.c b/benchtests/bench-skeleton.c
index 404900b..4e3a507 100644
--- a/benchtests/bench-skeleton.c
+++ b/benchtests/bench-skeleton.c
@@ -21,6 +21,7 @@
 #include <stdio.h>
 #include <time.h>
 #include <inttypes.h>
+#include "bench-timing.h"
 
 volatile unsigned int dontoptimize = 0;
 
@@ -45,21 +46,16 @@ int
 main (int argc, char **argv)
 {
   unsigned long i, k;
-  struct timespec start, end, runtime;
+  struct timespec runtime;
+  timing_t start, end;
 
   startup();
 
   memset (&runtime, 0, sizeof (runtime));
-  memset (&start, 0, sizeof (start));
-  memset (&end, 0, sizeof (end));
 
-  clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);
+  unsigned long iters;
 
-  /* Measure 1000 times the resolution of the clock.  So for a 1ns resolution
-     clock, we measure 1000 iterations of the function call at a time.
-     Measurements close to the minimum clock resolution won't make much sense,
-     but it's better than having nothing at all.  */
-  unsigned long iters = 1000 * start.tv_nsec;
+  TIMING_INIT (iters);
 
   for (int v = 0; v < NUM_VARIANTS; v++)
     {
@@ -68,19 +64,18 @@ main (int argc, char **argv)
       runtime.tv_sec += DURATION;
 
       double d_total_i = 0;
-      uint64_t total = 0, max = 0, min = 0x7fffffffffffffff;
+      timing_t total = 0, max = 0, min = 0x7fffffffffffffff;
       while (1)
 	{
 	  for (i = 0; i < NUM_SAMPLES (v); i++)
 	    {
-	      clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &start);
+	      uint64_t cur;
+	      TIMING_NOW (start);
 	      for (k = 0; k < iters; k++)
 		BENCH_FUNC (v, i);
-	      clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &end);
+	      TIMING_NOW (end);
 
-	      uint64_t cur = (end.tv_nsec - start.tv_nsec
-			      + ((end.tv_sec - start.tv_sec)
-				 * (uint64_t) 1000000000));
+	      TIMING_DIFF (cur, start, end);
 
 	      if (cur > max)
 		max = cur;
@@ -88,7 +83,7 @@ main (int argc, char **argv)
 	      if (cur < min)
 		min = cur;
 
-	      total += cur;
+	      TIMING_ACCUM (total, cur);
 
 	      d_total_i += iters;
 	    }
@@ -104,13 +99,11 @@ main (int argc, char **argv)
       double d_iters;
 
     done:
-      d_total_s = total * 1e-9;
+      d_total_s = total;
       d_iters = iters;
 
-      printf ("%s: ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",
-	      VARIANT (v),
-	      d_total_i, d_total_s, max / d_iters, min / d_iters,
-	      d_total_i / d_total_s);
+      TIMING_PRINT_STATS (VARIANT (v), d_total_s, d_iters, d_total_i, max,
+			  min);
     }
 
   return 0;
diff --git a/benchtests/bench-timing.h b/benchtests/bench-timing.h
new file mode 100644
index 0000000..264d4b8
--- /dev/null
+++ b/benchtests/bench-timing.h
@@ -0,0 +1,72 @@
+/* Define timing macros.
+   Copyright (C) 2013 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#include <hp-timing.h>
+#include <stdint.h>
+
+#if HP_TIMING_AVAIL && !defined USE_CLOCK_GETTIME
+# define GL(x) _##x
+# define GLRO(x) _##x
+hp_timing_t _dl_hp_timing_overhead;
+typedef hp_timing_t timing_t;
+
+# define TIMING_INIT(iters) \
+({									      \
+  HP_TIMING_DIFF_INIT();						      \
+  (iters) = 1000;							      \
+})
+
+# define TIMING_NOW(var) HP_TIMING_NOW (var)
+# define TIMING_DIFF(diff, start, end) HP_TIMING_DIFF ((diff), (start), (end))
+# define TIMING_ACCUM(sum, diff) HP_TIMING_ACCUM_NT ((sum), (diff))
+
+# define TIMING_PRINT_STATS(func, d_total_s, d_iters, d_total_i, max, min) \
+  printf ("%s: ITERS:%g: TOTAL:%gMcy, MAX:%gcy, MIN:%gcy, %g calls/Mcy\n",    \
+	  (func), (d_total_i), (d_total_s) * 1e-6, (max) / (d_iters),	      \
+	  (min) / (d_iters), 1e6 * (d_total_i) / (d_total_s));
+
+#else
+typedef uint64_t timing_t;
+
+/* Measure 1000 times the resolution of the clock.  So for a 1ns
+   resolution clock, we measure 1000 iterations of the function call at a
+   time.  Measurements close to the minimum clock resolution won't make
+   much sense, but it's better than having nothing at all.  */
+# define TIMING_INIT(iters) \
+({									      \
+  struct timespec start;						      \
+  clock_getres (CLOCK_PROCESS_CPUTIME_ID, &start);			      \
+  (iters) = 1000 * start.tv_nsec;					      \
+})
+
+# define TIMING_NOW(var) \
+({									      \
+  struct timespec tv;							      \
+  clock_gettime (CLOCK_PROCESS_CPUTIME_ID, &tv);			      \
+  (var) = (uint64_t) (tv.tv_nsec + (uint64_t) 1000000000 * tv.tv_sec);	      \
+})
+
+# define TIMING_DIFF(diff, start, end) (diff) = (end) - (start)
+# define TIMING_ACCUM(sum, diff) (sum) += (diff)
+
+# define TIMING_PRINT_STATS(func, d_total_s, d_iters, d_total_i, max, min) \
+  printf ("%s: ITERS:%g: TOTAL:%gs, MAX:%gns, MIN:%gns, %g iter/s\n",	      \
+	  (func), (d_total_i), (d_total_s) * 1e-9, (max) / (d_iters),		      \
+	  (min) / (d_iters), 1e9 * (d_total_i) / (d_total_s))
+
+#endif


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]