This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Perfmon systemtap runtime support


Hi

I have been working on getting some performance monitoring support into systemtap. The perfmon1.diff patch is a very simple addition to the runtime. It just has functions to set up the performance monitoring hardware, read a counter, and shut down the performance monitoring hardware. It uses the perfmon2 kernel ABI to configure the hardware.

I have completed changes to the translator to use the runtime functions. I took Marin's suggestion of using guru mode to allow access to the various C functions and wrote some examples that used the runtime functions.

The cost is relatively high for accessing the counters. Below is the output from p2x.stp, counting the number of cycles between consecutive calls to read the cycle count:

[wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g  p2x.stp
interval = 15491
[wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g  p2x.stp
interval = 16317
[wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g  p2x.stp
interval = 15431
[wcohen@dhcp59-187 systemtap_perfmon]$ ./install/bin/stap -g  p2x.stp
interval = 15392

I would appreciate any comments or feedback on this code.

-Will
? runtime/bench2/bench.stp
? runtime/bench2/itest
? runtime/bench2/stap.out
? runtime/probes/perf
? runtime/probes/os_timer/.built-in.o.cmd
? runtime/probes/os_timer/.os_timer.o.d
? runtime/probes/os_timer/.tmp_versions
? runtime/probes/os_timer/Makefile
? runtime/probes/os_timer/compile.errors
? runtime/probes/scf/.built-in.o.cmd
? runtime/probes/scf/.scf.o.d
? runtime/probes/scf/.tmp_versions
? runtime/probes/scf/Makefile
? runtime/probes/scf/compile.errors
? runtime/probes/test4/.built-in.o.cmd
? runtime/probes/test4/.test4.o.d
? runtime/probes/test4/.tmp_versions
? runtime/probes/test4/Makefile
? runtime/probes/test4/compile.errors
Index: runtime/perf.c
===================================================================
RCS file: runtime/perf.c
diff -N runtime/perf.c
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ runtime/perf.c	13 Jul 2006 22:13:19 -0000
@@ -0,0 +1,132 @@
+/* -*- linux-c -*- 
+ * Perf Functions
+ * Copyright (C) 2006 Red Hat Inc.
+ *
+ * This file is part of systemtap, and is free software.  You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#ifndef _PERF_C_
+#define _PERF_C_
+
+#include <linux/perfmon.h>
+
+#include "perf.h"
+
+/** @file perf.c
+ * @brief Implements performance monitoring hardware support
+ */
+
+/* TODO fix so this works on SMP machines
+ * Need to do context load, register setup, and start on each processor
+ *
+ * Similarly need to stop and unload on each processor
+ */
+
+/* TODO make this work with sampling. There needs to be a helper thread
+ * handling the sampling. */
+
+
+/* Write the control registers (PMCs), then the data registers (PMDs). */
+static int _stp_pfm_register_setup(void *desc,
+		       struct pfarg_pmc pmc[], int pmc_count,
+		       struct pfarg_pmd pmd[], int pmd_count)
+{
+	int rc = pfmk_write_pmcs(desc, pmc, pmc_count);
+
+	/* skip the data registers if writing the control registers failed */
+	if (rc == 0)
+		rc = pfmk_write_pmds(desc, pmd, pmd_count);
+	return rc;
+}
+
+static struct completion c;	/* given to pfmk_create_context(); NOTE(review): no init_completion() call is visible here - confirm the callee initializes it */
+static struct pfarg_load load_args;	/* zero-filled argument for pfmk_load_context() */
+static struct pfarg_start start_args;	/* zero-filled argument for pfmk_start() */
+
+/** Sets up the performance monitoring hardware.
+ * The locations desc and context point to are modified as
+ * side-effects of the setup. On success *desc is the handle used by
+ * the other routines; on any failure *desc is set to NULL.
+ * @param desc pointer to void *, handle to describe perfmon config
+ * @param context pointer to context information
+ * @param pmc pointer to array describing control register setup
+ * @param pmc_count number of entries in pmc
+ * @param pmd pointer to array describing data register setup
+ * @param pmd_count number of entries in pmd
+ * @returns an int, 0 if no errors encountered during setup
+ */
+int _stp_perfmon_setup(void **desc,
+		       struct pfarg_ctx *context,
+		       struct pfarg_pmc pmc[], int pmc_count,
+		       struct pfarg_pmd pmd[], int pmd_count)
+{
+	int err = 0;
+
+	/* create a context; the handle is stored through desc */
+	err = pfmk_create_context(context, NULL, 0, &c, desc, NULL);
+	if (err) goto cleanup;
+
+	/* set up the counters (control registers, then data registers) */
+	err = _stp_pfm_register_setup(*desc, pmc, pmc_count, pmd, pmd_count);
+	if (err) goto cleanup2;
+
+	/* load the context, then start measuring */
+	err = pfmk_load_context(*desc, &load_args);
+	if (err) {
+		printk("pfmk_load_context error\n");
+		goto cleanup2;
+	}
+	err = pfmk_start(*desc, &start_args);
+	if (err) {
+		printk("pfmk_start error\n");
+		goto cleanup3;
+	}
+
+	return err;
+	/* error unwinding: the labels fall through, so a later failure undoes more steps */
+cleanup3: pfmk_unload_context(*desc);
+cleanup2: pfmk_close(*desc);
+cleanup: *desc=NULL; 
+	return err;
+}
+
+/** Shuts down the performance monitoring hardware. Every teardown step
+ * is attempted even if an earlier one fails; a NULL desc is a no-op.
+ * @param desc unique pointer to describe configuration
+ * @returns an int, 0 if no errors encountered, else the first error seen
+ */
+int _stp_perfmon_shutdown(void *desc)
+{
+	int err, rc;
+	if (desc == NULL) return 0;	/* nothing was set up: nothing to undo */
+	err = pfmk_stop(desc);		/* stop the counters */
+	rc = pfmk_unload_context(desc);	/* still unload if the stop failed */
+	if (!err) err = rc;
+	rc = pfmk_close(desc);		/* always release the context */
+	return err ? err : rc;
+}
+
+/** Reads the performance counter
+ * @param desc unique pointer to describe configuration
+ * @param counter number of the data register (PMD) to read
+ * @returns an int64, raw value of counter; 0 if desc is NULL or read fails
+ */
+int64_t _stp_perfmon_read(void *desc, int counter)
+{
+	/* Zero-fill the request so no uninitialized stack data is returned. */
+	struct pfarg_pmd storage = { .reg_set = 0, .reg_num = counter };
+
+	if (desc == NULL)
+		return 0;
+	if (pfmk_read_pmds(desc, &storage, 1)) {
+		printk( "pfm_read_pmds error\n");
+		return 0;
+	}
+	return storage.reg_value;
+}
+
+#endif /* _PERF_C_ */
+
Index: runtime/perf.h
===================================================================
RCS file: runtime/perf.h
diff -N runtime/perf.h
--- /dev/null	1 Jan 1970 00:00:00 -0000
+++ runtime/perf.h	13 Jul 2006 22:13:19 -0000
@@ -0,0 +1,27 @@
+/* -*- linux-c -*- 
+ * Perf Header File
+ * Copyright (C) 2006 Red Hat Inc.
+ *
+ * This file is part of systemtap, and is free software.  You can
+ * redistribute it and/or modify it under the terms of the GNU General
+ * Public License (GPL); either version 2, or (at your option) any
+ * later version.
+ */
+
+#ifndef _PERF_H_
+#define _PERF_H_
+
+/** @file perf.h
+ * @brief Header file for performance monitoring hardware support
+ */
+/* Creates, programs, loads and starts a context; *desc gets its handle. */
+int _stp_perfmon_setup(void **desc,
+		       struct pfarg_ctx *context,
+		       struct pfarg_pmc pmc[], int pmc_count,
+		       struct pfarg_pmd pmd[], int pmd_count);
+/* Stops, unloads and closes the configuration identified by desc. */
+int _stp_perfmon_shutdown(void *desc);
+/* Returns the raw value of data register number `counter`. */
+int64_t _stp_perfmon_read(void *desc, int counter);
+
+#endif /* _PERF_H_ */
Index: runtime/runtime.h
===================================================================
RCS file: /cvs/systemtap/src/runtime/runtime.h,v
retrieving revision 1.28
diff -u -r1.28 runtime.h
--- runtime/runtime.h	28 Nov 2005 22:08:39 -0000	1.28
+++ runtime/runtime.h	13 Jul 2006 22:13:19 -0000
@@ -64,6 +64,7 @@
 #include "copy.c"
 #include "sym.h"
 #include "alloc.c"
+#include "perf.c"
 
 
 /************* Module Stuff ********************/
/* stap -g p1x.stp
   Make use of guru mode to check that the runtime functions are in place
   This code only works on AMD64 processors.
*/

%{
/* Perfmon state shared by the embedded-C functions in this script. */
static struct pfarg_ctx context;	/* ctx_flags is set by cpu_pfm_init() */
static void *desc;			/* handle filled in by _stp_perfmon_setup() */

/* set things up for AMD64 */
/* Flag bits OR-ed into the control register value below.
 * NOTE(review): presumably the AMD64 event-select register layout --
 * confirm against the processor manual. */
#define USR_BIT (1<<16)
#define OS_BIT (1<<17)
#define E_BIT (1<<18)
#define PC_BIT (1<<19)
#define INT_BIT (1<<20)
#define EN_BIT (1<<22)
#define INV_BIT (1<<23)

/* One data register (PMD 0), starting from zero. */
static struct pfarg_pmd pmd[] = {
	{.reg_num=0, .reg_value=0}
};
/* Entry counts are derived from the tables so the two cannot drift apart. */
#define NUM_PMD ((int)(sizeof(pmd) / sizeof(pmd[0])))
static int num_pfm_pmd = NUM_PMD;

/* One control register (PMC 0): event code 0x76 with the USR, OS, EN and
 * INT flags. NOTE(review): 0x76 is presumably the cycle-count event --
 * confirm. */
static struct pfarg_pmc pmc[] = {
	{.reg_num=0, .reg_value=(0x76|USR_BIT|OS_BIT|EN_BIT|INT_BIT)}
};
#define NUM_PMC ((int)(sizeof(pmc) / sizeof(pmc[0])))
static int num_pfm_pmc = NUM_PMC;
%}

/* Configure and start the counters as a system-wide context.
   Returns 0 on success, otherwise the error from _stp_perfmon_setup(). */
function cpu_pfm_init:long ()
%{
	int err;

	/* set up context information */
	/* only does system-wide contexts */
	context.ctx_flags |= PFM_FL_SYSTEM_WIDE;

	err = _stp_perfmon_setup(&desc, &context,
				 pmc, num_pfm_pmc,
				 pmd, num_pfm_pmd);

	printk("err = %d, desc = 0x%p\n", err, desc);

	if (err) {
		printk("unable to set up counters\n");
	}

	/* The function is declared :long, so hand the status back to the
	   script instead of leaving the return value unset. */
	THIS->__retvalue = err;
%}

/* Read data register `reg` through the runtime and return its raw value. */
function cpu_pfm_getreg:long (reg:long)
%{
	int64_t raw = _stp_perfmon_read(desc, THIS->reg);

	THIS->__retvalue = raw;
%}

/* Stop the counters and release the perfmon context, if one was set up.
   Returns 0 on success (or when there was nothing to release), otherwise
   the error from _stp_perfmon_shutdown(). */
function cpu_pfm_cleanup:long ()
%{
	int err = 0;

	/* desc is NULL when setup never ran or failed; nothing to undo then. */
	if (desc != NULL) {
		err = _stp_perfmon_shutdown(desc);
		if (err) printk("_stp_pfmk_shutdown error\n");
		/* Drop the handle so later reads cannot use a torn-down context. */
		desc = NULL;
	}
	THIS->__retvalue = err;
%}

/* Configure and start the counter when the script starts. */
probe begin
{
	cpu_pfm_init();
}

/* On exit, report the value of data register 0, then tear everything down. */
probe end
{
	final_count = cpu_pfm_getreg(0);
	printf("pmd = %d\n", final_count);
	cpu_pfm_cleanup();
}
/* stap -g p2x.stp
   Quick check to see how expensive the reading of the perfmon hw is.
   This code only works on AMD64 processors.
*/

%{
/* Perfmon state shared by the embedded-C functions in this script. */
static struct pfarg_ctx context;	/* ctx_flags is set by cpu_pfm_init() */
static void *desc;			/* handle filled in by _stp_perfmon_setup() */

/* set things up for AMD64 */
/* Flag bits OR-ed into the control register value below.
 * NOTE(review): presumably the AMD64 event-select register layout --
 * confirm against the processor manual. */
#define USR_BIT (1<<16)
#define OS_BIT (1<<17)
#define E_BIT (1<<18)
#define PC_BIT (1<<19)
#define INT_BIT (1<<20)
#define EN_BIT (1<<22)
#define INV_BIT (1<<23)

/* One data register (PMD 0), starting from zero. */
static struct pfarg_pmd pmd[] = {
	{.reg_num=0, .reg_value=0}
};
/* Entry counts are derived from the tables so the two cannot drift apart. */
#define NUM_PMD ((int)(sizeof(pmd) / sizeof(pmd[0])))
static int num_pfm_pmd = NUM_PMD;

/* One control register (PMC 0): event code 0x76 with the USR, OS, EN and
 * INT flags. NOTE(review): 0x76 is presumably the cycle-count event --
 * confirm. */
static struct pfarg_pmc pmc[] = {
	{.reg_num=0, .reg_value=(0x76|USR_BIT|OS_BIT|EN_BIT|INT_BIT)}
};
#define NUM_PMC ((int)(sizeof(pmc) / sizeof(pmc[0])))
static int num_pfm_pmc = NUM_PMC;
%}

/* Configure and start the counters as a system-wide context.
   Returns 0 on success, otherwise the error from _stp_perfmon_setup(). */
function cpu_pfm_init:long ()
%{
	int err;

	/* set up context information */
	/* only does system-wide contexts */
	context.ctx_flags |= PFM_FL_SYSTEM_WIDE;

	err = _stp_perfmon_setup(&desc, &context,
				 pmc, num_pfm_pmc,
				 pmd, num_pfm_pmd);

	printk("err = %d, desc = 0x%p\n", err, desc);

	if (err) {
		printk("unable to set up counters\n");
	}

	/* The function is declared :long, so hand the status back to the
	   script instead of leaving the return value unset. */
	THIS->__retvalue = err;
%}

/* Read data register `reg` through the runtime and return its raw value. */
function cpu_pfm_getreg:long (reg:long)
%{
	int64_t raw = _stp_perfmon_read(desc, THIS->reg);

	THIS->__retvalue = raw;
%}

/* Stop the counters and release the perfmon context, if one was set up.
   Returns 0 on success (or when there was nothing to release), otherwise
   the error from _stp_perfmon_shutdown(). */
function cpu_pfm_cleanup:long ()
%{
	int err = 0;

	/* desc is NULL when setup never ran or failed; nothing to undo then. */
	if (desc != NULL) {
		err = _stp_perfmon_shutdown(desc);
		if (err) printk("_stp_pfmk_shutdown error\n");
		/* Drop the handle so later reads cannot use a torn-down context. */
		desc = NULL;
	}
	THIS->__retvalue = err;
%}

global first
global second

/* Read the counter twice back to back from script code and print the
   difference, i.e. the cost of one read as seen from a stap script. */
probe begin
{
	cpu_pfm_init();
	first = cpu_pfm_getreg(0);
	second = cpu_pfm_getreg(0);
	delta = second - first;
	printf("interval = %d\n", delta);
	cpu_pfm_cleanup();
}
/* stap -g p3x.stp
   Quick check to see how expensive the reading of the perfmon hw is
   from the C code. Compare with p2x.stp to see how much overhead is in
   the stap-generated code.
   This code only works on AMD64 processors.
*/

%{
/* Perfmon state shared by the embedded-C functions in this script. */
static struct pfarg_ctx context;	/* ctx_flags is set by cpu_pfm_init() */
static void *desc;			/* handle filled in by _stp_perfmon_setup() */

/* set things up for AMD64 */
/* Flag bits OR-ed into the control register value below.
 * NOTE(review): presumably the AMD64 event-select register layout --
 * confirm against the processor manual. */
#define USR_BIT (1<<16)
#define OS_BIT (1<<17)
#define E_BIT (1<<18)
#define PC_BIT (1<<19)
#define INT_BIT (1<<20)
#define EN_BIT (1<<22)
#define INV_BIT (1<<23)

/* One data register (PMD 0), starting from zero. */
static struct pfarg_pmd pmd[] = {
	{.reg_num=0, .reg_value=0}
};
/* Entry counts are derived from the tables so the two cannot drift apart. */
#define NUM_PMD ((int)(sizeof(pmd) / sizeof(pmd[0])))
static int num_pfm_pmd = NUM_PMD;

/* One control register (PMC 0): event code 0x76 with the USR, OS, EN and
 * INT flags. NOTE(review): 0x76 is presumably the cycle-count event --
 * confirm. */
static struct pfarg_pmc pmc[] = {
	{.reg_num=0, .reg_value=(0x76|USR_BIT|OS_BIT|EN_BIT|INT_BIT)}
};
#define NUM_PMC ((int)(sizeof(pmc) / sizeof(pmc[0])))
static int num_pfm_pmc = NUM_PMC;
%}

/* Configure and start the counters as a system-wide context.
   Returns 0 on success, otherwise the error from _stp_perfmon_setup(). */
function cpu_pfm_init:long ()
%{
	int err;

	/* set up context information */
	/* only does system-wide contexts */
	context.ctx_flags |= PFM_FL_SYSTEM_WIDE;

	err = _stp_perfmon_setup(&desc, &context,
				 pmc, num_pfm_pmc,
				 pmd, num_pfm_pmd);

	printk("err = %d, desc = 0x%p\n", err, desc);

	if (err) {
		printk("unable to set up counters\n");
	}

	/* The function is declared :long, so hand the status back to the
	   script instead of leaving the return value unset. */
	THIS->__retvalue = err;
%}

/* Read data register 0 twice in a row from C and return the difference
   between the second and the first reading. */
function cpu_pfm_getdiff:long ()
%{
	int64_t before = _stp_perfmon_read(desc, 0);
	int64_t after = _stp_perfmon_read(desc, 0);

	THIS->__retvalue = (after - before);
%}

/* Stop the counters and release the perfmon context, if one was set up.
   Returns 0 on success (or when there was nothing to release), otherwise
   the error from _stp_perfmon_shutdown(). */
function cpu_pfm_cleanup:long ()
%{
	int err = 0;

	/* desc is NULL when setup never ran or failed; nothing to undo then. */
	if (desc != NULL) {
		err = _stp_perfmon_shutdown(desc);
		if (err) printk("_stp_pfmk_shutdown error\n");
		/* Drop the handle so later reads cannot use a torn-down context. */
		desc = NULL;
	}
	THIS->__retvalue = err;
%}

/* Start the counter, print the difference between two back-to-back reads
   done in C, then tear the counter down again. */
probe begin
{
	cpu_pfm_init();
	delta = cpu_pfm_getdiff();
	printf("interval = %d\n", delta);
	cpu_pfm_cleanup();
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]