This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Re: [blktrace kernel patch] Separate out non-blktrace-specific code
- From: Tom Zanussi <zanussi at us dot ibm dot com>
- To: Tom Zanussi <zanussi at us dot ibm dot com>
- Cc: axboe at suse dot de, systemtap at sources dot redhat dot com, linux-btrace at vger dot kernel dot org
- Date: Fri, 6 Oct 2006 11:40:01 -0500
- Subject: Re: [blktrace kernel patch] Separate out non-blktrace-specific code
- References: <17701.38333.687992.592272@tut.ibm.com>
Tom Zanussi writes:
> This patch moves the non-block-specific code out of the core blktrace
> kernel code and moves it into a separate utt.* files, so that anyone
> can use it for generic tracing. It also adds a config option,
> CONFIG_UTT, and hooks up the remaining code in blktrace.c to use it.
> The accompanying userspace patch does the same thing for the userspace
> tools.
>
I should have mentioned that the kernel patch was against a recent git
kernel, and doesn't apply cleanly to a 2.6.18 stable kernel. Here's a
patch against 2.6.18 stable. I've only had time to compile-test it -
no time right now to do anything more, since I'm almost out the
door - hopefully it works...
Tom
diff -urpN -X dontdiff linux-2.6.18/arch/i386/kernel/tsc.c linux-2.6.18-utt/arch/i386/kernel/tsc.c
--- linux-2.6.18/arch/i386/kernel/tsc.c 2006-09-19 22:42:06.000000000 -0500
+++ linux-2.6.18-utt/arch/i386/kernel/tsc.c 2006-10-07 10:11:47.000000000 -0500
@@ -124,6 +124,8 @@ unsigned long long sched_clock(void)
/* return the value in ns */
return cycles_2_ns(this_offset);
}
+// utt hack for now
+EXPORT_SYMBOL_GPL(sched_clock);
static unsigned long calculate_cpu_khz(void)
{
diff -urpN -X dontdiff linux-2.6.18/block/Kconfig linux-2.6.18-utt/block/Kconfig
--- linux-2.6.18/block/Kconfig 2006-09-19 22:42:06.000000000 -0500
+++ linux-2.6.18-utt/block/Kconfig 2006-10-07 10:11:47.000000000 -0500
@@ -14,8 +14,7 @@ config LBD
config BLK_DEV_IO_TRACE
bool "Support for tracing block io actions"
depends on SYSFS
- select RELAY
- select DEBUG_FS
+ select UTT
help
Say Y here, if you want to be able to trace the block layer actions
on a given queue. Tracing allows you to see any traffic happening
@@ -24,6 +23,19 @@ config BLK_DEV_IO_TRACE
git://brick.kernel.dk/data/git/blktrace.git
+config UTT
+ bool "Unified Tracing Transport"
+ select RELAY
+ select DEBUG_FS
+ help
+ This option enables support for the tracing transport
+ used by tracing tools such as blktrace, LTT and systemtap.
+ The UTT can also be used as a tracing transport for one-off
+ tools by making use of a matching set of generic userspace
+ tools which can be found in the blktrace git repository.
+
+ If unsure, say N.
+
config LSF
bool "Support for Large Single Files"
depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML
diff -urpN -X dontdiff linux-2.6.18/block/Makefile linux-2.6.18-utt/block/Makefile
--- linux-2.6.18/block/Makefile 2006-09-19 22:42:06.000000000 -0500
+++ linux-2.6.18-utt/block/Makefile 2006-10-07 10:11:47.000000000 -0500
@@ -10,3 +10,4 @@ obj-$(CONFIG_IOSCHED_DEADLINE) += deadli
obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+obj-$(CONFIG_UTT) += utt.o
diff -urpN -X dontdiff linux-2.6.18/block/blktrace.c linux-2.6.18-utt/block/blktrace.c
--- linux-2.6.18/block/blktrace.c 2006-09-19 22:42:06.000000000 -0500
+++ linux-2.6.18-utt/block/blktrace.c 2006-10-07 10:42:24.000000000 -0500
@@ -22,9 +22,9 @@
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/debugfs.h>
+#include <linux/utt.h>
#include <asm/uaccess.h>
-static DEFINE_PER_CPU(unsigned long long, blk_trace_cpu_offset) = { 0, };
static unsigned int blktrace_seq __read_mostly = 1;
/*
@@ -35,7 +35,7 @@ static void trace_note_tsk(struct blk_tr
{
struct blk_io_trace *t;
- t = relay_reserve(bt->rchan, sizeof(*t) + sizeof(tsk->comm));
+ t = relay_reserve(bt->utt->rchan, sizeof(*t) + sizeof(tsk->comm));
if (t) {
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
t->device = bt->dev;
@@ -96,7 +96,7 @@ void __blk_add_trace(struct blk_trace *b
pid_t pid;
int cpu;
- if (unlikely(bt->trace_state != Blktrace_running))
+ if (unlikely(bt->utt->trace_state != Utt_trace_running))
return;
what |= ddir_act[rw & WRITE];
@@ -121,14 +121,14 @@ void __blk_add_trace(struct blk_trace *b
if (unlikely(tsk->btrace_seq != blktrace_seq))
trace_note_tsk(bt, tsk);
- t = relay_reserve(bt->rchan, sizeof(*t) + pdu_len);
+ t = relay_reserve(bt->utt->rchan, sizeof(*t) + pdu_len);
if (t) {
cpu = smp_processor_id();
- sequence = per_cpu_ptr(bt->sequence, cpu);
+ sequence = per_cpu_ptr(bt->utt->sequence, cpu);
t->magic = BLK_IO_TRACE_MAGIC | BLK_IO_TRACE_VERSION;
t->sequence = ++(*sequence);
- t->time = sched_clock() - per_cpu(blk_trace_cpu_offset, cpu);
+ t->time = sched_clock() - per_cpu(utt_trace_cpu_offset, cpu);
t->sector = sector;
t->bytes = bytes;
t->action = what;
@@ -147,59 +147,6 @@ void __blk_add_trace(struct blk_trace *b
EXPORT_SYMBOL_GPL(__blk_add_trace);
-static struct dentry *blk_tree_root;
-static struct mutex blk_tree_mutex;
-static unsigned int root_users;
-
-static inline void blk_remove_root(void)
-{
- if (blk_tree_root) {
- debugfs_remove(blk_tree_root);
- blk_tree_root = NULL;
- }
-}
-
-static void blk_remove_tree(struct dentry *dir)
-{
- mutex_lock(&blk_tree_mutex);
- debugfs_remove(dir);
- if (--root_users == 0)
- blk_remove_root();
- mutex_unlock(&blk_tree_mutex);
-}
-
-static struct dentry *blk_create_tree(const char *blk_name)
-{
- struct dentry *dir = NULL;
-
- mutex_lock(&blk_tree_mutex);
-
- if (!blk_tree_root) {
- blk_tree_root = debugfs_create_dir("block", NULL);
- if (!blk_tree_root)
- goto err;
- }
-
- dir = debugfs_create_dir(blk_name, blk_tree_root);
- if (dir)
- root_users++;
- else
- blk_remove_root();
-
-err:
- mutex_unlock(&blk_tree_mutex);
- return dir;
-}
-
-static void blk_trace_cleanup(struct blk_trace *bt)
-{
- relay_close(bt->rchan);
- debugfs_remove(bt->dropped_file);
- blk_remove_tree(bt->dir);
- free_percpu(bt->sequence);
- kfree(bt);
-}
-
static int blk_trace_remove(request_queue_t *q)
{
struct blk_trace *bt;
@@ -208,76 +155,9 @@ static int blk_trace_remove(request_queu
if (!bt)
return -EINVAL;
- if (bt->trace_state == Blktrace_setup ||
- bt->trace_state == Blktrace_stopped)
- blk_trace_cleanup(bt);
-
- return 0;
-}
-
-static int blk_dropped_open(struct inode *inode, struct file *filp)
-{
- filp->private_data = inode->u.generic_ip;
-
- return 0;
-}
-
-static ssize_t blk_dropped_read(struct file *filp, char __user *buffer,
- size_t count, loff_t *ppos)
-{
- struct blk_trace *bt = filp->private_data;
- char buf[16];
-
- snprintf(buf, sizeof(buf), "%u\n", atomic_read(&bt->dropped));
-
- return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
-}
-
-static struct file_operations blk_dropped_fops = {
- .owner = THIS_MODULE,
- .open = blk_dropped_open,
- .read = blk_dropped_read,
-};
-
-/*
- * Keep track of how many times we encountered a full subbuffer, to aid
- * the user space app in telling how many lost events there were.
- */
-static int blk_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
- void *prev_subbuf, size_t prev_padding)
-{
- struct blk_trace *bt;
-
- if (!relay_buf_full(buf))
- return 1;
-
- bt = buf->chan->private_data;
- atomic_inc(&bt->dropped);
- return 0;
-}
-
-static int blk_remove_buf_file_callback(struct dentry *dentry)
-{
- debugfs_remove(dentry);
- return 0;
-}
-
-static struct dentry *blk_create_buf_file_callback(const char *filename,
- struct dentry *parent,
- int mode,
- struct rchan_buf *buf,
- int *is_global)
-{
- return debugfs_create_file(filename, mode, parent, buf,
- &relay_file_operations);
+ return utt_trace_remove(bt->utt);
}
-static struct rchan_callbacks blk_relay_callbacks = {
- .subbuf_start = blk_subbuf_start_callback,
- .create_buf_file = blk_create_buf_file_callback,
- .remove_buf_file = blk_remove_buf_file_callback,
-};
-
/*
* Setup everything required to start tracing
*/
@@ -286,25 +166,22 @@ static int blk_trace_setup(request_queue
{
struct blk_user_trace_setup buts;
struct blk_trace *old_bt, *bt = NULL;
- struct dentry *dir = NULL;
char b[BDEVNAME_SIZE];
int ret, i;
if (copy_from_user(&buts, arg, sizeof(buts)))
return -EFAULT;
- if (!buts.buf_size || !buts.buf_nr)
- return -EINVAL;
-
- strcpy(buts.name, bdevname(bdev, b));
+ strcpy(buts.utts.root, "block");
+ strcpy(buts.utts.name, bdevname(bdev, b));
/*
* some device names have larger paths - convert the slashes
* to underscores for this to work as expected
*/
- for (i = 0; i < strlen(buts.name); i++)
- if (buts.name[i] == '/')
- buts.name[i] = '_';
+ for (i = 0; i < strlen(buts.utts.name); i++)
+ if (buts.utts.name[i] == '/')
+ buts.utts.name[i] = '_';
if (copy_to_user(arg, &buts, sizeof(buts)))
return -EFAULT;
@@ -314,28 +191,13 @@ static int blk_trace_setup(request_queue
if (!bt)
goto err;
- bt->sequence = alloc_percpu(unsigned long);
- if (!bt->sequence)
+ bt->utt = utt_trace_setup(&buts.utts);
+ if (!bt->utt) {
+ ret = buts.utts.err;
goto err;
-
- ret = -ENOENT;
- dir = blk_create_tree(buts.name);
- if (!dir)
- goto err;
-
- bt->dir = dir;
+ }
+
bt->dev = bdev->bd_dev;
- atomic_set(&bt->dropped, 0);
-
- ret = -EIO;
- bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
- if (!bt->dropped_file)
- goto err;
-
- bt->rchan = relay_open("trace", dir, buts.buf_size, buts.buf_nr, &blk_relay_callbacks);
- if (!bt->rchan)
- goto err;
- bt->rchan->private_data = bt;
bt->act_mask = buts.act_mask;
if (!bt->act_mask)
@@ -347,7 +209,6 @@ static int blk_trace_setup(request_queue
bt->end_lba = -1ULL;
bt->pid = buts.pid;
- bt->trace_state = Blktrace_setup;
ret = -EBUSY;
old_bt = xchg(&q->blk_trace, bt);
@@ -358,15 +219,9 @@ static int blk_trace_setup(request_queue
return 0;
err:
- if (dir)
- blk_remove_tree(dir);
if (bt) {
- if (bt->dropped_file)
- debugfs_remove(bt->dropped_file);
- if (bt->sequence)
- free_percpu(bt->sequence);
- if (bt->rchan)
- relay_close(bt->rchan);
+ if (bt->utt)
+ utt_trace_cleanup(bt->utt);
kfree(bt);
}
return ret;
@@ -375,33 +230,11 @@ err:
static int blk_trace_startstop(request_queue_t *q, int start)
{
struct blk_trace *bt;
- int ret;
if ((bt = q->blk_trace) == NULL)
return -EINVAL;
- /*
- * For starting a trace, we can transition from a setup or stopped
- * trace. For stopping a trace, the state must be running
- */
- ret = -EINVAL;
- if (start) {
- if (bt->trace_state == Blktrace_setup ||
- bt->trace_state == Blktrace_stopped) {
- blktrace_seq++;
- smp_mb();
- bt->trace_state = Blktrace_running;
- ret = 0;
- }
- } else {
- if (bt->trace_state == Blktrace_running) {
- bt->trace_state = Blktrace_stopped;
- relay_flush(bt->rchan);
- ret = 0;
- }
- }
-
- return ret;
+ return utt_trace_startstop(bt->utt, start, &blktrace_seq);
}
/**
@@ -454,85 +287,8 @@ void blk_trace_shutdown(request_queue_t
blk_trace_remove(q);
}
-/*
- * Average offset over two calls to sched_clock() with a gettimeofday()
- * in the middle
- */
-static void blk_check_time(unsigned long long *t)
-{
- unsigned long long a, b;
- struct timeval tv;
-
- a = sched_clock();
- do_gettimeofday(&tv);
- b = sched_clock();
-
- *t = tv.tv_sec * 1000000000 + tv.tv_usec * 1000;
- *t -= (a + b) / 2;
-}
-
-static void blk_trace_check_cpu_time(void *data)
-{
- unsigned long long *t;
- int cpu = get_cpu();
-
- t = &per_cpu(blk_trace_cpu_offset, cpu);
-
- /*
- * Just call it twice, hopefully the second call will be cache hot
- * and a little more precise
- */
- blk_check_time(t);
- blk_check_time(t);
-
- put_cpu();
-}
-
-/*
- * Call blk_trace_check_cpu_time() on each CPU to calibrate our inter-CPU
- * timings
- */
-static void blk_trace_calibrate_offsets(void)
-{
- unsigned long flags;
-
- smp_call_function(blk_trace_check_cpu_time, NULL, 1, 1);
- local_irq_save(flags);
- blk_trace_check_cpu_time(NULL);
- local_irq_restore(flags);
-}
-
-static void blk_trace_set_ht_offsets(void)
-{
-#if defined(CONFIG_SCHED_SMT)
- int cpu, i;
-
- /*
- * now make sure HT siblings have the same time offset
- */
- preempt_disable();
- for_each_online_cpu(cpu) {
- unsigned long long *cpu_off, *sibling_off;
-
- for_each_cpu_mask(i, cpu_sibling_map[cpu]) {
- if (i == cpu)
- continue;
-
- cpu_off = &per_cpu(blk_trace_cpu_offset, cpu);
- sibling_off = &per_cpu(blk_trace_cpu_offset, i);
- *sibling_off = *cpu_off;
- }
- }
- preempt_enable();
-#endif
-}
-
static __init int blk_trace_init(void)
{
- mutex_init(&blk_tree_mutex);
- blk_trace_calibrate_offsets();
- blk_trace_set_ht_offsets();
-
return 0;
}
diff -urpN -X dontdiff linux-2.6.18/block/utt.c linux-2.6.18-utt/block/utt.c
--- linux-2.6.18/block/utt.c 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.18-utt/block/utt.c 2006-10-07 10:54:19.000000000 -0500
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2006 Jens Axboe <axboe@suse.de>
+ *
+ * Moved to utt.c by Tom Zanussi, 2006
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/init.h>
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
+#include <linux/utt.h>
+
+DEFINE_PER_CPU(unsigned long long, utt_trace_cpu_offset) = { 0, };
+EXPORT_PER_CPU_SYMBOL(utt_trace_cpu_offset);
+
+static inline void utt_remove_root(struct utt_trace *utt)
+{
+ if (utt->utt_tree_root && simple_empty(utt->utt_tree_root)) {
+ debugfs_remove(utt->utt_tree_root);
+ utt->utt_tree_root = NULL;
+ }
+}
+
+static void utt_remove_tree(struct utt_trace *utt)
+{
+ mutex_lock(&utt->utt_tree_mutex);
+ debugfs_remove(utt->dir);
+ if (--utt->root_users == 0)
+ utt_remove_root(utt);
+ mutex_unlock(&utt->utt_tree_mutex);
+}
+
+static struct dentry *utt_create_tree(struct utt_trace *utt, const char *root,
+ const char *name)
+{
+ struct dentry *dir = NULL;
+
+ if (root == NULL || name == NULL)
+ return NULL;
+
+ mutex_lock(&utt->utt_tree_mutex);
+
+ if (!utt->utt_tree_root) {
+ utt->utt_tree_root = debugfs_create_dir(root, NULL);
+ if (!utt->utt_tree_root)
+ goto err;
+ }
+
+ dir = debugfs_create_dir(name, utt->utt_tree_root);
+ if (dir)
+ utt->root_users++;
+ else
+ utt_remove_root(utt);
+
+err:
+ mutex_unlock(&utt->utt_tree_mutex);
+ return dir;
+}
+
+void utt_trace_cleanup(struct utt_trace *utt)
+{
+ relay_close(utt->rchan);
+ debugfs_remove(utt->dropped_file);
+ utt_remove_tree(utt);
+ free_percpu(utt->sequence);
+ kfree(utt);
+}
+
+EXPORT_SYMBOL_GPL(utt_trace_cleanup);
+
+int utt_trace_remove(struct utt_trace *utt)
+{
+ if (utt->trace_state == Utt_trace_setup ||
+ utt->trace_state == Utt_trace_stopped)
+ utt_trace_cleanup(utt);
+
+ return 0;
+}
+
+EXPORT_SYMBOL_GPL(utt_trace_remove);
+
+static int utt_dropped_open(struct inode *inode, struct file *filp)
+{
+ filp->private_data = inode->u.generic_ip;
+
+ return 0;
+}
+
+static ssize_t utt_dropped_read(struct file *filp, char __user *buffer,
+ size_t count, loff_t *ppos)
+{
+ struct utt_trace *utt = filp->private_data;
+ char buf[16];
+
+ snprintf(buf, sizeof(buf), "%u\n", atomic_read(&utt->dropped));
+
+ return simple_read_from_buffer(buffer, count, ppos, buf, strlen(buf));
+}
+
+static struct file_operations utt_dropped_fops = {
+ .owner = THIS_MODULE,
+ .open = utt_dropped_open,
+ .read = utt_dropped_read,
+};
+
+/*
+ * Keep track of how many times we encountered a full subbuffer, to aid
+ * the user space app in telling how many lost events there were.
+ */
+static int utt_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
+ void *prev_subbuf, size_t prev_padding)
+{
+ struct utt_trace *utt;
+
+ if (!relay_buf_full(buf))
+ return 1;
+
+ utt = buf->chan->private_data;
+ atomic_inc(&utt->dropped);
+ return 0;
+}
+
+static int utt_remove_buf_file_callback(struct dentry *dentry)
+{
+ debugfs_remove(dentry);
+ return 0;
+}
+
+static struct dentry *utt_create_buf_file_callback(const char *filename,
+ struct dentry *parent,
+ int mode,
+ struct rchan_buf *buf,
+ int *is_global)
+{
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+}
+
+static struct rchan_callbacks utt_relay_callbacks = {
+ .subbuf_start = utt_subbuf_start_callback,
+ .create_buf_file = utt_create_buf_file_callback,
+ .remove_buf_file = utt_remove_buf_file_callback,
+};
+
+/*
+ * Setup everything required to start tracing
+ */
+struct utt_trace *utt_trace_setup(struct utt_trace_setup *utts)
+{
+ struct utt_trace *utt = NULL;
+ struct dentry *dir = NULL;
+ int ret = -EINVAL;
+
+ if (!utts->buf_size || !utts->buf_nr)
+ goto err;
+
+ ret = -ENOMEM;
+ utt = kzalloc(sizeof(*utt), GFP_KERNEL);
+ if (!utt)
+ goto err;
+
+ mutex_init(&utt->utt_tree_mutex);
+
+ utt->sequence = alloc_percpu(unsigned long);
+ if (!utt->sequence)
+ goto err;
+
+ ret = -ENOENT;
+ dir = utt_create_tree(utt, utts->root, utts->name);
+ if (!dir)
+ goto err;
+
+ utt->dir = dir;
+ atomic_set(&utt->dropped, 0);
+
+ ret = -EIO;
+ utt->dropped_file = debugfs_create_file("dropped", 0444, dir, utt, &utt_dropped_fops);
+ if (!utt->dropped_file)
+ goto err;
+
+ utt->rchan = relay_open("trace", dir, utts->buf_size, utts->buf_nr, &utt_relay_callbacks);
+ if (!utt->rchan)
+ goto err;
+ utt->rchan->private_data = utt;
+
+ utt->trace_state = Utt_trace_setup;
+
+ utts->err = 0;
+ return utt;
+err:
+ if (utt) {
+ if (utt->dropped_file)
+ debugfs_remove(utt->dropped_file);
+ if (utt->sequence)
+ free_percpu(utt->sequence);
+ if (utt->rchan)
+ relay_close(utt->rchan);
+ kfree(utt);
+ }
+ if (dir)
+ utt_remove_tree(utt);
+ utts->err = ret;
+ return NULL;
+}
+
+EXPORT_SYMBOL_GPL(utt_trace_setup);
+
+int utt_trace_startstop(struct utt_trace *utt, int start,
+ unsigned int *trace_seq)
+{
+ int ret;
+
+ /*
+ * For starting a trace, we can transition from a setup or stopped
+ * trace. For stopping a trace, the state must be running
+ */
+ ret = -EINVAL;
+ if (start) {
+ if (utt->trace_state == Utt_trace_setup ||
+ utt->trace_state == Utt_trace_stopped) {
+ if (trace_seq)
+ (*trace_seq)++;
+ smp_mb();
+ utt->trace_state = Utt_trace_running;
+ ret = 0;
+ }
+ } else {
+ if (utt->trace_state == Utt_trace_running) {
+ utt->trace_state = Utt_trace_stopped;
+ relay_flush(utt->rchan);
+ ret = 0;
+ }
+ }
+
+ return ret;
+}
+
+EXPORT_SYMBOL_GPL(utt_trace_startstop);
+
+/*
+ * Average offset over two calls to sched_clock() with a gettimeofday()
+ * in the middle
+ */
+static void utt_check_time(unsigned long long *t)
+{
+ unsigned long long a, b;
+ struct timeval tv;
+
+ a = sched_clock();
+ do_gettimeofday(&tv);
+ b = sched_clock();
+
+ *t = tv.tv_sec * 1000000000 + tv.tv_usec * 1000;
+ *t -= (a + b) / 2;
+}
+
+static void utt_check_cpu_time(void *data)
+{
+ unsigned long long *t;
+ int cpu = get_cpu();
+
+ t = &per_cpu(utt_trace_cpu_offset, cpu);
+
+ /*
+ * Just call it twice, hopefully the second call will be cache hot
+ * and a little more precise
+ */
+ utt_check_time(t);
+ utt_check_time(t);
+
+ put_cpu();
+}
+
+/*
+ * Call utt_check_cpu_time() on each CPU to calibrate our inter-CPU
+ * timings
+ */
+static void utt_calibrate_offsets(void)
+{
+ unsigned long flags;
+
+ smp_call_function(utt_check_cpu_time, NULL, 1, 1);
+ local_irq_save(flags);
+ utt_check_cpu_time(NULL);
+ local_irq_restore(flags);
+}
+
+static void utt_set_ht_offsets(void)
+{
+#if defined(CONFIG_SCHED_SMT)
+ int cpu, i;
+
+ /*
+ * now make sure HT siblings have the same time offset
+ */
+ preempt_disable();
+ for_each_online_cpu(cpu) {
+ unsigned long long *cpu_off, *sibling_off;
+
+ for_each_cpu_mask(i, cpu_sibling_map[cpu]) {
+ if (i == cpu)
+ continue;
+
+ cpu_off = &per_cpu(utt_trace_cpu_offset, cpu);
+ sibling_off = &per_cpu(utt_trace_cpu_offset, i);
+ *sibling_off = *cpu_off;
+ }
+ }
+ preempt_enable();
+#endif
+}
+
+static __init int utt_init(void)
+{
+ utt_calibrate_offsets();
+ utt_set_ht_offsets();
+
+ return 0;
+}
+
+module_init(utt_init);
+
diff -urpN -X dontdiff linux-2.6.18/include/linux/blktrace_api.h linux-2.6.18-utt/include/linux/blktrace_api.h
--- linux-2.6.18/include/linux/blktrace_api.h 2006-09-19 22:42:06.000000000 -0500
+++ linux-2.6.18-utt/include/linux/blktrace_api.h 2006-10-07 10:11:47.000000000 -0500
@@ -2,7 +2,7 @@
#define BLKTRACE_H
#include <linux/blkdev.h>
-#include <linux/relay.h>
+#include <linux/utt.h>
/*
* Trace categories
@@ -96,34 +96,21 @@ struct blk_io_trace_remap {
__be64 sector;
};
-enum {
- Blktrace_setup = 1,
- Blktrace_running,
- Blktrace_stopped,
-};
-
struct blk_trace {
- int trace_state;
- struct rchan *rchan;
- unsigned long *sequence;
+ struct utt_trace *utt;
u16 act_mask;
u64 start_lba;
u64 end_lba;
u32 pid;
u32 dev;
- struct dentry *dir;
- struct dentry *dropped_file;
- atomic_t dropped;
};
/*
* User setup structure passed with BLKTRACESTART
*/
struct blk_user_trace_setup {
- char name[BDEVNAME_SIZE]; /* output */
+ struct utt_trace_setup utts;
u16 act_mask; /* input */
- u32 buf_size; /* input */
- u32 buf_nr; /* input */
u64 start_lba;
u64 end_lba;
u32 pid;
diff -urpN -X dontdiff linux-2.6.18/include/linux/utt.h linux-2.6.18-utt/include/linux/utt.h
--- linux-2.6.18/include/linux/utt.h 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.18-utt/include/linux/utt.h 2006-10-07 10:11:47.000000000 -0500
@@ -0,0 +1,53 @@
+#ifndef UTT_H
+#define UTT_H
+
+#include <linux/relay.h>
+
+enum {
+ Utt_trace_setup = 1,
+ Utt_trace_running,
+ Utt_trace_stopped,
+};
+
+struct utt_trace {
+ int trace_state;
+ struct rchan *rchan;
+ unsigned long *sequence;
+ struct dentry *dir;
+ struct dentry *dropped_file;
+ atomic_t dropped;
+ struct dentry *utt_tree_root;
+ struct mutex utt_tree_mutex;
+ unsigned int root_users;
+ void *private_data;
+};
+
+#define UTT_TRACE_ROOT_NAME_SIZE 32 /* Largest string for a root dir identifier */
+#define UTT_TRACE_NAME_SIZE 32 /* Largest string for a trace identifier */
+
+/*
+ * User setup structure
+ */
+struct utt_trace_setup {
+ char root[UTT_TRACE_ROOT_NAME_SIZE]; /* input */
+ char name[UTT_TRACE_NAME_SIZE]; /* input */
+ u32 buf_size; /* input */
+ u32 buf_nr; /* input */
+ int err; /* output */
+};
+
+#if defined(CONFIG_UTT)
+DECLARE_PER_CPU(unsigned long long, utt_trace_cpu_offset);
+extern struct utt_trace *utt_trace_setup(struct utt_trace_setup *utts);
+extern int utt_trace_startstop(struct utt_trace *utt, int start,
+ unsigned int *trace_seq);
+extern void utt_trace_cleanup(struct utt_trace *utt);
+extern int utt_trace_remove(struct utt_trace *utt);
+#else /* !CONFIG_UTT */
+#define utt_trace_setup(utts) (NULL)
+#define utt_trace_startstop(utt, start, trace_seq) (-EINVAL)
+#define utt_trace_cleanup(utt) do { } while (0)
+#define utt_trace_remove(utt) (-EINVAL)
+#endif /* CONFIG_UTT */
+
+#endif