This is the mail archive of the systemtap@sourceware.org mailing list for the systemtap project.



[PATCH] kprobe booster for IA64


Hi Anil and Ananth,

I have ported the kprobe-booster to the IA64 architecture; the patch
below applies against 2.6.17-rc5-mm3. Could you review it?

This patch modifies the kprobes code as follows:
- Boost the probe if the target bundle uses neither a B unit nor
  an X unit.
- Introduce an INST_FLAG_BOOSTABLE value for ainsn.inst_flag.
  If this flag is set, the kprobe is boostable.
- Change the instruction buffer (ainsn.insn) to an array of three
  bundles. The 2nd and 3rd elements of this array are used for the
  boosted (direct) execution path; see the sketch below.
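To make the last point concrete, the per-probe buffer ends up laid out
roughly like this when a probe is boostable (my summary of
prepare_break_inst() and prepare_boost() in the patch, not part of the
patch itself):

  ainsn.insn[0]  copy of the probed bundle with the break instruction
                 inserted into the probed slot; arch_arm_kprobe()
                 writes this one into the kernel text.
  ainsn.insn[1]  unmodified copy of the probed bundle.
  ainsn.insn[2]  a brl bundle branching back to the bundle that
                 follows p->addr.

When the probe fires, if the only inst_flag set is INST_FLAG_BOOSTABLE,
there is no post_handler, and the probed code was already running with
preemption disabled (or CONFIG_PREEMPT is off), then
pre_kprobes_handler() points the IP at the probed slot of insn[1]
instead of setting up single stepping; the CPU executes the original
bundle out of line and the brl in insn[2] returns straight to the
kernel text, so no single-step trap is taken.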

And this patch is Lindent clean ;)

I measured the overhead of the booster using the benchmark
kernel module included at the end of this mail.

noprobe: 436 machine cycles
noboost: 1162 machine cycles
boosted: 829 machine cycles

CPU spec: Itanium2 1.3GHz (2CPUs)
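
In other words, the per-hit probe overhead drops from 1162 - 436 = 726
cycles to 829 - 436 = 393 cycles, so the booster removes roughly 46% of
the probing overhead on this machine.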

-- 
Masami HIRAMATSU
2nd Research Dept.
Hitachi, Ltd., Systems Development Laboratory
E-mail: hiramatu@sdl.hitachi.co.jp

 arch/ia64/kernel/kprobes.c |   84 +++++++++++++++++++++++++++++++++++++++++----
 include/asm-ia64/kprobes.h |    9 +++-
 2 files changed, 85 insertions(+), 8 deletions(-)
diff --exclude=CVS -Narup a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
--- a/arch/ia64/kernel/kprobes.c	2006-06-05 13:02:54.000000000 +0900
+++ b/arch/ia64/kernel/kprobes.c	2006-06-05 13:34:07.000000000 +0900
@@ -78,6 +78,35 @@ static enum instruction_type bundle_enco
 };

 /*
+ * In this function, we check whether the target bundle could
+ * possibly modify the IP.
+ */
+static __always_inline int can_boost(uint template)
+{
+	template &= 0x1e;
+	if (template >= 0x10 ||	/* including B unit */
+	    template == 0x04 ||	/* including X unit */
+	    template == 0x06) {	/* undefined */
+		return 0;
+	}
+	return 1;
+}
+
+/* Insert a long-branch (brl) bundle */
+static __always_inline void set_brl_inst(void *from, void *to)
+{
+	s64 rel = ((s64) to - (s64) from) >> 4;
+	bundle_t *brl;
+	brl = (bundle_t *) ((u64) from & ~0xf);
+	brl->quad0.template = 0x05;	/* [MLX](stop) */
+	brl->quad0.slot0 = NOP_M_INST;	/* nop.m 0x0 */
+	brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2;
+	brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46);
+	/* brl.cond.sptk.many.clr rel<<4 (qp=0) */
+	brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff);
+}
+
+/*
  * In this function we check to see if the instruction
  * is IP relative instruction and update the kprobe
  * inst flag accordingly
@@ -125,6 +154,10 @@ static void __kprobes update_kprobe_inst
 		  break;
 		}
 	}
+
+	if (can_boost(template)) {
+		p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE;
+	}
 	return;
 }

@@ -218,7 +251,7 @@ static void __kprobes prepare_break_inst
 					 struct kprobe *p)
 {
 	unsigned long break_inst = BREAK_INST;
-	bundle_t *bundle = &p->ainsn.insn.bundle;
+	bundle_t *bundle = &p->ainsn.insn[0].bundle;

 	/*
 	 * Copy the original kprobe_inst qualifying predicate(qp)
@@ -249,6 +282,14 @@ static void __kprobes prepare_break_inst
 	 * single step on original instruction
 	 */
 	update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p);
+
+	/* If the bundle can be boosted, prepare boost bundles */
+	if (p->ainsn.inst_flag & INST_FLAG_BOOSTABLE) {
+		memcpy(&p->ainsn.insn[1].bundle, &p->opcode.bundle,
+		       sizeof(bundle_t));
+		set_brl_inst(&p->ainsn.insn[2].bundle,
+			     (bundle_t *) p->addr + 1);
+	}
 }

 static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot,
@@ -424,10 +465,10 @@ int __kprobes arch_prepare_kprobe(struct
 	unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL);
 	unsigned long kprobe_inst=0;
 	unsigned int slot = addr & 0xf, template, major_opcode = 0;
-	bundle_t *bundle = &p->ainsn.insn.bundle;
+	bundle_t *bundle = &p->ainsn.insn[0].bundle;

 	memcpy(&p->opcode.bundle, kprobe_addr, sizeof(bundle_t));
-	memcpy(&p->ainsn.insn.bundle, kprobe_addr, sizeof(bundle_t));
+	memcpy(&p->ainsn.insn[0].bundle, kprobe_addr, sizeof(bundle_t));

  	template = bundle->quad0.template;

@@ -454,7 +495,7 @@ void __kprobes arch_arm_kprobe(struct kp
 	unsigned long addr = (unsigned long)p->addr;
 	unsigned long arm_addr = addr & ~0xFULL;

-	memcpy((char *)arm_addr, &p->ainsn.insn.bundle, sizeof(bundle_t));
+	memcpy((char *)arm_addr, &p->ainsn.insn[0].bundle, sizeof(bundle_t));
 	flush_icache_range(arm_addr, arm_addr + sizeof(bundle_t));
 }

@@ -471,7 +512,7 @@ void __kprobes arch_disarm_kprobe(struct
 /*
  * We are resuming execution after a single step fault, so the pt_regs
  * structure reflects the register state after we executed the instruction
- * located in the kprobe (p->ainsn.insn.bundle).  We still need to adjust
+ * located in the kprobe (p->ainsn.insn[0].bundle).  We still need to adjust
  * the ip to point back to the original stack address. To set the IP address
  * to original stack address, handle the case where we need to fixup the
  * relative IP address and/or fixup branch register.
@@ -488,7 +529,7 @@ static void __kprobes resume_execution(s
  	if (slot == 1 && bundle_encoding[template][1] == L)
  		slot = 2;

-	if (p->ainsn.inst_flag) {
+	if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) {

 		if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) {
 			/* Fix relative IP address */
@@ -563,6 +604,24 @@ static void __kprobes prepare_ss(struct
 	ia64_psr(regs)->ss = 1;
 }

+/* Prepare to execute the copied instructions directly (boosted path) */
+static void __kprobes prepare_boost(struct kprobe *p, struct pt_regs *regs)
+{
+	unsigned long slot = (unsigned long)p->addr & 0xf;
+
+	regs->cr_iip = (unsigned long)&p->ainsn.insn[1].bundle & ~0xFULL;
+
+	if (slot > 2)
+		slot = 0;
+
+	ia64_psr(regs)->ri = slot;
+
+	/* turn off single stepping */
+	ia64_psr(regs)->ss = 0;
+
+	reset_current_kprobe();
+}
+
 static int __kprobes is_ia64_break_inst(struct pt_regs *regs)
 {
 	unsigned int slot = ia64_psr(regs)->ri;
@@ -602,6 +661,11 @@ static int __kprobes pre_kprobes_handler
 	struct pt_regs *regs = args->regs;
 	kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs);
 	struct kprobe_ctlblk *kcb;
+#ifdef CONFIG_PREEMPT
+	unsigned pre_preempt_count = preempt_count();
+#else
+	unsigned pre_preempt_count = 1;
+#endif

 	/*
 	 * We don't want to be preempted for the entire
@@ -681,6 +745,14 @@ static int __kprobes pre_kprobes_handler
 		 */
 		return 1;

+	if (pre_preempt_count && p->ainsn.inst_flag == INST_FLAG_BOOSTABLE &&
+	    !p->post_handler) {
+		/* Boost up -- we can execute copied instructions directly */
+		prepare_boost(p, regs);
+		preempt_enable_no_resched();
+		return 1;
+	}
+
 ss_probe:
 	prepare_ss(p, regs);
 	kcb->kprobe_status = KPROBE_HIT_SS;
diff --exclude=CVS -Narup a/include/asm-ia64/kprobes.h b/include/asm-ia64/kprobes.h
--- a/include/asm-ia64/kprobes.h	2006-06-05 13:03:07.000000000 +0900
+++ b/include/asm-ia64/kprobes.h	2006-06-05 13:34:07.000000000 +0900
@@ -29,8 +29,12 @@
 #include <linux/percpu.h>
 #include <asm/break.h>

-#define MAX_INSN_SIZE   16
+#define MAX_INSN_SIZE   3	/* 3 bundles */
 #define BREAK_INST	(long)(__IA64_BREAK_KPROBE << 6)
+#define NOP_M_INST	(long)(1<<27)
+#define BRL_INST(i1,i2) (long)((0xcL << 37) |	/* brl */  \
+			       (1L << 12) |	/* many */ \
+			       (((i1) & 1) << 36) | ((i2) << 13))	/* imm */

 typedef union cmp_inst {
 	struct {
@@ -108,10 +112,11 @@ struct fnptr {
 /* Architecture specific copy of original instruction*/
 struct arch_specific_insn {
 	/* copy of the instruction to be emulated */
-	kprobe_opcode_t insn;
+	kprobe_opcode_t insn[3];
  #define INST_FLAG_FIX_RELATIVE_IP_ADDR		1
  #define INST_FLAG_FIX_BRANCH_REG		2
  #define INST_FLAG_BREAK_INST			4
+ #define INST_FLAG_BOOSTABLE			8
  	unsigned long inst_flag;
  	unsigned short target_br_reg;
 };
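
As a side note for anyone decoding the BRL_INST()/set_brl_inst()
bit-twiddling above, here is a small stand-alone user-space sketch
(mine, not part of the patch; the example addresses are made up) of how
the 60-bit bundle displacement is split into the i, imm39 and imm20b
fields and put back together:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int64_t target = 0xa000000100234560LL;	/* arbitrary example addresses */
	int64_t source = 0xa000000100120000LL;
	int64_t rel = (target - source) >> 4;	/* displacement in 16-byte bundles */

	/* The same split that set_brl_inst()/BRL_INST() perform. */
	uint64_t i      = ((uint64_t)rel >> 59) & 0x1;			/* "i" bit, slot 2 */
	uint64_t imm39  = ((uint64_t)rel >> 20) & 0x7fffffffffULL;	/* imm39, slot 1   */
	uint64_t imm20b = (uint64_t)rel & 0xfffff;			/* imm20b, slot 2  */

	/* Reassemble the 60-bit immediate and sign-extend it from bit 59. */
	uint64_t packed = (i << 59) | (imm39 << 20) | imm20b;
	int64_t imm60 = (int64_t)(packed << 4) >> 4;

	printf("rel=%lld reassembled=%lld match=%d\n",
	       (long long)rel, (long long)imm60, rel == imm60);
	return 0;
}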

/*
 * boost probe bench
 * Copyright (c) 2006 Hitachi, Ltd.
 * Created by Masami Hiramatsu <hiramatu@sdl.hitachi.co.jp>
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/kprobes.h>
#include <linux/timex.h>	/* get_cycles(), cycles_t */

MODULE_AUTHOR("M.Hiramatsu");
MODULE_LICENSE("GPL");

/* Probe target: a small function with a trivial amount of real work */
int dummy_function(int n)
{
	int k,l=0;
	for (k=1; k<n; k++) {
		l += (k + 1) / k;
	}
	return l;
}

/* Empty pre-handler: we only measure the probe entry/exit overhead */
static int probe_handler(struct kprobe *kp, struct pt_regs *regs)
{
	return 0;
}

#define CALLB 14
#define CALLN (1<<CALLB)	/* number of probed calls per measurement: 2^14 */

/* Call the probed function CALLN times; return the mean cycles per call */
static int bench_probe(void)
{
	int i;
	cycles_t c = get_cycles();

	for (i = 0; i < CALLN; i++) {
		dummy_function(10);
	}
	return (get_cycles() - c) >> CALLB;
}

static struct kprobe kp;

static int install_probe(void)
{
	int ret;

	kp.pre_handler = probe_handler;
	/* On IA-64 a function pointer is a descriptor; dereference it
	 * to get the real entry point of dummy_function(). */
	kp.addr = *(void **)dummy_function;

	printk("noprobe: %d machine cycles\n", bench_probe());
	ret = register_kprobe(&kp);
	if (ret != 0) {
		printk("probe install error: %d\n", ret);
		return -EINVAL;
	}
	printk("boosted: %d machine cycles\n", bench_probe());
	/* Clear the flag to force the single-step path for comparison */
	kp.ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE;
	printk("noboost: %d machine cycles\n", bench_probe());

	unregister_kprobe(&kp);
	/* Fail module init on purpose so the module does not stay loaded */
	return -1;
}

static void uninstall_probe(void)
{
	printk("module removed\n");
}

module_init(install_probe);
module_exit(uninstall_probe);
