This is the mail archive of the
systemtap@sourceware.org
mailing list for the systemtap project.
Re: [RFC PATCH 2/3] Djprobe improvement patches (Re: Dynamic djprobe)
- From: Mathieu Desnoyers <compudj at krystal dot dyndns dot org>
- To: Masami Hiramatsu <hiramatu at sdl dot hitachi dot co dot jp>
- Cc: systemtap at sources dot redhat dot com, "Keshavamurthy, Anil S" <anil dot s dot keshavamurthy at intel dot com>, Roland McGrath <roland at redhat dot com>, Richard J Moore <richardj_moore at uk dot ibm dot com>, Andi Kleen <ak at muc dot de>, michel dot dagenais at polymtl dot ca, "Frank Ch. Eigler" <fche at redhat dot com>, Karim Yaghmour <karim at opersys dot com>, Satoshi Oshima <soshima at redhat dot com>, Hideo Aoki <haoki at redhat dot com>, Yumiko Sugita <sugita at sdl dot hitachi dot co dot jp>
- Date: Wed, 3 Aug 2005 23:46:42 -0400
- Subject: Re: [RFC PATCH 2/3] Djprobe improvement patches (Re: Dynamic djprobe)
- References: <42EA740A.10601@sdl.hitachi.co.jp> <42EFA85A.6010901@sdl.hitachi.co.jp>
If the stack inspection becomes considered too costy (push/pop at each interrupt
entry/exit) or too uncertain (looking at all the stack's data), I would suggest
a different approach for this interruption problem :
1 - Replace the first byte of the segment you want to modify with a int3
2 - Wait until each other CPU has reach a point where we know they are not in
interrupt context anymore (interrupt or softirq). This could be done by
spawning a kernel thread on each CPU which would inform us when it runs. If
there is no kernel preemption, then we are sure there is no interruption
having a wrong EIP saved on its stack.
3 - Write the jmp instruction.
In a context where there is no preemption, this should work.
* Masami Hiramatsu (hiramatu@sdl.hitachi.co.jp) wrote:
> Hi,
>
> This patch is for finding the executing address hidden by nested
> interrupts. I introduced an address_stack structure, and
> push/pop_iaddr() macros. push_iaddr() stores interrupted executing
> address into address_stack percpu stacks and increments a counter.
> pop_iaddr() just decrements the counter. We can find hidden addresses
> from address_stack and check it.
> But Satoshi pointed out the possibility of performance degradation. So I
> am developing another patch to find hidden address from current thread?s
> stack.
>
> Thanks,
>
> --
> Masami HIRAMATSU
> 2nd Research Dept.
> Hitachi, Ltd., Systems Development Laboratory
> E-mail: hiramatu@sdl.hitachi.co.jp
>
>
> arch/i386/kernel/apic.c | 7 +++++++
> arch/i386/kernel/cpu/mcheck/p4.c | 3 +++
> arch/i386/kernel/irq.c | 3 +++
> arch/i386/kernel/kprobes.c | 2 +-
> arch/i386/kernel/smp.c | 3 +++
> arch/i386/kernel/traps.c | 4 +++-
> include/linux/kprobes.h | 20 ++++++++++++++++++++
> kernel/kprobes.c | 22 ++++++++++++++++++++++
> 8 files changed, 62 insertions(+), 2 deletions(-)
> diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/apic.c linux-2.6.12-djprobe.3/arch/i386/kernel/apic.c
> --- linux-2.6.12-djprobe.2/arch/i386/kernel/apic.c 2005-06-18 04:48:29.000000000 +0900
> +++ linux-2.6.12-djprobe.3/arch/i386/kernel/apic.c 2005-08-02 20:30:10.000000000 +0900
> @@ -26,6 +26,7 @@
> #include <linux/mc146818rtc.h>
> #include <linux/kernel_stat.h>
> #include <linux/sysdev.h>
> +#include <linux/kprobes.h>
>
> #include <asm/atomic.h>
> #include <asm/smp.h>
> @@ -1178,7 +1179,9 @@ fastcall void smp_apic_timer_interrupt(s
> * interrupt lock, which is the WrongThing (tm) to do.
> */
> irq_enter();
> + push_iaddr((void*)regs->eip);
> smp_local_timer_interrupt(regs);
> + pop_iaddr();
> irq_exit();
> }
>
> @@ -1190,6 +1193,7 @@ fastcall void smp_spurious_interrupt(str
> unsigned long v;
>
> irq_enter();
> + push_iaddr((void*)regs->eip);
> /*
> * Check if this really is a spurious interrupt and ACK it
> * if it is a vectored one. Just in case...
> @@ -1202,6 +1206,7 @@ fastcall void smp_spurious_interrupt(str
> /* see sw-dev-man vol 3, chapter 7.4.13.5 */
> printk(KERN_INFO "spurious APIC interrupt on CPU#%d, should never happen.\n",
> smp_processor_id());
> + pop_iaddr();
> irq_exit();
> }
>
> @@ -1214,6 +1219,7 @@ fastcall void smp_error_interrupt(struct
> unsigned long v, v1;
>
> irq_enter();
> + push_iaddr((void*)regs->eip);
> /* First tickle the hardware, only then report what went on. -- REW */
> v = apic_read(APIC_ESR);
> apic_write(APIC_ESR, 0);
> @@ -1233,6 +1239,7 @@ fastcall void smp_error_interrupt(struct
> */
> printk (KERN_DEBUG "APIC error on CPU%d: %02lx(%02lx)\n",
> smp_processor_id(), v , v1);
> + pop_iaddr();
> irq_exit();
> }
>
> diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/cpu/mcheck/p4.c linux-2.6.12-djprobe.3/arch/i386/kernel/cpu/mcheck/p4.c
> --- linux-2.6.12-djprobe.2/arch/i386/kernel/cpu/mcheck/p4.c 2005-06-18 04:48:29.000000000 +0900
> +++ linux-2.6.12-djprobe.3/arch/i386/kernel/cpu/mcheck/p4.c 2005-08-02 20:30:10.000000000 +0900
> @@ -9,6 +9,7 @@
> #include <linux/irq.h>
> #include <linux/interrupt.h>
> #include <linux/smp.h>
> +#include <linux/kprobes.h>
>
> #include <asm/processor.h>
> #include <asm/system.h>
> @@ -73,7 +74,9 @@ static void (*vendor_thermal_interrupt)(
> fastcall void smp_thermal_interrupt(struct pt_regs *regs)
> {
> irq_enter();
> + push_iaddr((void*)regs->eip);
> vendor_thermal_interrupt(regs);
> + pop_iaddr();
> irq_exit();
> }
>
> diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/irq.c linux-2.6.12-djprobe.3/arch/i386/kernel/irq.c
> --- linux-2.6.12-djprobe.2/arch/i386/kernel/irq.c 2005-06-18 04:48:29.000000000 +0900
> +++ linux-2.6.12-djprobe.3/arch/i386/kernel/irq.c 2005-08-02 20:30:10.000000000 +0900
> @@ -15,6 +15,7 @@
> #include <linux/seq_file.h>
> #include <linux/interrupt.h>
> #include <linux/kernel_stat.h>
> +#include <linux/kprobes.h>
>
> DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
> EXPORT_PER_CPU_SYMBOL(irq_stat);
> @@ -58,6 +59,7 @@ fastcall unsigned int do_IRQ(struct pt_r
> #endif
>
> irq_enter();
> + push_iaddr((void*)regs->eip);
> #ifdef CONFIG_DEBUG_STACKOVERFLOW
> /* Debugging check for stack overflow: is there less than 1KB free? */
> {
> @@ -104,6 +106,7 @@ fastcall unsigned int do_IRQ(struct pt_r
> #endif
> __do_IRQ(irq, regs);
>
> + pop_iaddr();
> irq_exit();
>
> return 1;
> diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/kprobes.c linux-2.6.12-djprobe.3/arch/i386/kernel/kprobes.c
> --- linux-2.6.12-djprobe.2/arch/i386/kernel/kprobes.c 2005-08-01 22:00:17.000000000 +0900
> +++ linux-2.6.12-djprobe.3/arch/i386/kernel/kprobes.c 2005-08-02 20:30:10.000000000 +0900
> @@ -497,7 +497,7 @@ int djprobe_bypass_handler(struct kprobe
> kprobe_opcode_t *stub = djpi->stub.insn;
> int cpu = smp_processor_id();
>
> - if (!DJPI_CHECKED(djpi)) {
> + if (!DJPI_CHECKED(djpi) && check_safety_djprobe_instance(djpi)) {
> cpu_set(cpu, djpi->checked_cpus); /* check this cpu */
>
> if (DJPI_CHECKED(djpi)) { /* all cpus are checked */
> diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/smp.c linux-2.6.12-djprobe.3/arch/i386/kernel/smp.c
> --- linux-2.6.12-djprobe.2/arch/i386/kernel/smp.c 2005-06-18 04:48:29.000000000 +0900
> +++ linux-2.6.12-djprobe.3/arch/i386/kernel/smp.c 2005-08-02 20:30:10.000000000 +0900
> @@ -19,6 +19,7 @@
> #include <linux/mc146818rtc.h>
> #include <linux/cache.h>
> #include <linux/interrupt.h>
> +#include <linux/kprobes.h>
>
> #include <asm/mtrr.h>
> #include <asm/tlbflush.h>
> @@ -601,7 +602,9 @@ fastcall void smp_call_function_interrup
> * At this point the info structure may be out of scope unless wait==1
> */
> irq_enter();
> + push_iaddr((void*)regs->eip);
> (*func)(info);
> + pop_iaddr();
> irq_exit();
>
> if (wait) {
> diff -Narup linux-2.6.12-djprobe.2/arch/i386/kernel/traps.c linux-2.6.12-djprobe.3/arch/i386/kernel/traps.c
> --- linux-2.6.12-djprobe.2/arch/i386/kernel/traps.c 2005-06-18 04:48:29.000000000 +0900
> +++ linux-2.6.12-djprobe.3/arch/i386/kernel/traps.c 2005-08-02 20:30:10.000000000 +0900
> @@ -616,12 +616,13 @@ static int dummy_nmi_callback(struct pt_
> }
>
> static nmi_callback_t nmi_callback = dummy_nmi_callback;
> -
> +
> fastcall void do_nmi(struct pt_regs * regs, long error_code)
> {
> int cpu;
>
> nmi_enter();
> + push_iaddr((void *)regs->eip);
>
> cpu = smp_processor_id();
> ++nmi_count(cpu);
> @@ -629,6 +630,7 @@ fastcall void do_nmi(struct pt_regs * re
> if (!nmi_callback(regs, cpu))
> default_do_nmi(regs);
>
> + pop_iaddr();
> nmi_exit();
> }
>
> diff -Narup linux-2.6.12-djprobe.2/include/linux/kprobes.h linux-2.6.12-djprobe.3/include/linux/kprobes.h
> --- linux-2.6.12-djprobe.2/include/linux/kprobes.h 2005-08-02 20:29:44.000000000 +0900
> +++ linux-2.6.12-djprobe.3/include/linux/kprobes.h 2005-08-02 20:30:10.000000000 +0900
> @@ -33,6 +33,7 @@
> #include <linux/notifier.h>
> #include <linux/smp.h>
> #include <asm/kprobes.h>
> +#include <linux/irq.h>
>
> struct kprobe;
> struct pt_regs;
> @@ -121,6 +122,24 @@ struct djprobe {
> struct djprobe_instance * inst;
> };
>
> +#ifdef CONFIG_DJPROBE
> +# define ADDR_STACK_SIZE NR_IRQS
> +struct address_stack {
> + int sp;
> + void * addrs[ADDR_STACK_SIZE];
> +};
> +DECLARE_PER_CPU(struct address_stack, iadr_stack);
> +/* push/pop interrupted address (should be called from non-preempt area)*/
> +# define push_iaddr(addr) { int sp=__get_cpu_var(iadr_stack).sp++; \
> + if (sp < ADDR_STACK_SIZE && sp >= 0) \
> + __get_cpu_var(iadr_stack).addrs[sp]=(addr);}
> +# define pop_iaddr() { int sp=__get_cpu_var(iadr_stack).sp--; \
> + if (sp <= 0) __get_cpu_var(iadr_stack).sp=0;}
> +#else /*!DJPROBE*/
> +# define push_iaddr(addr) do { } while (0)
> +# define pop_iaddr() do { } while (0)
> +#endif
> +
> #ifdef CONFIG_KPROBES
> /* Locks kprobe: irq must be disabled */
> void lock_kprobes(void);
> @@ -155,6 +174,7 @@ extern int arch_prepare_djprobe_instance
> extern int djprobe_bypass_handler(struct kprobe * kp, struct pt_regs * regs);
> extern void arch_uninstall_djprobe_instance(struct djprobe_instance *djpi);
> extern void schdule_release_djprobe_instance(void);
> +extern int check_safety_djprobe_instance(struct djprobe_instance *djpi);
>
> int register_djprobe(struct djprobe *p);
> void unregister_djprobe(struct djprobe *p);
> diff -Narup linux-2.6.12-djprobe.2/kernel/kprobes.c linux-2.6.12-djprobe.3/kernel/kprobes.c
> --- linux-2.6.12-djprobe.2/kernel/kprobes.c 2005-08-01 22:00:17.000000000 +0900
> +++ linux-2.6.12-djprobe.3/kernel/kprobes.c 2005-08-02 20:30:10.000000000 +0900
> @@ -265,10 +265,32 @@ void unregister_jprobe(struct jprobe *jp
> * This list is operated on registering and unregistering djprobe.
> * Thus, It is not required processing speed. I decided using a 'list'.
> */
> +DEFINE_PER_CPU(struct address_stack, iadr_stack);
> static DEFINE_SPINLOCK(djprobe_lock);
> static LIST_HEAD(djprobe_list);
> static int nr_instances = 0;
>
> +/* safety check routine */
> +int check_safety_djprobe_instance(struct djprobe_instance *djpi)
> +{
> + int sp;
> + preempt_disable();
> + sp = __get_cpu_var(iadr_stack).sp;
> + if (sp > ADDR_STACK_SIZE ){
> + return 0; /*overflowed -- we can't check it*/
> + }
> +
> + for (; sp > 0; sp--) {
> + void * addr = __get_cpu_var(iadr_stack).addrs[sp];
> + if((void*)djpi->kp.addr < addr &&
> + (void*)djpi->kp.addr + djpi->stub.size > addr)
> + return 0; /*we find insertion address
> + in interrupted addresses*/
> + }
> + preempt_enable();
> + return 1;
> +}
> +
> static void work_free_djprobe_instances(void *data)
> {
> struct list_head *pos;
OpenPGP public key: http://krystal.dyndns.org:8080/key/compudj.gpg
Key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68