[Fault-injection-developer] kprobes patch against 2.5.47
Status: Alpha
Brought to you by:
rustyl
From: Zhuang, L. <lou...@in...> - 2002-11-15 09:58:22
|
Hi Folks, I've ported current kprobes into 2.5.47. The effort is an all-in-one patch including kprobes, debug register, kwatch, user space. - Louis diff -Nur -X /root/dontdiff 47/arch/i386/Kconfig 47-kp/arch/i386/Kconfig --- 47/arch/i386/Kconfig Mon Nov 11 11:28:05 2002 +++ 47-kp/arch/i386/Kconfig Fri Nov 15 17:22:47 2002 @@ -1551,6 +1551,23 @@ Say Y here if you are developing drivers or trying to debug and identify kernel problems. +config KPROBES + bool "Kprobes" + depends on DEBUG_KERNEL + help + Kprobes allows you to trap at almost any kernel address, using + register_kprobe(), and providing a callback function. This is useful + for kernel debugging, non-intrusive instrumentation and testing. If + in doubt, say "N". + +config DEBUGREG + bool "Global Debug Registers" + depends on DEBUG_KERNEL + +config KWATCH + bool "Kwatch points" + depends on DEBUGREG + config DEBUG_STACKOVERFLOW bool "Check for stack overflows" depends on DEBUG_KERNEL diff -Nur -X /root/dontdiff 47/arch/i386/kernel/Makefile 47-kp/arch/i386/kernel/Makefile --- 47/arch/i386/kernel/Makefile Mon Nov 11 11:28:05 2002 +++ 47-kp/arch/i386/kernel/Makefile Fri Nov 15 17:20:45 2002 @@ -4,7 +4,7 @@ EXTRA_TARGETS := head.o init_task.o -export-objs := mca.o i386_ksyms.o time.o +export-objs := mca.o i386_ksyms.o time.o debugreg.o kwatch.o obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o vm86.o \ ptrace.o i8259.o ioport.o ldt.o setup.o time.o sys_i386.o \ @@ -28,6 +28,9 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_EDD) += edd.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_DEBUGREG) += debugreg.o +obj-$(CONFIG_KWATCH) += kwatch.o EXTRA_AFLAGS := -traditional diff -Nur -X /root/dontdiff 47/arch/i386/kernel/debugreg.c 47-kp/arch/i386/kernel/debugreg.c --- 47/arch/i386/kernel/debugreg.c Thu Jan 1 08:00:00 1970 +++ 47-kp/arch/i386/kernel/debugreg.c Fri Nov 15 17:16:10 2002 @@ -0,0 +1,178 @@ +/* + * This provides a debug register allocation mechanism, to 
be + * used by all debuggers, which need debug registers. + * + * Author: vam...@in... + * bh...@in... + */ +#include <linux/kernel.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <asm/system.h> +#include <asm/debugreg.h> + +struct debugreg dr_list[DR_MAX]; +unsigned long dr7_global_mask = 0; +static spinlock_t dr_lock = SPIN_LOCK_UNLOCKED; + +static inline void set_dr7_global_mask(int regnum) +{ + switch (regnum) { + case 0: dr7_global_mask |= DR7_DR0_BITS; break; + case 1: dr7_global_mask |= DR7_DR1_BITS; break; + case 2: dr7_global_mask |= DR7_DR2_BITS; break; + case 3: dr7_global_mask |= DR7_DR3_BITS; break; + } + return; +} + +static inline void clear_dr7_global_mask(int regnum) +{ + switch (regnum) { + case 0: dr7_global_mask &= ~DR7_DR0_BITS; break; + case 1: dr7_global_mask &= ~DR7_DR1_BITS; break; + case 2: dr7_global_mask &= ~DR7_DR2_BITS; break; + case 3: dr7_global_mask &= ~DR7_DR3_BITS; break; + } + return; +} + +static int get_dr(int regnum, int flag) +{ + if ((flag == DR_ALLOC_GLOBAL) && (dr_list[regnum].flag == DR_UNUSED)) { + dr_list[regnum].flag = DR_GLOBAL; + set_dr7_global_mask(regnum); + return regnum; + } + else if ((dr_list[regnum].flag == DR_UNUSED) || (dr_list[regnum].flag == DR_LOCAL)) { + dr_list[regnum].use_count++; + dr_list[regnum].flag = DR_LOCAL; + return regnum; + } + return -1; +} + +static int get_any_dr(int flag) +{ + int i; + if (flag == DR_ALLOC_LOCAL) { + for (i = 0; i < DR_MAX; i++) { + if (dr_list[i].flag == DR_LOCAL) { + dr_list[i].use_count++; + return i; + } else if (dr_list[i].flag == DR_UNUSED) { + dr_list[i].flag = DR_LOCAL; + dr_list[i].use_count = 1; + return i; + } + } + } else { + for (i = DR_MAX-1; i >= 0; i--) { + if (dr_list[i].flag == DR_UNUSED) { + dr_list[i].flag = DR_GLOBAL; + set_dr7_global_mask(i); + return i; + } + } + } + return -1; +} + +static inline void dr_free_local(int regnum) +{ + if (! 
(--dr_list[regnum].use_count)) + dr_list[regnum].flag = DR_UNUSED; + return; +} + +static inline void dr_free_global(int regnum) +{ + dr_list[regnum].flag = DR_UNUSED; + dr_list[regnum].use_count = 0; + clear_dr7_global_mask(regnum); + return; +} + +int dr_alloc(int regnum, int flag) +{ + int ret; + + spin_lock(&dr_lock); + if (regnum == DR_ANY) + ret = get_any_dr(flag); + else if (regnum >= DR_MAX) + ret = -1; + else + ret = get_dr(regnum, flag); + spin_unlock(&dr_lock); + return ret; +} + +int dr_free(int regnum) +{ + spin_lock(&dr_lock); + if (regnum >= DR_MAX || dr_list[regnum].flag == DR_UNUSED) { + spin_unlock(&dr_lock); + return -1; + } + if (dr_list[regnum].flag == DR_LOCAL) + dr_free_local(regnum); + else + dr_free_global(regnum); + spin_unlock(&dr_lock); + return 0; +} + +void dr_inc_use_count(unsigned long mask) +{ + int i; + + spin_lock(&dr_lock); + for (i =0; i < DR_MAX; i++) { + if (DR_IS_LOCAL(mask, i)) + dr_list[i].use_count++; + } + spin_unlock(&dr_lock); +} + +void dr_dec_use_count(unsigned long mask) +{ + int i; + + spin_lock(&dr_lock); + for (i =0; i < DR_MAX; i++) { + if (DR_IS_LOCAL(mask, i)) + dr_free_local(i); + } + spin_unlock(&dr_lock); +} + +/* + * This routine decides if the ptrace request is for enabling or disabling + * a debug reg, and accordingly calls dr_alloc() or dr_free(). + * + * gdb uses ptrace to write to debug registers. It assumes that writing to + * debug register always succeds and it doesn't check the return value of + * ptrace. Now with this new global debug register allocation/freeing, + * ptrace request for a local debug register can fail, if the required debug + * register is already globally allocated. Since gdb fails to notice this + * failure, it sometimes tries to free a debug register, which is not + * allocated for it. 
+ */ +int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7) +{ + int i, dr_shift = 1UL; + for (i = 0; i < DR_MAX; i++, dr_shift <<= 2) { + if ((old_dr7 ^ new_dr7) & dr_shift) { + if (new_dr7 & dr_shift) + dr_alloc(i, DR_ALLOC_LOCAL); + else + dr_free(i); + return 0; + } + } + return -1; +} + +EXPORT_SYMBOL(dr_alloc); +EXPORT_SYMBOL(dr_free); diff -Nur -X /root/dontdiff 47/arch/i386/kernel/entry.S 47-kp/arch/i386/kernel/entry.S --- 47/arch/i386/kernel/entry.S Mon Nov 11 11:28:06 2002 +++ 47-kp/arch/i386/kernel/entry.S Thu Nov 14 15:22:14 2002 @@ -404,9 +404,16 @@ jmp ret_from_exception ENTRY(debug) + pushl $-1 # mark this as an int + SAVE_ALL + movl %esp,%edx pushl $0 - pushl $do_debug - jmp error_code + pushl %edx + call do_debug + addl $8,%esp + testl %eax,%eax + jnz restore_all + jmp ret_from_exception ENTRY(nmi) pushl %eax @@ -419,9 +426,16 @@ RESTORE_ALL ENTRY(int3) + pushl $-1 # mark this as an int + SAVE_ALL + movl %esp,%edx pushl $0 - pushl $do_int3 - jmp error_code + pushl %edx + call do_int3 + addl $8,%esp + testl %eax,%eax + jnz restore_all + jmp ret_from_exception ENTRY(overflow) pushl $0 diff -Nur -X /root/dontdiff 47/arch/i386/kernel/kprobes.c 47-kp/arch/i386/kernel/kprobes.c --- 47/arch/i386/kernel/kprobes.c Thu Jan 1 08:00:00 1970 +++ 47-kp/arch/i386/kernel/kprobes.c Fri Nov 15 17:21:23 2002 @@ -0,0 +1,170 @@ +/* + * Support for kernel probes. + * (C) 2002 Vamsi Krishna S <vam...@in...>. + */ + +#include <linux/config.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/spinlock.h> +#include <linux/preempt.h> + +/* kprobe_status settings */ +#define KPROBE_HIT_ACTIVE 0x00000001 +#define KPROBE_HIT_SS 0x00000002 + +static struct kprobe *current_kprobe; +static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; + +/* + * returns non-zero if opcode modifies the interrupt flag. 
+ */ +static inline int is_IF_modifier(u8 opcode) +{ + switch(opcode) { + case 0xfa: /* cli */ + case 0xfb: /* sti */ + case 0xcf: /* iret/iretd */ + case 0x9d: /* popf/popfd */ + return 1; + } + return 0; +} + +static inline void disarm_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + if (!p->at.inode) { + /* remember the address to be used when rearming the probe. */ + p->at.addr = (void *)(regs->eip - 1); + } + *p->at.addr = p->opcode; + regs->eip = (unsigned long)p->at.addr; +} + +/* + * Interrupts are disabled on entry as trap3 is an interrupt gate and they + * remain disabled thorough out this function. + */ +int kprobe_handler(struct pt_regs *regs) +{ + struct kprobe *p; + int ret = 0; + u8 *addr = (u8 *)(regs->eip-1); + + /* We're in an interrupt, but this is clear and BUG()-safe. */ + preempt_disable(); + + /* Check we're not actually recursing */ + if (kprobe_running()) { + /* We *are* holding lock here, so this is safe. + Disarm the probe we just hit, and ignore it. */ + p = get_kprobe(addr); + if (p) { + disarm_kprobe(p, regs); + ret = 1; + } + /* If it's not ours, can't be delete race, (we hold lock). */ + goto no_kprobe; + } + + lock_kprobes(); + p = get_kprobe(addr); + if (!p) { + unlock_kprobes(); + /* Unregistered (on another cpu) after this hit? Ignore */ + if (*addr != BREAKPOINT_INSTRUCTION) + ret = 1; + /* Not one of ours: let kernel handle it */ + goto no_kprobe; + } + + kprobe_status = KPROBE_HIT_ACTIVE; + current_kprobe = p; + kprobe_saved_eflags = kprobe_old_eflags + = (regs->eflags & (TF_MASK|IF_MASK)); + if (is_IF_modifier(p->opcode)) + kprobe_saved_eflags &= ~IF_MASK; + + p->pre_handler(p, regs); + + regs->eflags |= TF_MASK; + regs->eflags &= ~IF_MASK; + + /* We hold lock, now we remove breakpoint and single step. 
*/ + disarm_kprobe(p, regs); + kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +static void rearm_kprobe(struct kprobe *p, struct pt_regs *regs) +{ + regs->eflags &= ~TF_MASK; + *p->at.addr = BREAKPOINT_INSTRUCTION; +} + +/* + * Interrupts are disabled on entry as trap1 is an interrupt gate and they + * remain disabled thorough out this function. And we hold kprobe lock. + */ +int post_kprobe_handler(struct pt_regs *regs) +{ + if (!kprobe_running()) + return 0; + + if (current_kprobe->post_handler) + current_kprobe->post_handler(current_kprobe, regs, 0); + + /* + * We singlestepped with interrupts disabled. So, the result on + * the stack would be incorrect for "pushfl" instruction. + * Note that regs->esp is actually the top of the stack when the + * trap occurs in kernel space. + */ + if (current_kprobe->opcode == 0x9c) { /* pushfl */ + unsigned long *tos; + + if (regs->eip > PAGE_OFFSET) + tos = ®s->esp; + else + tos = (unsigned long *)regs->esp; + *tos &= ~(TF_MASK | IF_MASK); + *tos |= kprobe_old_eflags; + } + + rearm_kprobe(current_kprobe, regs); + regs->eflags |= kprobe_saved_eflags; + + unlock_kprobes(); + preempt_enable_no_resched(); + + /* + * if somebody else is singlestepping across a probe point, eflags + * will have TF set, in which case, continue the remaining processing + * of do_debug, as if this is not a probe hit. + */ + if (regs->eflags & TF_MASK) + return 0; + + return 1; +} + +/* Interrupts disabled, kprobe_lock held. 
*/ +int kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + if (current_kprobe->fault_handler + && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + return 1; + + if (kprobe_status & KPROBE_HIT_SS) { + rearm_kprobe(current_kprobe, regs); + regs->eflags |= kprobe_old_eflags; + + unlock_kprobes(); + preempt_enable_no_resched(); + } + return 0; +} diff -Nur -X /root/dontdiff 47/arch/i386/kernel/kwatch.c 47-kp/arch/i386/kernel/kwatch.c --- 47/arch/i386/kernel/kwatch.c Thu Jan 1 08:00:00 1970 +++ 47-kp/arch/i386/kernel/kwatch.c Fri Nov 15 17:20:45 2002 @@ -0,0 +1,135 @@ +/* + * Support for kernel watchpoints. + * (C) 2002 Vamsi Krishna S <vam...@in...>. + */ +#include <linux/config.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> +#include <linux/spinlock.h> +#include <linux/module.h> +#include <asm/kwatch.h> +#include <asm/debugreg.h> +#include <asm/bitops.h> + +static struct kwatch kwatch_list[DR_MAX]; +static spinlock_t kwatch_lock = SPIN_LOCK_UNLOCKED; +static unsigned long kwatch_in_progress; /* currently being handled */ + +struct dr_info { + int debugreg; + unsigned long addr; + int type; +}; + +static inline void write_smp_dr(void *info) +{ + struct dr_info *dr = (struct dr_info *)info; + + if (cpu_has_de && dr->type == DR_TYPE_IO) + set_in_cr4(X86_CR4_DE); + write_dr(dr->debugreg, dr->addr); +} + +/* Update the debug register on all CPUs */ +static void sync_dr(int debugreg, unsigned long addr, int type) +{ + struct dr_info dr; + dr.debugreg = debugreg; + dr.addr = addr; + dr.type = type; + smp_call_function(write_smp_dr, &dr, 0, 0); +} + +/* + * Interrupts are disabled on entry as trap1 is an interrupt gate and they + * remain disabled thorough out this function. 
+ */ +int kwatch_handler(unsigned long condition, struct pt_regs *regs) +{ + int debugreg = dr_trap(condition); + unsigned long addr = dr_trap_addr(condition); + int retval = 0; + + if (!(condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3))) { + return 0; + } + + /* We're in an interrupt, but this is clear and BUG()-safe. */ + preempt_disable(); + + /* If we are recursing, we already hold the lock. */ + if (kwatch_in_progress) { + goto recursed; + } + set_bit(debugreg, &kwatch_in_progress); + + spin_lock(&kwatch_lock); + if (kwatch_list[debugreg].addr != addr) + goto out; + + if (kwatch_list[debugreg].handler) { + kwatch_list[debugreg].handler(&kwatch_list[debugreg], regs); + } + + if (kwatch_list[debugreg].type == DR_TYPE_EXECUTE) + regs->eflags |= RF_MASK; +out: + clear_bit(debugreg, &kwatch_in_progress); + spin_unlock(&kwatch_lock); + preempt_enable_no_resched(); + return retval; + +recursed: + if (kwatch_list[debugreg].type == DR_TYPE_EXECUTE) + regs->eflags |= RF_MASK; + preempt_enable_no_resched(); + return 1; +} + +int register_kwatch(unsigned long addr, u8 length, u8 type, + kwatch_handler_t handler) +{ + int debugreg; + unsigned long dr7, flags; + + debugreg = dr_alloc(DR_ANY, DR_ALLOC_GLOBAL); + if (debugreg < 0) { + return -1; + } + + spin_lock_irqsave(&kwatch_lock, flags); + kwatch_list[debugreg].addr = addr; + kwatch_list[debugreg].length = length; + kwatch_list[debugreg].type = type; + kwatch_list[debugreg].handler = handler; + spin_unlock_irqrestore(&kwatch_lock, flags); + + write_dr(debugreg, (unsigned long)addr); + sync_dr(debugreg, (unsigned long)addr, type); + if (cpu_has_de && type == DR_TYPE_IO) + set_in_cr4(X86_CR4_DE); + + dr7 = read_dr(7); + SET_DR7(dr7, debugreg, type, length); + write_dr(7, dr7); + sync_dr(7, dr7, 0); + return debugreg; +} + +void unregister_kwatch(int debugreg) +{ + unsigned long flags; + unsigned long dr7 = read_dr(7); + + RESET_DR7(dr7, debugreg); + write_dr(7, dr7); + sync_dr(7, dr7, 0); + dr_free(debugreg); + + 
spin_lock_irqsave(&kwatch_lock, flags); + kwatch_list[debugreg].addr = 0; + kwatch_list[debugreg].handler = NULL; + spin_unlock_irqrestore(&kwatch_lock, flags); +} +EXPORT_SYMBOL_GPL(register_kwatch); +EXPORT_SYMBOL_GPL(unregister_kwatch); diff -Nur -X /root/dontdiff 47/arch/i386/kernel/process.c 47-kp/arch/i386/kernel/process.c --- 47/arch/i386/kernel/process.c Mon Nov 11 11:28:01 2002 +++ 47-kp/arch/i386/kernel/process.c Fri Nov 15 17:16:10 2002 @@ -47,6 +47,7 @@ #ifdef CONFIG_MATH_EMULATION #include <asm/math_emu.h> #endif +#include <asm/debugreg.h> #include <linux/irq.h> #include <linux/err.h> @@ -241,12 +242,16 @@ kfree(tsk->thread.ts_io_bitmap); tsk->thread.ts_io_bitmap = NULL; } + if (tsk->thread.debugreg[7]) + dr_dec_use_count(tsk->thread.debugreg[7]); } void flush_thread(void) { struct task_struct *tsk = current; + if (tsk->thread.debugreg[7]) + dr_dec_use_count(tsk->thread.debugreg[7]); memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* @@ -330,6 +335,9 @@ desc->a = LDT_entry_a(&info); desc->b = LDT_entry_b(&info); } + + if (current->thread.debugreg[7]) + dr_inc_use_count(current->thread.debugreg[7]); return 0; } @@ -464,6 +472,24 @@ /* * Now maybe reload the debug registers */ +#ifdef CONFIG_DEBUGREG +{ + /* + * Don't reload global debug registers. Don't touch the global debug + * register settings in dr7. 
+ */ + unsigned long next_dr7 = next->debugreg[7]; + if (unlikely(next_dr7)) { + if (DR7_L0(next_dr7)) loaddebug(next, 0); + if (DR7_L1(next_dr7)) loaddebug(next, 1); + if (DR7_L2(next_dr7)) loaddebug(next, 2); + if (DR7_L3(next_dr7)) loaddebug(next, 3); + /* no 4 and 5 */ + loaddebug(next, 6); + load_process_dr7(next_dr7); + } +} +#else if (unlikely(next->debugreg[7])) { loaddebug(next, 0); loaddebug(next, 1); @@ -473,7 +499,7 @@ loaddebug(next, 6); loaddebug(next, 7); } - +#endif if (unlikely(prev->ts_io_bitmap || next->ts_io_bitmap)) { if (next->ts_io_bitmap) { /* diff -Nur -X /root/dontdiff 47/arch/i386/kernel/ptrace.c 47-kp/arch/i386/kernel/ptrace.c --- 47/arch/i386/kernel/ptrace.c Mon Nov 11 11:28:07 2002 +++ 47-kp/arch/i386/kernel/ptrace.c Fri Nov 15 17:16:10 2002 @@ -269,6 +269,11 @@ addr -= (long) &dummy->u_debugreg; addr = addr >> 2; + + if (addr == 7 && (enable_debugreg(child->thread.debugreg[addr], data)) < 0) { + ret = -EBUSY; + break; + } child->thread.debugreg[addr] = data; ret = 0; } diff -Nur -X /root/dontdiff 47/arch/i386/kernel/signal.c 47-kp/arch/i386/kernel/signal.c --- 47/arch/i386/kernel/signal.c Mon Nov 11 11:28:12 2002 +++ 47-kp/arch/i386/kernel/signal.c Fri Nov 15 17:16:10 2002 @@ -22,6 +22,7 @@ #include <asm/ucontext.h> #include <asm/uaccess.h> #include <asm/i387.h> +#include <asm/debugreg.h> #define DEBUG_SIG 0 @@ -574,7 +575,7 @@ * have been cleared if the watchpoint triggered * inside the kernel. */ - __asm__("movl %0,%%db7" : : "r" (current->thread.debugreg[7])); + load_process_dr7(current->thread.debugreg[7]); /* Whee! Actually deliver the signal. 
*/ handle_signal(signr, &info, oldset, regs); diff -Nur -X /root/dontdiff 47/arch/i386/kernel/traps.c 47-kp/arch/i386/kernel/traps.c --- 47/arch/i386/kernel/traps.c Mon Nov 11 11:28:05 2002 +++ 47-kp/arch/i386/kernel/traps.c Fri Nov 15 17:20:45 2002 @@ -23,6 +23,7 @@ #include <linux/spinlock.h> #include <linux/interrupt.h> #include <linux/highmem.h> +#include <linux/kprobes.h> #ifdef CONFIG_EISA #include <linux/ioport.h> @@ -38,6 +39,7 @@ #include <asm/io.h> #include <asm/atomic.h> #include <asm/debugreg.h> +#include <asm/kwatch.h> #include <asm/desc.h> #include <asm/i387.h> #include <asm/nmi.h> @@ -402,7 +404,6 @@ } DO_VM86_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->eip) -DO_VM86_ERROR( 3, SIGTRAP, "int3", int3) DO_VM86_ERROR( 4, SIGSEGV, "overflow", overflow) DO_VM86_ERROR( 5, SIGSEGV, "bounds", bounds) DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->eip) @@ -418,6 +419,9 @@ { if (regs->eflags & VM_MASK) goto gp_in_vm86; + + if (kprobe_running() && kprobe_fault_handler(regs, 13)) + return; if (!(regs->xcs & 3)) goto gp_in_kernel; @@ -549,6 +553,17 @@ nmi_callback = dummy_nmi_callback; } +asmlinkage int do_int3(struct pt_regs *regs, long error_code) +{ + if (kprobe_handler(regs)) + return 1; + /* This is an interrupt gate, because kprobes wants interrupts + disabled. Normal trap handlers don't. */ + restore_interrupts(regs); + do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL); + return 0; +} + /* * Our handling of the processor debug registers is non-trivial. * We do not clear them on entry and exit from the kernel. 
Therefore @@ -571,7 +586,7 @@ * find every occurrence of the TF bit that could be saved away even * by user code) */ -asmlinkage void do_debug(struct pt_regs * regs, long error_code) +asmlinkage int do_debug(struct pt_regs * regs, long error_code) { unsigned int condition; struct task_struct *tsk = current; @@ -579,6 +594,15 @@ __asm__ __volatile__("movl %%db6,%0" : "=r" (condition)); + if (post_kprobe_handler(regs)) + return 1; + + if (kwatch_handler(condition, regs)) + return 1; + + /* Interrupts not disabled for normal trap handling. */ + restore_interrupts(regs); + /* Mask out spurious debug traps due to lazy DR7 setting */ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { if (!tsk->thread.debugreg[7]) @@ -626,18 +650,16 @@ * the signal is delivered. */ clear_dr7: - __asm__("movl %0,%%db7" - : /* no output */ - : "r" (0)); - return; + load_process_dr7(0); + return 0; debug_vm86: handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - return; + return 0; clear_TF: regs->eflags &= ~TF_MASK; - return; + return 0; } /* @@ -801,6 +823,8 @@ struct task_struct *tsk = current; clts(); /* Allow maths ops (or we recurse) */ + if (kprobe_running() && kprobe_fault_handler(®s, 7)) + return; if (!tsk->used_math) init_fpu(tsk); restore_fpu(tsk); @@ -894,9 +918,9 @@ #endif set_trap_gate(0,÷_error); - set_trap_gate(1,&debug); + _set_gate(idt_table+1,14,3,&debug); /* debug trap for kprobes */ set_intr_gate(2,&nmi); - set_system_gate(3,&int3); /* int3-5 can be called from all */ + _set_gate(idt_table+3,14,3,&int3); /* int3-5 can be called from all */ set_system_gate(4,&overflow); set_system_gate(5,&bounds); set_trap_gate(6,&invalid_op); diff -Nur -X /root/dontdiff 47/arch/i386/mm/fault.c 47-kp/arch/i386/mm/fault.c --- 47/arch/i386/mm/fault.c Mon Nov 11 11:28:01 2002 +++ 47-kp/arch/i386/mm/fault.c Thu Nov 14 15:22:14 2002 @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/tty.h> #include <linux/vt_kern.h> /* For unblank_screen() */ +#include 
<linux/kprobes.h> #include <asm/system.h> #include <asm/uaccess.h> @@ -163,6 +164,9 @@ /* get the address */ __asm__("movl %%cr2,%0":"=r" (address)); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + return; + /* It's safe to allow irq's after cr2 has been saved */ if (regs->eflags & X86_EFLAGS_IF) local_irq_enable(); diff -Nur -X /root/dontdiff 47/include/asm-i386/debugreg.h 47-kp/include/asm-i386/debugreg.h --- 47/include/asm-i386/debugreg.h Mon Nov 11 11:28:28 2002 +++ 47-kp/include/asm-i386/debugreg.h Fri Nov 15 17:16:10 2002 @@ -61,4 +61,166 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +struct debugreg { + unsigned long flag; + unsigned long use_count; +}; + +/* debugreg flags */ +#define DR_UNUSED 0 +#define DR_LOCAL 1 +#define DR_GLOBAL 2 + +#define DR_MAX 4 +#define DR_ANY DR_MAX + 1 + +/* global or local allocation requests */ +#define DR_ALLOC_GLOBAL 0 +#define DR_ALLOC_LOCAL 1 + +#define DR7_RW_SET(dr, regnum, rw) do { \ + (dr) &= ~(0x3 << (16 + (4 * (regnum)))); \ + (dr) |= (((rw) & 0x3) << (16 + (4 * (regnum)))); \ + } while (0) + +#define DR7_RW_VAL(dr, regnum) \ + (((dr) >> (16 + (4 * (regnum)))) & 0x3) + +#define DR7_LEN_SET(dr, regnum, len) do { \ + (dr) &= ~(0x3 << (18 + (4 * (regnum)))); \ + (dr) |= (((len-1) & 0x3) << (18 + (4 * (regnum)))); \ + } while (0) + +#define DR7_LEN_VAL(dr, regnum) \ + (((dr) >> (18 + (4 * (regnum)))) & 0x3) + +#define DR7_L0(dr) (((dr))&0x1) +#define DR7_L1(dr) (((dr)>>2)&0x1) +#define DR7_L2(dr) (((dr)>>4)&0x1) +#define DR7_L3(dr) (((dr)>>6)&0x1) + +#define DR_IS_LOCAL(dr, num) ((dr) & (1UL << (num <<1))) + +/* Set the rw, len and global flag in dr7 for a debug register */ +#define SET_DR7(dr, regnum, access, len) do { \ + DR7_RW_SET(dr, regnum, access); \ + DR7_LEN_SET(dr, regnum, len); \ + dr |= (2UL << regnum*2); \ + } while (0) + +/* Disable a debug register by clearing the global/local flag in dr7 */ +#define 
RESET_DR7(dr, regnum) dr &= ~(3UL << regnum*2) + +#define DR7_DR0_BITS 0x000F0003 +#define DR7_DR1_BITS 0x00F0000C +#define DR7_DR2_BITS 0x0F000030 +#define DR7_DR3_BITS 0xF00000C0 + +#define DR_TRAP_MASK 0xF + +#define DR_TYPE_EXECUTE 0x0 +#define DR_TYPE_WRITE 0x1 +#define DR_TYPE_IO 0x2 +#define DR_TYPE_RW 0x3 + +#define get_dr(regnum, val) \ + __asm__("movl %%db" #regnum ", %0" \ + :"=r" (val)) +static inline unsigned long read_dr(int regnum) +{ + unsigned long val = 0; + switch (regnum) { + case 0: get_dr(0, val); break; + case 1: get_dr(1, val); break; + case 2: get_dr(2, val); break; + case 3: get_dr(3, val); break; + case 6: get_dr(6, val); break; + case 7: get_dr(7, val); break; + } + return val; +} +#undef get_dr + +#define set_dr(regnum, val) \ + __asm__("movl %0,%%db" #regnum \ + : /* no output */ \ + :"r" (val)) +static inline void write_dr(int regnum, unsigned long val) +{ + switch (regnum) { + case 0: set_dr(0, val); break; + case 1: set_dr(1, val); break; + case 2: set_dr(2, val); break; + case 3: set_dr(3, val); break; + case 7: set_dr(7, val); break; + } + return; +} +#undef set_dr + +/* + * Given the debug status register, returns the debug register number + * which caused the debug trap. + */ +static inline int dr_trap(unsigned int condition) +{ + int i, reg_shift = 1UL; + for (i = 0; i < DR_MAX; i++, reg_shift <<= 1) + if ((condition & reg_shift)) + return i; + return -1; +} + +/* + * Given the debug status register, returns the address due to which + * the debug trap occured. + */ +static inline unsigned long dr_trap_addr(unsigned int condition) +{ + int regnum = dr_trap(condition); + + if (regnum == -1) + return -1; + return read_dr(regnum); +} + +/* + * Given the debug status register, returns the type of debug trap: + * execute, read/write, write or io. 
+ */ +static inline int dr_trap_type(unsigned int condition) +{ + int regnum = dr_trap(condition); + + if (regnum == -1) + return -1; + return DR7_RW_VAL(read_dr(7), regnum); +} + +/* Function declarations */ + +#ifdef CONFIG_DEBUGREG +extern int dr_alloc(int regnum, int flag); +extern int dr_free(int regnum); +extern void dr_inc_use_count(unsigned long mask); +extern void dr_dec_use_count(unsigned long mask); +extern struct debugreg dr_list[DR_MAX]; +extern unsigned long dr7_global_mask; +extern int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7); + +static inline void load_process_dr7(unsigned long curr_dr7) +{ + write_dr(7, (read_dr(7) & dr7_global_mask) | curr_dr7); +} +#else +static inline int enable_debugreg(unsigned long old_dr7, unsigned long new_dr7) { return 0; } +static inline void load_process_dr7(unsigned long curr_dr7) +{ + write_dr(7, curr_dr7); +} + +static inline void dr_inc_use_count(unsigned long mask) { } +static inline void dr_dec_use_count(unsigned long mask) { } + +#endif /* CONFIG_DEBUGREG */ #endif diff -Nur -X /root/dontdiff 47/include/asm-i386/kprobes.h 47-kp/include/asm-i386/kprobes.h --- 47/include/asm-i386/kprobes.h Thu Jan 1 08:00:00 1970 +++ 47-kp/include/asm-i386/kprobes.h Thu Nov 14 15:22:14 2002 @@ -0,0 +1,34 @@ +#ifndef _ASM_KPROBES_H +#define _ASM_KPROBES_H +/* + * Dynamic Probes (kprobes) support + * Vamsi Krishna S <vam...@in...>, July, 2002 + * Mailing list: dp...@ww... + */ +#include <linux/types.h> +#include <linux/ptrace.h> + +struct pt_regs; + +typedef u8 kprobe_opcode_t; +#define BREAKPOINT_INSTRUCTION 0xcc + +/* trap3/1 are intr gates for kprobes. So, restore the status of IF, + * if necessary, before executing the original int3/1 (trap) handler. 
+ */ +static inline void restore_interrupts(struct pt_regs *regs) +{ + if (regs->eflags & IF_MASK) + __asm__ __volatile__ ("sti"); +} + +#ifdef CONFIG_KPROBES +extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int post_kprobe_handler(struct pt_regs *regs); +extern int kprobe_handler(struct pt_regs *regs); +#else /* !CONFIG_KPROBES */ +static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { return 0; } +static inline int post_kprobe_handler(struct pt_regs *regs) { return 0; } +static inline int kprobe_handler(struct pt_regs *regs) { return 0; } +#endif +#endif /* _ASM_KPROBES_H */ diff -Nur -X /root/dontdiff 47/include/asm-i386/kwatch.h 47-kp/include/asm-i386/kwatch.h --- 47/include/asm-i386/kwatch.h Thu Jan 1 08:00:00 1970 +++ 47-kp/include/asm-i386/kwatch.h Fri Nov 15 17:20:45 2002 @@ -0,0 +1,31 @@ +#ifndef _ASM_KWATCH_H +#define _ASM_KWATCH_H +/* + * Dynamic Probes (kwatch points) support + * Vamsi Krishna S <vam...@in...>, Oct, 2002 + */ +#include <linux/types.h> +#include <linux/ptrace.h> + +struct kwatch; +typedef void (*kwatch_handler_t)(struct kwatch *, struct pt_regs *); + +struct kwatch { + unsigned long addr; /* location of watchpoint */ + u8 length; /* range of address */ + u8 type; /* type of watchpoint */ + kwatch_handler_t handler; +}; + +#define RF_MASK 0x00010000 + +#ifdef CONFIG_KWATCH +extern int register_kwatch(unsigned long addr, u8 length, u8 type, kwatch_handler_t handler); +extern void unregister_kwatch(int debugreg); +extern int kwatch_handler(unsigned long condition, struct pt_regs *regs); +#else +static inline int register_kwatch(unsigned long addr, u8 length, u8 type, kwatch_handler_t handler) { return -ENOSYS; } +static inline void unregister_kwatch(int debugreg) { } +static inline int kwatch_handler(unsigned long condition, struct pt_regs *regs) { return 0; } +#endif +#endif /* _ASM_KWATCH_H */ diff -Nur -X /root/dontdiff 47/include/linux/kprobes.h 47-kp/include/linux/kprobes.h --- 
47/include/linux/kprobes.h Thu Jan 1 08:00:00 1970 +++ 47-kp/include/linux/kprobes.h Fri Nov 15 17:21:23 2002 @@ -0,0 +1,66 @@ +#ifndef _LINUX_KPROBES_H +#define _LINUX_KPROBES_H +#include <linux/config.h> +#include <linux/list.h> +#include <linux/fs.h> +#include <linux/notifier.h> +#include <linux/smp.h> +#include <asm/kprobes.h> + +struct kprobe; +struct pt_regs; + +typedef void (*kprobe_pre_handler_t)(struct kprobe *, struct pt_regs *); +typedef void (*kprobe_post_handler_t)(struct kprobe *, struct pt_regs *, + unsigned long flags); +typedef int (*kprobe_fault_handler_t)(struct kprobe *, struct pt_regs *, + int trapnr); +struct probe_at { + kprobe_opcode_t *addr; + struct inode *inode; /* for user space probes */ + unsigned long offset; /* for user space probes */ +}; + +struct kprobe { + struct list_head list; + + /* location of the probe point */ + struct probe_at at; + + /* Called before addr is executed. */ + kprobe_pre_handler_t pre_handler; + + /* Called after addr is executed, unless... */ + kprobe_post_handler_t post_handler; + + /* ... called if executing addr causes a fault (eg. page fault). + * Return 1 if it handled fault, otherwise kernel will see it. */ + kprobe_fault_handler_t fault_handler; + + /* Saved opcode (which has been replaced with breakpoint) */ + kprobe_opcode_t opcode; +}; + +#ifdef CONFIG_KPROBES +/* Locks kprobe: irq must be disabled */ +void lock_kprobes(void); +void unlock_kprobes(void); + +/* kprobe running now on this CPU? */ +static inline int kprobe_running(void) +{ + extern unsigned int kprobe_cpu; + return kprobe_cpu == smp_processor_id(); +} + +/* Get the kprobe at this addr (if any). 
Must have called lock_kprobes */ +struct kprobe *get_kprobe(void *addr); + +int register_kprobe(struct kprobe *p); +void unregister_kprobe(struct kprobe *p); +#else +static inline int kprobe_running(void) { return 0; } +static inline int register_kprobe(struct kprobe *p) { return -ENOSYS; } +static inline void unregister_kprobe(struct kprobe *p) { } +#endif +#endif /* _LINUX_KPROBES_H */ diff -Nur -X /root/dontdiff 47/kernel/Makefile 47-kp/kernel/Makefile --- 47/kernel/Makefile Mon Nov 11 11:28:06 2002 +++ 47-kp/kernel/Makefile Thu Nov 14 15:22:14 2002 @@ -4,7 +4,7 @@ export-objs = signal.o sys.o kmod.o workqueue.o ksyms.o pm.o exec_domain.o \ printk.o platform.o suspend.o dma.o module.o cpufreq.o \ - profile.o rcupdate.o + profile.o rcupdate.o kprobes.o obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ module.o exit.o itimer.o time.o softirq.o resource.o \ @@ -21,6 +21,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq.o obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o +obj-$(CONFIG_KPROBES) += kprobes.o ifneq ($(CONFIG_IA64),y) # According to Alan Modra <al...@li...>, the -fno-omit-frame-pointer is diff -Nur -X /root/dontdiff 47/kernel/kprobes.c 47-kp/kernel/kprobes.c --- 47/kernel/kprobes.c Thu Jan 1 08:00:00 1970 +++ 47-kp/kernel/kprobes.c Fri Nov 15 17:21:23 2002 @@ -0,0 +1,157 @@ +/* Support for kernel probes. + (C) 2002 Vamsi Krishna S <vam...@in...>. 
+*/ +#include <linux/kprobes.h> +#include <linux/spinlock.h> +#include <linux/hash.h> +#include <linux/init.h> +#include <linux/module.h> +#include <asm/cacheflush.h> +#include <asm/errno.h> + +#define KPROBE_HASH_BITS 6 +#define KPROBE_TABLE_SIZE (1 << KPROBE_HASH_BITS) + +static struct list_head kprobe_table[KPROBE_TABLE_SIZE]; + +unsigned int kprobe_cpu = NR_CPUS; +static spinlock_t kprobe_lock = SPIN_LOCK_UNLOCKED; + +/* Locks kprobe: irqs must be disabled */ +void lock_kprobes(void) +{ + spin_lock(&kprobe_lock); + kprobe_cpu = smp_processor_id(); +} + +void unlock_kprobes(void) +{ + kprobe_cpu = NR_CPUS; + spin_unlock(&kprobe_lock); +} + +/* + * We need to look up the inode and offset from the vma. We can't depend on + * the page->(mapping, index) as that would be incorrect if we ever swap this + * page out (possible for pages which are dirtied by GDB breakpoints etc) + * + * We acquire page_table_lock here to ensure that: + * - current page doesn't go away from under us (kswapd) + * - mm->mmap consistancy (vma are always added under this lock) + * + * We will never deadlock on page_table_lock, we always come here due to a + * probe in user space, no kernel code could have executed to take the + * page_table_lock. 
+ */ +static struct kprobe *get_uprobe_at(struct inode *inode, unsigned long offset) +{ + struct list_head *head; + struct kprobe *p; + + head = &kprobe_table[hash_long((unsigned long)inode*offset, + KPROBE_HASH_BITS)]; + list_for_each_entry(p, head, list) { + if (p->at.inode == inode && p->at.offset == offset) + return p; + } + return NULL; +} + +static struct kprobe *get_uprobe(void *addr) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + struct inode *inode; + unsigned long offset; + + spin_lock(&mm->page_table_lock); + vma = find_vma(mm, (unsigned long)addr); + offset = (unsigned long)addr - vma->vm_start + (vma->vm_pgoff << PAGE_SHIFT); + if (!vma->vm_file) { + spin_unlock(&mm->page_table_lock); + return NULL; + } + inode = vma->vm_file->f_dentry->d_inode; + spin_unlock(&mm->page_table_lock); + + return get_uprobe_at(inode, offset); +} + +/* You have to be holding the kprobe_lock */ +struct kprobe *get_kprobe(void *addr) +{ + struct list_head *head, *tmp; + + if ((unsigned long)addr < PAGE_OFFSET) + return get_uprobe(addr); + + head = &kprobe_table[hash_ptr(addr, KPROBE_HASH_BITS)]; + list_for_each(tmp, head) { + struct kprobe *p = list_entry(tmp, struct kprobe, list); + if (p->at.addr == addr) + return p; + } + return NULL; +} + +/* + * p->at.addr has to be writeable address for uspace probes. 
+ */ +int register_kprobe(struct kprobe *p) +{ + int ret = 0; + kprobe_opcode_t *addr = p->at.addr; + + spin_lock_irq(&kprobe_lock); + if (!p->at.inode) { + if (get_kprobe(addr)) { + ret = -EEXIST; + goto out; + } + list_add(&p->list, &kprobe_table[hash_ptr(addr, + KPROBE_HASH_BITS)]); + } else { + if (get_uprobe_at(p->at.inode, p->at.offset)) { + ret = -EEXIST; + goto out; + } + list_add(&p->list, + &kprobe_table[hash_long( + (unsigned long)p->at.inode * p->at.offset, + KPROBE_HASH_BITS)]); + } + + p->opcode = *addr; + *addr = BREAKPOINT_INSTRUCTION; + flush_icache_range(addr, addr + sizeof(kprobe_opcode_t)); + out: + spin_unlock_irq(&kprobe_lock); + return ret; +} + +void unregister_kprobe(struct kprobe *p) +{ + kprobe_opcode_t *addr = p->at.addr; + + spin_lock_irq(&kprobe_lock); + *addr = p->opcode; + list_del(&p->list); + flush_icache_range(addr, addr + sizeof(kprobe_opcode_t)); + spin_unlock_irq(&kprobe_lock); +} + +static int __init init_kprobes(void) +{ + int i; + + /* FIXME allocate the probe table, currently defined statically */ + /* initialize all list heads */ + for (i = 0; i < KPROBE_TABLE_SIZE; i++) + INIT_LIST_HEAD(&kprobe_table[i]); + + return 0; +} +__initcall(init_kprobes); + +EXPORT_SYMBOL_GPL(register_kprobe); +EXPORT_SYMBOL_GPL(unregister_kprobe); |