From b332828c39326b1dca617f387dd15d12e81cd5f0 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:10 +0530 Subject: hw-breakpoints: prepare the code for Hardware Breakpoint interfaces The generic hardware breakpoint interface provides an abstraction of hardware breakpoints in front of specific arch implementations for both kernel and user side breakpoints. This includes execution breakpoints and read/write breakpoints, also known as "watchpoints". This patch introduces header files containing constants, structure definitions and declaration of functions used by the hardware breakpoint core and x86 specific code. It also introduces an array based storage for the debug-register values in 'struct thread_struct', while modifying all users of debugreg member in the structure. [ Impact: add headers for new hardware breakpoint interface ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/a.out-core.h | 8 +++--- arch/x86/include/asm/debugreg.h | 29 +++++++++++++++++++ arch/x86/include/asm/hw_breakpoint.h | 55 ++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/processor.h | 8 +++--- arch/x86/kernel/process.c | 16 +++++------ arch/x86/kernel/ptrace.c | 16 +++++------ arch/x86/power/cpu_32.c | 8 +++--- arch/x86/power/cpu_64.c | 8 +++--- 8 files changed, 116 insertions(+), 32 deletions(-) create mode 100644 arch/x86/include/asm/hw_breakpoint.h (limited to 'arch') diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index bb70e39..fc4685d 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -32,10 +32,10 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg0; - dump->u_debugreg[1] = current->thread.debugreg1; - dump->u_debugreg[2] = current->thread.debugreg2; - dump->u_debugreg[3] = current->thread.debugreg3; + dump->u_debugreg[0] = current->thread.debugreg[0]; + dump->u_debugreg[1] = current->thread.debugreg[1]; + dump->u_debugreg[2] = current->thread.debugreg[2]; + dump->u_debugreg[3] = current->thread.debugreg[3]; dump->u_debugreg[4] = 0; dump->u_debugreg[5] = 0; dump->u_debugreg[6] = current->thread.debugreg6; diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 3ea6f37..23439fb 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -18,6 +18,7 @@ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ #define DR_TRAP3 (0x8) /* db3 */ +#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3) #define DR_STEP (0x4000) /* single-step */ #define DR_SWITCH (0x8000) /* task switch */ @@ -49,6 +50,8 @@ #define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */ #define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */ +#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */ +#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */ #define DR_ENABLE_SIZE 2 /* 2 enable bits per register */ #define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */ @@ -67,4 +70,30 @@ #define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */ #define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */ +/* + * HW breakpoint additions + */ +#ifdef __KERNEL__ + +/* For process management */ +extern void flush_thread_hw_breakpoint(struct task_struct *tsk); +extern int copy_thread_hw_breakpoint(struct task_struct *tsk, + struct task_struct *child, unsigned long clone_flags); + +/* For CPU management */ +extern void load_debug_registers(void); +static inline void hw_breakpoint_disable(void) +{ + /* Zero the control register for HW Breakpoint */ + set_debugreg(0UL, 7); + + /* Zero-out the individual HW breakpoint address registers */ + set_debugreg(0UL, 0); + set_debugreg(0UL, 1); + set_debugreg(0UL, 2); + set_debugreg(0UL, 3); +} + +#endif /* __KERNEL__ */ + #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h new file mode 100644 index 0000000..1acb4d4 --- /dev/null +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -0,0 +1,55 @@ +#ifndef _I386_HW_BREAKPOINT_H +#define _I386_HW_BREAKPOINT_H + +#ifdef __KERNEL__ +#define __ARCH_HW_BREAKPOINT_H + +struct arch_hw_breakpoint { + char *name; /* Contains name of the symbol to set bkpt */ + unsigned long address; + u8 len; + u8 type; +}; + +#include +#include + +/* Available HW breakpoint length encodings */ +#define HW_BREAKPOINT_LEN_1 0x40 +#define HW_BREAKPOINT_LEN_2 0x44 +#define HW_BREAKPOINT_LEN_4 0x4c +#define HW_BREAKPOINT_LEN_EXECUTE 0x40 + +#ifdef CONFIG_X86_64 +#define HW_BREAKPOINT_LEN_8 0x48 +#endif + +/* Available HW breakpoint type encodings */ + +/* trigger on instruction execute */ +#define HW_BREAKPOINT_EXECUTE 0x80 +/* trigger on memory write */ +#define HW_BREAKPOINT_WRITE 0x81 +/* trigger on memory read or write */ +#define HW_BREAKPOINT_RW 0x83 + +/* Total number of available HW breakpoint registers */ +#define HBP_NUM 4 + +extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; +DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); +extern unsigned int hbp_user_refcount[HBP_NUM]; + +extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); +extern void arch_uninstall_thread_hw_breakpoint(void); +extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); +extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk); +extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); +extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); +extern void arch_update_kernel_hw_breakpoint(void *); +extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, + unsigned long val, void *data); +#endif /* __KERNEL__ */ +#endif /* _I386_HW_BREAKPOINT_H */ + diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 0b2fab0..448b34a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -29,6 +29,7 @@ struct mm_struct; #include #include +#define HBP_NUM 4 /* * Default implementation of macro that returns current * instruction pointer ("program counter"). @@ -431,12 +432,11 @@ struct thread_struct { unsigned long fs; unsigned long gs; /* Hardware debugging registers: */ - unsigned long debugreg0; - unsigned long debugreg1; - unsigned long debugreg2; - unsigned long debugreg3; + unsigned long debugreg[HBP_NUM]; unsigned long debugreg6; unsigned long debugreg7; + /* Hardware breakpoint info */ + struct hw_breakpoint *hbp[HBP_NUM]; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index fb5dfb8..291527c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -106,10 +106,10 @@ void flush_thread(void) clear_tsk_thread_flag(tsk, TIF_DEBUG); - tsk->thread.debugreg0 = 0; - tsk->thread.debugreg1 = 0; - tsk->thread.debugreg2 = 0; - tsk->thread.debugreg3 = 0; + tsk->thread.debugreg[0] = 0; + tsk->thread.debugreg[1] = 0; + tsk->thread.debugreg[2] = 0; + tsk->thread.debugreg[3] = 0; tsk->thread.debugreg6 = 0; tsk->thread.debugreg7 = 0; memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); @@ -194,10 +194,10 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, update_debugctlmsr(next->debugctlmsr); if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg0, 0); - set_debugreg(next->debugreg1, 1); - set_debugreg(next->debugreg2, 2); - set_debugreg(next->debugreg3, 3); + set_debugreg(next->debugreg[0], 0); + set_debugreg(next->debugreg[1], 1); + set_debugreg(next->debugreg[2], 2); + set_debugreg(next->debugreg[3], 3); /* no 4 and 5 */ set_debugreg(next->debugreg6, 6); set_debugreg(next->debugreg7, 7); diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde..313be40 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -471,10 +471,10 @@ static int genregs_set(struct task_struct *target, static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) { switch (n) { - case 0: return child->thread.debugreg0; - case 1: return child->thread.debugreg1; - case 2: return child->thread.debugreg2; - case 3: return child->thread.debugreg3; + case 0: return child->thread.debugreg[0]; + case 1: return child->thread.debugreg[1]; + case 2: return child->thread.debugreg[2]; + case 3: return child->thread.debugreg[3]; case 6: return child->thread.debugreg6; case 7: return child->thread.debugreg7; } @@ -493,10 +493,10 @@ static int ptrace_set_debugreg(struct task_struct *child, return -EIO; switch (n) { - case 0: child->thread.debugreg0 = data; break; - case 1: child->thread.debugreg1 = data; break; - case 2: child->thread.debugreg2 = data; break; - case 3: child->thread.debugreg3 = data; break; + case 0: child->thread.debugreg[0] = data; break; + case 1: child->thread.debugreg[1] = data; break; + case 2: child->thread.debugreg[2] = data; break; + case 3: child->thread.debugreg[3] = data; break; case 6: if ((data & ~0xffffffffUL) != 0) diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index ce702c5..5199139 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -84,10 +84,10 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg7) { - set_debugreg(current->thread.debugreg0, 0); - set_debugreg(current->thread.debugreg1, 1); - set_debugreg(current->thread.debugreg2, 2); - set_debugreg(current->thread.debugreg3, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ set_debugreg(current->thread.debugreg6, 6); set_debugreg(current->thread.debugreg7, 7); diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index 5343540..1e3bdcc 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -163,10 +163,10 @@ static void fix_processor_context(void) * Now maybe reload the debug registers */ if (current->thread.debugreg7){ - loaddebug(¤t->thread, 0); - loaddebug(¤t->thread, 1); - loaddebug(¤t->thread, 2); - loaddebug(¤t->thread, 3); + set_debugreg(current->thread.debugreg[0], 0); + set_debugreg(current->thread.debugreg[1], 1); + set_debugreg(current->thread.debugreg[2], 2); + set_debugreg(current->thread.debugreg[3], 3); /* no 4 and 5 */ loaddebug(¤t->thread, 6); loaddebug(¤t->thread, 7); -- cgit v1.1 From 62a038d34db26771756cf3689e36de638bedd2c4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:33 +0530 Subject: hw-breakpoints: introducing generic hardware breakpoint handler interfaces This patch introduces the generic Hardware Breakpoint interfaces for both user and kernel space requests. This core Api handles the hardware breakpoints through new helpers. It handles the user-space breakpoints and kernel breakpoints in front of arch implementation. One can choose kernel wide breakpoints using the following helpers and passing them a generic struct hw_breakpoint: - register_kernel_hw_breakpoint() - unregister_kernel_hw_breakpoint() - modify_kernel_hw_breakpoint() On the other side, you can choose per task breakpoints. - register_user_hw_breakpoint() - unregister_user_hw_breakpoint() - modify_user_hw_breakpoint() [ fweisbec@gmail.com: fix conflict against perfcounter ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/Kconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index 78a35e9..1adf2d0 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -112,3 +112,7 @@ config HAVE_DMA_API_DEBUG config HAVE_DEFAULT_NO_SPIN_MUTEXES bool + +config HAVE_HW_BREAKPOINT + bool + -- cgit v1.1 From 0067f1297241ea567f2b22a455519752d70fcca9 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:43:57 +0530 Subject: hw-breakpoints: x86 architecture implementation of Hardware Breakpoint interfaces This patch introduces the arch-specific implementation of the generic hardware breakpoints in kernel/hw_breakpoint.c inside x86 specific directories. It contains functions which help to validate and serve requests using Hardware Breakpoint registers on x86 processors. [ fweisbec@gmail.com: fix conflict against kmemcheck ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/Kconfig | 1 + arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/hw_breakpoint.c | 382 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 384 insertions(+), 1 deletion(-) create mode 100644 arch/x86/kernel/hw_breakpoint.c (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index df9e885..3033375 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -46,6 +46,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 select HAVE_KERNEL_LZMA + select HAVE_HW_BREAKPOINT config ARCH_DEFCONFIG string diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 77df4d6..cbc7818 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -36,7 +36,7 @@ obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o obj-y += bootflag.o e820.o obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o pci-nommu.o +obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o obj-y += tsc.o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c new file mode 100644 index 0000000..4867c9f --- /dev/null +++ b/arch/x86/kernel/hw_breakpoint.c @@ -0,0 +1,382 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2007 Alan Stern + * Copyright (C) 2009 IBM Corporation + */ + +/* + * HW_breakpoint: a unified kernel/user-space hardware breakpoint facility, + * using the CPU's debug registers. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* Unmasked kernel DR7 value */ +static unsigned long kdr7; + +/* + * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. + * Used to clear and verify the status of bits corresponding to DR0 - DR3 + */ +static const unsigned long dr7_masks[HBP_NUM] = { + 0x000f0003, /* LEN0, R/W0, G0, L0 */ + 0x00f0000c, /* LEN1, R/W1, G1, L1 */ + 0x0f000030, /* LEN2, R/W2, G2, L2 */ + 0xf00000c0 /* LEN3, R/W3, G3, L3 */ +}; + + +/* + * Encode the length, type, Exact, and Enable bits for a particular breakpoint + * as stored in debug register 7. + */ +static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +{ + unsigned long bp_info; + + bp_info = (len | type) & 0xf; + bp_info <<= (DR_CONTROL_SHIFT + drnum * DR_CONTROL_SIZE); + bp_info |= (DR_GLOBAL_ENABLE << (drnum * DR_ENABLE_SIZE)) | + DR_GLOBAL_SLOWDOWN; + return bp_info; +} + +void arch_update_kernel_hw_breakpoint(void *unused) +{ + struct hw_breakpoint *bp; + int i, cpu = get_cpu(); + unsigned long temp_kdr7 = 0; + + /* Don't allow debug exceptions while we update the registers */ + set_debugreg(0UL, 7); + + for (i = hbp_kernel_pos; i < HBP_NUM; i++) { + per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; + if (bp) { + temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); + set_debugreg(bp->info.address, i); + } + } + + /* No need to set DR6. Update the debug registers with kernel-space + * breakpoint values from kdr7 and user-space requests from the + * current process + */ + kdr7 = temp_kdr7; + set_debugreg(kdr7 | current->thread.debugreg7, 7); + put_cpu_no_resched(); +} + +/* + * Install the thread breakpoints in their debug registers. + */ +void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + + switch (hbp_kernel_pos) { + case 4: + set_debugreg(thread->debugreg[3], 3); + case 3: + set_debugreg(thread->debugreg[2], 2); + case 2: + set_debugreg(thread->debugreg[1], 1); + case 1: + set_debugreg(thread->debugreg[0], 0); + default: + break; + } + + /* No need to set DR6 */ + set_debugreg((kdr7 | thread->debugreg7), 7); +} + +/* + * Install the debug register values for just the kernel, no thread. + */ +void arch_uninstall_thread_hw_breakpoint() +{ + /* Clear the user-space portion of debugreg7 by setting only kdr7 */ + set_debugreg(kdr7, 7); + +} + +static int get_hbp_len(u8 hbp_len) +{ + unsigned int len_in_bytes = 0; + + switch (hbp_len) { + case HW_BREAKPOINT_LEN_1: + len_in_bytes = 1; + break; + case HW_BREAKPOINT_LEN_2: + len_in_bytes = 2; + break; + case HW_BREAKPOINT_LEN_4: + len_in_bytes = 4; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + len_in_bytes = 8; + break; +#endif + } + return len_in_bytes; +} + +/* + * Check for virtual address in user space. + */ +int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va <= TASK_SIZE - len); +} + +/* + * Check for virtual address in kernel space. + */ +int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +{ + unsigned int len; + + len = get_hbp_len(hbp_len); + + return (va >= TASK_SIZE) && ((va + len - 1) >= TASK_SIZE); +} + +/* + * Store a breakpoint's encoded address, length, and type. + */ +static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +{ + /* + * User-space requests will always have the address field populated + * Symbol names from user-space are rejected + */ + if (tsk && bp->info.name) + return -EINVAL; + /* + * For kernel-addresses, either the address or symbol name can be + * specified. + */ + if (bp->info.name) + bp->info.address = (unsigned long) + kallsyms_lookup_name(bp->info.name); + if (bp->info.address) + return 0; + return -EINVAL; +} + +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, + struct task_struct *tsk) +{ + unsigned int align; + int ret = -EINVAL; + + switch (bp->info.type) { + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + case HW_BREAKPOINT_EXECUTE: + if ((!arch_check_va_in_userspace(bp->info.address, + bp->info.len)) && + bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) + return ret; + break; + case HW_BREAKPOINT_WRITE: + break; + case HW_BREAKPOINT_RW: + break; + default: + return ret; + } + + switch (bp->info.len) { + case HW_BREAKPOINT_LEN_1: + align = 0; + break; + case HW_BREAKPOINT_LEN_2: + align = 1; + break; + case HW_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case HW_BREAKPOINT_LEN_8: + align = 7; + break; +#endif + default: + return ret; + } + + if (bp->triggered) + ret = arch_store_info(bp, tsk); + + if (ret < 0) + return ret; + /* + * Check that the low-order bits of the address are appropriate + * for the alignment implied by len. + */ + if (bp->info.address & align) + return -EINVAL; + + /* Check that the virtual address is in the proper range */ + if (tsk) { + if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + return -EFAULT; + } else { + if (!arch_check_va_in_kernelspace(bp->info.address, + bp->info.len)) + return -EFAULT; + } + return 0; +} + +void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +{ + struct thread_struct *thread = &(tsk->thread); + struct hw_breakpoint *bp = thread->hbp[pos]; + + thread->debugreg7 &= ~dr7_masks[pos]; + if (bp) { + thread->debugreg[pos] = bp->info.address; + thread->debugreg7 |= encode_dr7(pos, bp->info.len, + bp->info.type); + } else + thread->debugreg[pos] = 0; +} + +void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +{ + int i; + struct thread_struct *thread = &(tsk->thread); + + thread->debugreg7 = 0; + for (i = 0; i < HBP_NUM; i++) + thread->debugreg[i] = 0; +} + +/* + * Handle debug exception notifications. + * + * Return value is either NOTIFY_STOP or NOTIFY_DONE as explained below. + * + * NOTIFY_DONE returned if one of the following conditions is true. + * i) When the causative address is from user-space and the exception + * is a valid one, i.e. not triggered as a result of lazy debug register + * switching + * ii) When there are more bits than trap set in DR6 register (such + * as BD, BS or BT) indicating that more than one debug condition is + * met and requires some more action in do_debug(). + * + * NOTIFY_STOP returned for all other cases + * + */ +int __kprobes hw_breakpoint_handler(struct die_args *args) +{ + int i, cpu, rc = NOTIFY_STOP; + struct hw_breakpoint *bp; + /* The DR6 value is stored in args->err */ + unsigned long dr7, dr6 = args->err; + + /* Do an early return if no trap bits are set in DR6 */ + if ((dr6 & DR_TRAP_BITS) == 0) + return NOTIFY_DONE; + + /* Lazy debug register switching */ + if (!test_tsk_thread_flag(current, TIF_DEBUG)) + arch_uninstall_thread_hw_breakpoint(); + + get_debugreg(dr7, 7); + /* Disable breakpoints during exception handling */ + set_debugreg(0UL, 7); + /* + * Assert that local interrupts are disabled + * Reset the DRn bits in the virtualized register value. + * The ptrace trigger routine will add in whatever is needed. + */ + current->thread.debugreg6 &= ~DR_TRAP_BITS; + cpu = get_cpu(); + + /* Handle all the breakpoints that were triggered */ + for (i = 0; i < HBP_NUM; ++i) { + if (likely(!(dr6 & (DR_TRAP0 << i)))) + continue; + /* + * Find the corresponding hw_breakpoint structure and + * invoke its triggered callback. + */ + if (i >= hbp_kernel_pos) + bp = per_cpu(this_hbp_kernel[i], cpu); + else { + bp = current->thread.hbp[i]; + if (bp) + rc = NOTIFY_DONE; + } + /* + * bp can be NULL due to lazy debug register switching + * or due to the delay between updates of hbp_kernel_pos + * and this_hbp_kernel. + */ + if (!bp) + continue; + + (bp->triggered)(bp, args->regs); + } + if (dr6 & (~DR_TRAP_BITS)) + rc = NOTIFY_DONE; + + set_debugreg(dr7, 7); + put_cpu_no_resched(); + return rc; +} + +/* + * Handle debug exception notifications. + */ +int __kprobes hw_breakpoint_exceptions_notify( + struct notifier_block *unused, unsigned long val, void *data) +{ + if (val != DIE_DEBUG) + return NOTIFY_DONE; + + return hw_breakpoint_handler(data); +} -- cgit v1.1 From 08d68323d1f0c34452e614263b212ca556dae47f Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:08 +0530 Subject: hw-breakpoints: modifying generic debug exception to use thread-specific debug registers This patch modifies the breakpoint exception handler code to use the new abstract debug register names. [ fweisbec@gmail.com: fix conflict against kmemcheck ] [ Impact: refactor and cleanup x86 debug exception handler ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/traps.c | 69 +++++++++++++++++-------------------------------- 1 file changed, 24 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d2883..de99132 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -529,73 +529,52 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; - unsigned long condition; + unsigned long dr6; int si_code; - get_debugreg(condition, 6); + get_debugreg(dr6, 6); + /* DR6 may or may not be cleared by the CPU */ + set_debugreg(0, 6); /* * The processor cleared BTF, so don't mark that we need it set. */ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); tsk->thread.debugctlmsr = 0; - if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, + /* Store the virtualized DR6 value */ + tsk->thread.debugreg6 = dr6; + + if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code, SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ preempt_conditional_sti(regs); - /* Mask out spurious debug traps due to lazy DR7 setting */ - if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { - if (!tsk->thread.debugreg7) - goto clear_dr7; + if (regs->flags & X86_VM_MASK) { + handle_vm86_trap((struct kernel_vm86_regs *) regs, + error_code, 1); + return; } -#ifdef CONFIG_X86_32 - if (regs->flags & X86_VM_MASK) - goto debug_vm86; -#endif - - /* Save debug status register where ptrace can see it */ - tsk->thread.debugreg6 = condition; - /* - * Single-stepping through TF: make sure we ignore any events in - * kernel space (but re-enable TF when returning to user mode). + * Single-stepping through system calls: ignore any exceptions in + * kernel space, but re-enable TF when returning to user mode. + * + * We already checked v86 mode above, so we can check for kernel mode + * by just checking the CPL of CS. */ - if (condition & DR_STEP) { - if (!user_mode(regs)) - goto clear_TF_reenable; + if ((dr6 & DR_STEP) && !user_mode(regs)) { + tsk->thread.debugreg6 &= ~DR_STEP; + set_tsk_thread_flag(tsk, TIF_SINGLESTEP); + regs->flags &= ~X86_EFLAGS_TF; } - - si_code = get_si_code(condition); - /* Ok, finally something we can handle */ - send_sigtrap(tsk, regs, error_code, si_code); - - /* - * Disable additional traps. They'll be re-enabled when - * the signal is delivered. - */ -clear_dr7: - set_debugreg(0, 7); + si_code = get_si_code(tsk->thread.debugreg6); + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); - return; -#ifdef CONFIG_X86_32 -debug_vm86: - /* reenable preemption: handle_vm86_trap() might sleep */ - dec_preempt_count(); - handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1); - conditional_cli(regs); - return; -#endif - -clear_TF_reenable: - set_tsk_thread_flag(tsk, TIF_SINGLESTEP); - regs->flags &= ~X86_EFLAGS_TF; - preempt_conditional_cli(regs); return; } -- cgit v1.1 From 1e3500666f7c5daaadadb8431a2927cdbbdb7dd4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:26 +0530 Subject: hw-breakpoints: use wrapper routines around debug registers in processor related functions This patch enables the use of wrapper routines to access the debug/breakpoint registers on cpu management. The hardcoded debug registers save and restore operations for threads breakpoints are replaced by wrappers. And now that we handle the kernel breakpoints too, we also need to handle them on cpu hotplug operations. [ Impact: adapt new hardware breakpoint api to cpu hotplug ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/smpboot.c | 3 +++ arch/x86/power/cpu_32.c | 13 +++---------- arch/x86/power/cpu_64.c | 12 +++--------- 3 files changed, 9 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 58d24ef..2b2652d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include @@ -326,6 +327,7 @@ notrace static void __cpuinit start_secondary(void *unused) setup_secondary_clock(); wmb(); + load_debug_registers(); cpu_idle(); } @@ -1250,6 +1252,7 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); + hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 5199139..2bc3b01 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c @@ -13,6 +13,7 @@ #include #include #include +#include static struct saved_context saved_context; @@ -48,6 +49,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr2 = read_cr2(); ctxt->cr3 = read_cr3(); ctxt->cr4 = read_cr4_safe(); + hw_breakpoint_disable(); } /* Needed by apm.c */ @@ -83,16 +85,7 @@ static void fix_processor_context(void) /* * Now maybe reload the debug registers */ - if (current->thread.debugreg7) { - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } - + load_debug_registers(); } static void __restore_processor_state(struct saved_context *ctxt) diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c index 1e3bdcc..46866a1 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu_64.c @@ -16,6 +16,7 @@ #include #include #include +#include static void fix_processor_context(void); @@ -71,6 +72,7 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr3 = read_cr3(); ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); + hw_breakpoint_disable(); } void save_processor_state(void) @@ -162,13 +164,5 @@ static void fix_processor_context(void) /* * Now maybe reload the debug registers */ - if (current->thread.debugreg7){ - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - /* no 4 and 5 */ - loaddebug(¤t->thread, 6); - loaddebug(¤t->thread, 7); - } + load_debug_registers(); } -- cgit v1.1 From 66cb5917295958652ff6ba36d83f98f2379c46b4 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:44:55 +0530 Subject: hw-breakpoints: use the new wrapper routines to access debug registers in process/thread code This patch enables the use of abstract debug registers in process-handling routines, according to the new hardware breakpoint Api. [ Impact: adapt thread breakpoints handling code to the new breakpoint Api ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/process.c | 22 ++++++---------------- arch/x86/kernel/process_32.c | 28 ++++++++++++++++++++++++++++ arch/x86/kernel/process_64.c | 31 +++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 291527c..19a686c 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -46,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + flush_thread_hw_breakpoint(tsk); WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } @@ -106,12 +110,8 @@ void flush_thread(void) clear_tsk_thread_flag(tsk, TIF_DEBUG); - tsk->thread.debugreg[0] = 0; - tsk->thread.debugreg[1] = 0; - tsk->thread.debugreg[2] = 0; - tsk->thread.debugreg[3] = 0; - tsk->thread.debugreg6 = 0; - tsk->thread.debugreg7 = 0; + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + flush_thread_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. @@ -193,16 +193,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else if (next->debugctlmsr != prev->debugctlmsr) update_debugctlmsr(next->debugctlmsr); - if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { - set_debugreg(next->debugreg[0], 0); - set_debugreg(next->debugreg[1], 1); - set_debugreg(next->debugreg[2], 2); - set_debugreg(next->debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg6, 6); - set_debugreg(next->debugreg7, 7); - } - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index b5e4bfe..297ffff 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -61,6 +61,8 @@ #include #include #include +#include +#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -265,7 +267,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, task_user_gs(p) = get_user_gs(regs); + p->thread.io_bitmap_ptr = NULL; tsk = current; + err = -ENOMEM; + if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) + if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) + goto out; + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES, GFP_KERNEL); @@ -285,10 +293,13 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); +out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + if (err) + flush_thread_hw_breakpoint(p); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; @@ -427,6 +438,23 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); + /* + * There's a problem with moving the arch_install_thread_hw_breakpoint() + * call before current is updated. Suppose a kernel breakpoint is + * triggered in between the two, the hw-breakpoint handler will see that + * the 'current' task does not have TIF_DEBUG flag set and will think it + * is leftover from an old task (lazy switching) and will erase it. Then + * until the next context switch, no user-breakpoints will be installed. + * + * The real problem is that it's impossible to update both current and + * physical debug registers at the same instant, so there will always be + * a window in which they disagree and a breakpoint might get triggered. + * Since we use lazy switching, we are forced to assume that a + * disagreement means that current is correct and the exception is due + * to lazy debug register switching. + */ + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) + arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5a1a1de..f7b276d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -55,6 +55,8 @@ #include #include #include +#include +#include asmlinkage extern void ret_from_fork(void); @@ -248,6 +250,8 @@ void release_thread(struct task_struct *dead_task) BUG(); } } + if (unlikely(dead_task->thread.debugreg7)) + flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -303,12 +307,18 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.fs = me->thread.fs; p->thread.gs = me->thread.gs; + p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); savesegment(fs, p->thread.fsindex); savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); + err = -ENOMEM; + if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) + if (copy_thread_hw_breakpoint(me, p, clone_flags)) + goto out; + if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { @@ -347,6 +357,9 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } + if (err) + flush_thread_hw_breakpoint(p); + return err; } @@ -492,6 +505,24 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (tsk_used_math(next_p) && next_p->fpu_counter > 5) math_state_restore(); + /* + * There's a problem with moving the arch_install_thread_hw_breakpoint() + * call before current is updated. Suppose a kernel breakpoint is + * triggered in between the two, the hw-breakpoint handler will see that + * the 'current' task does not have TIF_DEBUG flag set and will think it + * is leftover from an old task (lazy switching) and will erase it. Then + * until the next context switch, no user-breakpoints will be installed. + * + * The real problem is that it's impossible to update both current and + * physical debug registers at the same instant, so there will always be + * a window in which they disagree and a breakpoint might get triggered. + * Since we use lazy switching, we are forced to assume that a + * disagreement means that current is correct and the exception is due + * to lazy debug register switching. + */ + if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) + arch_install_thread_hw_breakpoint(next_p); + return prev_p; } -- cgit v1.1 From da0cdc14f5f7e0faee6b2393fefed056cdb17146 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:45:03 +0530 Subject: hw-breakpoints: modify signal handling code to refrain from re-enabling HW Breakpoints This patch disables re-enabling of Hardware Breakpoint registers through the signal handling code. This is now done during from hw_breakpoint_handler(). Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/signal.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 1442516..f33d2e0 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -800,15 +800,6 @@ static void do_signal(struct pt_regs *regs) signr = get_signal_to_deliver(&info, &ka, regs, NULL); if (signr > 0) { - /* - * Re-enable any watchpoints before delivering the - * signal to user space. The processor register will - * have been cleared if the watchpoint triggered - * inside the kernel. - */ - if (current->thread.debugreg7) - set_debugreg(current->thread.debugreg7, 7); - /* Whee! Actually deliver the signal. */ if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { /* -- cgit v1.1 From 72f674d203cd230426437cdcf7dd6f681dad8b0d Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:45:48 +0530 Subject: hw-breakpoints: modify Ptrace routines to access breakpoint registers This patch modifies the ptrace code to use the new wrapper routines around the debug/breakpoint registers. [ Impact: adapt x86 ptrace to the new breakpoint Api ] Original-patch-by: Alan Stern Signed-off-by: K.Prasad Signed-off-by: Maneesh Soni Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/ptrace.c | 231 +++++++++++++++++++++++++++++------------------ 1 file changed, 141 insertions(+), 90 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 313be40..b457f78 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -34,6 +34,7 @@ #include #include #include +#include #include @@ -136,11 +137,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ - return TASK_SIZE - 3; -} - #else /* CONFIG_X86_64 */ #define FLAG_MASK (FLAG_MASK_32 | X86_EFLAGS_NT) @@ -265,15 +261,6 @@ static int set_segment_reg(struct task_struct *task, return 0; } -static unsigned long debugreg_addr_limit(struct task_struct *task) -{ -#ifdef CONFIG_IA32_EMULATION - if (test_tsk_thread_flag(task, TIF_IA32)) - return IA32_PAGE_OFFSET - 3; -#endif - return TASK_SIZE_MAX - 7; -} - #endif /* CONFIG_X86_32 */ static unsigned long get_flags(struct task_struct *task) @@ -464,95 +451,159 @@ static int genregs_set(struct task_struct *target, } /* - * This function is trivial and will be inlined by the compiler. - * Having it separates the implementation details of debug - * registers from the interface details of ptrace. + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. */ -static unsigned long ptrace_get_debugreg(struct task_struct *child, int n) +static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, + unsigned *type) { - switch (n) { - case 0: return child->thread.debugreg[0]; - case 1: return child->thread.debugreg[1]; - case 2: return child->thread.debugreg[2]; - case 3: return child->thread.debugreg[3]; - case 6: return child->thread.debugreg6; - case 7: return child->thread.debugreg7; - } - return 0; + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); + + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } -static int ptrace_set_debugreg(struct task_struct *child, - int n, unsigned long data) +static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) { + struct thread_struct *thread = &(current->thread); int i; - if (unlikely(n == 4 || n == 5)) - return -EIO; + /* + * Store in the virtual DR6 register the fact that the breakpoint + * was hit so the thread's debugger will see it. + */ + for (i = 0; i < hbp_kernel_pos; i++) + /* + * We will check bp->info.address against the address stored in + * thread's hbp structure and not debugreg[i]. This is to ensure + * that the corresponding bit for 'i' in DR7 register is enabled + */ + if (bp->info.address == thread->hbp[i]->info.address) + break; - if (n < 4 && unlikely(data >= debugreg_addr_limit(child))) - return -EIO; + thread->debugreg6 |= (DR_TRAP0 << i); +} - switch (n) { - case 0: child->thread.debugreg[0] = data; break; - case 1: child->thread.debugreg[1] = data; break; - case 2: child->thread.debugreg[2] = data; break; - case 3: child->thread.debugreg[3] = data; break; +/* + * Handle ptrace writes to debug register 7. + */ +static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long old_dr7 = thread->debugreg7; + int i, orig_ret = 0, rc = 0; + int enabled, second_pass = 0; + unsigned len, type; + struct hw_breakpoint *bp; + + data &= ~DR_CONTROL_RESERVED; +restore: + /* + * Loop through all the hardware breakpoints, making the + * appropriate changes to each. + */ + for (i = 0; i < HBP_NUM; i++) { + enabled = decode_dr7(data, i, &len, &type); + bp = thread->hbp[i]; + + if (!enabled) { + if (bp) { + /* Don't unregister the breakpoints right-away, + * unless all register_user_hw_breakpoint() + * requests have succeeded. This prevents + * any window of opportunity for debug + * register grabbing by other users. + */ + if (!second_pass) + continue; + unregister_user_hw_breakpoint(tsk, bp); + kfree(bp); + } + continue; + } + if (!bp) { + rc = -ENOMEM; + bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); + if (bp) { + bp->info.address = thread->debugreg[i]; + bp->triggered = ptrace_triggered; + bp->info.len = len; + bp->info.type = type; + rc = register_user_hw_breakpoint(tsk, bp); + if (rc) + kfree(bp); + } + } else + rc = modify_user_hw_breakpoint(tsk, bp); + if (rc) + break; + } + /* + * Make a second pass to free the remaining unused breakpoints + * or to restore the original breakpoints if an error occurred. + */ + if (!second_pass) { + second_pass = 1; + if (rc < 0) { + orig_ret = rc; + data = old_dr7; + } + goto restore; + } + return ((orig_ret < 0) ? orig_ret : rc); +} - case 6: - if ((data & ~0xffffffffUL) != 0) - return -EIO; - child->thread.debugreg6 = data; - break; +/* + * Handle PTRACE_PEEKUSR calls for the debug register area. + */ +unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +{ + struct thread_struct *thread = &(tsk->thread); + unsigned long val = 0; + + if (n < HBP_NUM) + val = thread->debugreg[n]; + else if (n == 6) + val = thread->debugreg6; + else if (n == 7) + val = thread->debugreg7; + return val; +} - case 7: - /* - * Sanity-check data. Take one half-byte at once with - * check = (val >> (16 + 4*i)) & 0xf. It contains the - * R/Wi and LENi bits; bits 0 and 1 are R/Wi, and bits - * 2 and 3 are LENi. Given a list of invalid values, - * we do mask |= 1 << invalid_value, so that - * (mask >> check) & 1 is a correct test for invalid - * values. - * - * R/Wi contains the type of the breakpoint / - * watchpoint, LENi contains the length of the watched - * data in the watchpoint case. - * - * The invalid values are: - * - LENi == 0x10 (undefined), so mask |= 0x0f00. [32-bit] - * - R/Wi == 0x10 (break on I/O reads or writes), so - * mask |= 0x4444. - * - R/Wi == 0x00 && LENi != 0x00, so we have mask |= - * 0x1110. - * - * Finally, mask = 0x0f00 | 0x4444 | 0x1110 == 0x5f54. - * - * See the Intel Manual "System Programming Guide", - * 15.2.4 - * - * Note that LENi == 0x10 is defined on x86_64 in long - * mode (i.e. even for 32-bit userspace software, but - * 64-bit kernel), so the x86_64 mask value is 0x5454. - * See the AMD manual no. 24593 (AMD64 System Programming) - */ -#ifdef CONFIG_X86_32 -#define DR7_MASK 0x5f54 -#else -#define DR7_MASK 0x5554 -#endif - data &= ~DR_CONTROL_RESERVED; - for (i = 0; i < 4; i++) - if ((DR7_MASK >> ((data >> (16 + 4*i)) & 0xf)) & 1) - return -EIO; - child->thread.debugreg7 = data; - if (data) - set_tsk_thread_flag(child, TIF_DEBUG); - else - clear_tsk_thread_flag(child, TIF_DEBUG); - break; +/* + * Handle PTRACE_POKEUSR calls for the debug register area. + */ +int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) +{ + struct thread_struct *thread = &(tsk->thread); + int rc = 0; + + /* There are no DR4 or DR5 registers */ + if (n == 4 || n == 5) + return -EIO; + + if (n == 6) { + tsk->thread.debugreg6 = val; + goto ret_path; } + if (n < HBP_NUM) { + if (thread->hbp[n]) { + if (arch_check_va_in_userspace(val, + thread->hbp[n]->info.len) == 0) { + rc = -EIO; + goto ret_path; + } + thread->hbp[n]->info.address = val; + } + thread->debugreg[n] = val; + } + /* All that's left is DR7 */ + if (n == 7) + rc = ptrace_write_dr7(tsk, val); - return 0; +ret_path: + return rc; } /* -- cgit v1.1 From 17f557e5b5d43a2af66c969f6560ac7105020672 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:46:03 +0530 Subject: hw-breakpoints: cleanup HW Breakpoint registers before kexec This patch disables Hardware breakpoints before doing a 'kexec' on the machine so that the cpu doesn't keep debug registers values which would be out of sync for the new image. Original-patch-by: Alan Stern Signed-off-by: K.Prasad Reviewed-by: Alan Stern Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/machine_kexec_32.c | 2 ++ arch/x86/kernel/machine_kexec_64.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index c1c429d..c843f84 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -25,6 +25,7 @@ #include #include #include +#include static void set_idt(void *newidt, __u16 limit) { @@ -202,6 +203,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 84c3bf2..4a8bb82 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -18,6 +18,7 @@ #include #include #include +#include static int init_one_level2_page(struct kimage *image, pgd_t *pgd, unsigned long addr) @@ -282,6 +283,7 @@ void machine_kexec(struct kimage *image) /* Interrupts aren't acceptable while we reboot */ local_irq_disable(); + hw_breakpoint_disable(); if (image->preserve_context) { #ifdef CONFIG_X86_IO_APIC -- cgit v1.1 From 62edab9056a6cf0c9207339c8892c923a5217e45 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Mon, 1 Jun 2009 23:47:06 +0530 Subject: hw-breakpoints: reset bits in dr6 after the corresponding exception is handled This patch resets the bit in dr6 after the corresponding exception is handled in code, so that we keep a clean track of the current virtual debug status register. [ Impact: keep track of breakpoints triggering completion ] Signed-off-by: K.Prasad Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/hw_breakpoint.c | 13 +++++++++++-- arch/x86/kernel/kgdb.c | 6 ++++++ arch/x86/kernel/kprobes.c | 9 ++++++++- arch/x86/kernel/traps.c | 4 ++-- arch/x86/mm/kmmio.c | 8 +++++++- 5 files changed, 34 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 4867c9f..6945147 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -314,8 +314,12 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct hw_breakpoint *bp; - /* The DR6 value is stored in args->err */ - unsigned long dr7, dr6 = args->err; + unsigned long dr7, dr6; + unsigned long *dr6_p; + + /* The DR6 value is pointed by args->err */ + dr6_p = (unsigned long *)ERR_PTR(args->err); + dr6 = *dr6_p; /* Do an early return if no trap bits are set in DR6 */ if ((dr6 & DR_TRAP_BITS) == 0) @@ -352,6 +356,11 @@ int __kprobes hw_breakpoint_handler(struct die_args *args) rc = NOTIFY_DONE; } /* + * Reset the 'i'th TRAP bit in dr6 to denote completion of + * exception handling + */ + (*dr6_p) &= ~(DR_TRAP0 << i); + /* * bp can be NULL due to lazy debug register switching * or due to the delay between updates of hbp_kernel_pos * and this_hbp_kernel. diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b1f4dff..f820b73 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -434,6 +435,11 @@ single_step_cont(struct pt_regs *regs, struct die_args *args) "resuming...\n"); kgdb_arch_handle_exception(args->trapnr, args->signr, args->err, "c", "", regs); + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; return NOTIFY_STOP; } diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7b5169d..b5b1848 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -54,6 +54,7 @@ #include #include #include +#include void jprobe_return_end(void); @@ -967,8 +968,14 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, ret = NOTIFY_STOP; break; case DIE_DEBUG: - if (post_kprobe_handler(args->regs)) + if (post_kprobe_handler(args->regs)) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(args->err)) &= ~DR_STEP; ret = NOTIFY_STOP; + } break; case DIE_GPF: /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index de99132..124a4d5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -545,8 +545,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; - if (notify_die(DIE_DEBUG, "debug", regs, dr6, error_code, - SIGTRAP) == NOTIFY_STOP) + if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code, + SIGTRAP) == NOTIFY_STOP) return; /* It's safe to allow irq's after DR6 has been saved */ diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 16ccbd7..11a4ad4 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -540,8 +540,14 @@ kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) struct die_args *arg = args; if (val == DIE_DEBUG && (arg->err & DR_STEP)) - if (post_kmmio_handler(arg->err, arg->regs) == 1) + if (post_kmmio_handler(arg->err, arg->regs) == 1) { + /* + * Reset the BS bit in dr6 (pointed by args->err) to + * denote completion of processing + */ + (*(unsigned long *)ERR_PTR(arg->err)) &= ~DR_STEP; return NOTIFY_STOP; + } return NOTIFY_DONE; } -- cgit v1.1 From 4555835b707d5c778ee1c9076670bc99b1eeaf61 Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 17 Jun 2009 14:44:19 +0530 Subject: x86: hw_breakpoint.c arch_check_va_in_kernelspace and hw_breakpoint_handler should be static arch_check_va_in_kernelspace() and hw_breakpoint_handler() is used only by same file so it should be static. Also fixed non-ANSI function declaration of function 'arch_uninstall_thread_hw_breakpoint' Fixed following sparse warnings : arch/x86/kernel/hw_breakpoint.c:124:42: warning: non-ANSI function declaration of function 'arch_uninstall_thread_hw_breakpoint' arch/x86/kernel/hw_breakpoint.c:169:5: warning: symbol 'arch_check_va_in_kernelspace' was not declared. Should it be static? arch/x86/kernel/hw_breakpoint.c:313:15: warning: symbol 'hw_breakpoint_handler' was not declared. Should it be static? Signed-off-by: Jaswinder Singh Rajput Cc: Alan Stern Cc: "K.Prasad" Cc: Frederic Weisbecker LKML-Reference: <1245230059.2662.4.camel@ht.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/hw_breakpoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 51d9595..9316a9d 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -121,7 +121,7 @@ void arch_install_thread_hw_breakpoint(struct task_struct *tsk) /* * Install the debug register values for just the kernel, no thread. */ -void arch_uninstall_thread_hw_breakpoint() +void arch_uninstall_thread_hw_breakpoint(void) { /* Clear the user-space portion of debugreg7 by setting only kdr7 */ set_debugreg(kdr7, 7); @@ -166,7 +166,7 @@ int arch_check_va_in_userspace(unsigned long va, u8 hbp_len) /* * Check for virtual address in kernel space. */ -int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) +static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) { unsigned int len; @@ -310,7 +310,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) * NOTIFY_STOP returned for all other cases * */ -int __kprobes hw_breakpoint_handler(struct die_args *args) +static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; struct hw_breakpoint *bp; -- cgit v1.1 From 9d22b536609abf0d64648f99518676ea58245e3b Mon Sep 17 00:00:00 2001 From: Jaswinder Singh Rajput Date: Wed, 1 Jul 2009 19:52:30 +0530 Subject: x86: Mark ptrace_get_debugreg() as static This sparse warning: arch/x86/kernel/ptrace.c:560:15: warning: symbol 'ptrace_get_debugreg' was not declared. Should it be static? triggers because ptrace_get_debugreg() is global but is only used in a single .c file. change ptrace_get_debugreg() to static to fix that - this also addresses the sparse warning. Signed-off-by: Jaswinder Singh Rajput Cc: Steven Rostedt LKML-Reference: <1246458150.6940.19.camel@hpdv5.satnam> Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index b457f78..cabdabc 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -557,7 +557,7 @@ restore: /* * Handle PTRACE_PEEKUSR calls for the debug register area. */ -unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) +static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) { struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; -- cgit v1.1 From 41a48d14f6991020c9bb6b93e289ca5b411ed09a Mon Sep 17 00:00:00 2001 From: Paul Mundt Date: Mon, 5 Oct 2009 19:23:06 +0900 Subject: x86/hw-breakpoints: Actually flush thread breakpoints in flush_thread(). flush_thread() tries to do a TIF_DEBUG check before calling in to flush_thread_hw_breakpoint() (which subsequently clears the thread flag), but for some reason, the x86 code is manually clearing TIF_DEBUG immediately before the test, so this path will never be taken. This kills off the erroneous clear_tsk_thread_flag() and lets flush_thread_hw_breakpoint() actually get invoked. Presumably folks were getting lucky with testing and the free_thread_info() -> free_thread_xstate() path was taking care of the flush there. Signed-off-by: Paul Mundt Acked-by: "K.Prasad" Cc: Ingo Molnar Cc: Alan Stern LKML-Reference: <20091005102306.GA7889@linux-sh.org> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/process.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2275ce5..cf8ee00 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -107,8 +107,6 @@ void flush_thread(void) } #endif - clear_tsk_thread_flag(tsk, TIF_DEBUG); - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) flush_thread_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); -- cgit v1.1 From 2da3e160cb3d226d87b907fab26850d838ed8d7c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 5 Nov 2009 23:06:50 +0100 Subject: hw-breakpoint: Move asm-generic/hw_breakpoint.h to linux/hw_breakpoint.h We plan to make the breakpoints parameters generic among architectures. For that it's better to move the asm-generic header to a generic linux header. Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/hw_breakpoint.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 1acb4d4..3cfca8e 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -12,7 +12,7 @@ struct arch_hw_breakpoint { }; #include -#include +#include /* Available HW breakpoint length encodings */ #define HW_BREAKPOINT_LEN_1 0x40 -- cgit v1.1 From 24f1e32c60c45c89a997c73395b69c8af6f0a84e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Sep 2009 19:22:48 +0200 Subject: hw-breakpoints: Rewrite the hw-breakpoints layer on top of perf events This patch rebase the implementation of the breakpoints API on top of perf events instances. Each breakpoints are now perf events that handle the register scheduling, thread/cpu attachment, etc.. The new layering is now made as follows: ptrace kgdb ftrace perf syscall \ | / / \ | / / / Core breakpoint API / / | / | / Breakpoints perf events | | Breakpoints PMU ---- Debug Register constraints handling (Part of core breakpoint API) | | Hardware debug registers Reasons of this rewrite: - Use the centralized/optimized pmu registers scheduling, implying an easier arch integration - More powerful register handling: perf attributes (pinned/flexible events, exclusive/non-exclusive, tunable period, etc...) Impact: - New perf ABI: the hardware breakpoints counters - Ptrace breakpoints setting remains tricky and still needs some per thread breakpoints references. Todo (in the order): - Support breakpoints perf counter events for perf tools (ie: implement perf_bpcounter_event()) - Support from perf tools Changes in v2: - Follow the perf "event " rename - The ptrace regression have been fixed (ptrace breakpoint perf events weren't released when a task ended) - Drop the struct hw_breakpoint and store generic fields in perf_event_attr. - Separate core and arch specific headers, drop asm-generic/hw_breakpoint.h and create linux/hw_breakpoint.h - Use new generic len/type for breakpoint - Handle off case: when breakpoints api is not supported by an arch Changes in v3: - Fix broken CONFIG_KVM, we need to propagate the breakpoint api changes to kvm when we exit the guest and restore the bp registers to the host. Changes in v4: - Drop the hw_breakpoint_restore() stub as it is only used by KVM - EXPORT_SYMBOL_GPL hw_breakpoint_restore() as KVM can be built as a module - Restore the breakpoints unconditionally on kvm guest exit: TIF_DEBUG_THREAD doesn't anymore cover every cases of running breakpoints and vcpu->arch.switch_db_regs might not always be set when the guest used debug registers. (Waiting for a reliable optimization) Changes in v5: - Split-up the asm-generic/hw-breakpoint.h moving to linux/hw_breakpoint.h into a separate patch - Optimize the breakpoints restoring while switching from kvm guest to host. We only want to restore the state if we have active breakpoints to the host, otherwise we don't care about messed-up address registers. - Add asm/hw_breakpoint.h to Kbuild - Fix bad breakpoint type in trace_selftest.c Changes in v6: - Fix wrong header inclusion in trace.h (triggered a build error with CONFIG_FTRACE_SELFTEST Signed-off-by: Frederic Weisbecker Cc: Prasad Cc: Alan Stern Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Steven Rostedt Cc: Ingo Molnar Cc: Jan Kiszka Cc: Jiri Slaby Cc: Li Zefan Cc: Avi Kivity Cc: Paul Mackerras Cc: Mike Galbraith Cc: Masami Hiramatsu Cc: Paul Mundt --- arch/Kconfig | 3 + arch/x86/include/asm/Kbuild | 1 + arch/x86/include/asm/debugreg.h | 11 +- arch/x86/include/asm/hw_breakpoint.h | 58 ++++-- arch/x86/include/asm/processor.h | 12 +- arch/x86/kernel/hw_breakpoint.c | 391 +++++++++++++++++++++++------------ arch/x86/kernel/process.c | 7 +- arch/x86/kernel/process_32.c | 26 +-- arch/x86/kernel/process_64.c | 26 +-- arch/x86/kernel/ptrace.c | 182 +++++++++++----- arch/x86/kernel/smpboot.c | 3 - arch/x86/kvm/x86.c | 18 +- arch/x86/power/cpu.c | 6 - 13 files changed, 448 insertions(+), 296 deletions(-) (limited to 'arch') diff --git a/arch/Kconfig b/arch/Kconfig index acb66439..eef3bbb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -128,6 +128,9 @@ config HAVE_DEFAULT_NO_SPIN_MUTEXES config HAVE_HW_BREAKPOINT bool + depends on HAVE_PERF_EVENTS + select ANON_INODES + select PERF_EVENTS source "kernel/gcov/Kconfig" diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80c..9f828f8 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -10,6 +10,7 @@ header-y += ptrace-abi.h header-y += sigcontext32.h header-y += ucontext.h header-y += processor-flags.h +header-y += hw_breakpoint.h unifdef-y += e820.h unifdef-y += ist.h diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 23439fb..9a3333c 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -75,13 +75,8 @@ */ #ifdef __KERNEL__ -/* For process management */ -extern void flush_thread_hw_breakpoint(struct task_struct *tsk); -extern int copy_thread_hw_breakpoint(struct task_struct *tsk, - struct task_struct *child, unsigned long clone_flags); +DECLARE_PER_CPU(unsigned long, dr7); -/* For CPU management */ -extern void load_debug_registers(void); static inline void hw_breakpoint_disable(void) { /* Zero the control register for HW Breakpoint */ @@ -94,6 +89,10 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +#ifdef CONFIG_KVM +extern void hw_breakpoint_restore(void); +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_X86_DEBUGREG_H */ diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 3cfca8e..0675a7c 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -4,6 +4,11 @@ #ifdef __KERNEL__ #define __ARCH_HW_BREAKPOINT_H +/* + * The name should probably be something dealt in + * a higher level. While dealing with the user + * (display/resolving) + */ struct arch_hw_breakpoint { char *name; /* Contains name of the symbol to set bkpt */ unsigned long address; @@ -12,44 +17,57 @@ struct arch_hw_breakpoint { }; #include -#include +#include +#include /* Available HW breakpoint length encodings */ -#define HW_BREAKPOINT_LEN_1 0x40 -#define HW_BREAKPOINT_LEN_2 0x44 -#define HW_BREAKPOINT_LEN_4 0x4c -#define HW_BREAKPOINT_LEN_EXECUTE 0x40 +#define X86_BREAKPOINT_LEN_1 0x40 +#define X86_BREAKPOINT_LEN_2 0x44 +#define X86_BREAKPOINT_LEN_4 0x4c +#define X86_BREAKPOINT_LEN_EXECUTE 0x40 #ifdef CONFIG_X86_64 -#define HW_BREAKPOINT_LEN_8 0x48 +#define X86_BREAKPOINT_LEN_8 0x48 #endif /* Available HW breakpoint type encodings */ /* trigger on instruction execute */ -#define HW_BREAKPOINT_EXECUTE 0x80 +#define X86_BREAKPOINT_EXECUTE 0x80 /* trigger on memory write */ -#define HW_BREAKPOINT_WRITE 0x81 +#define X86_BREAKPOINT_WRITE 0x81 /* trigger on memory read or write */ -#define HW_BREAKPOINT_RW 0x83 +#define X86_BREAKPOINT_RW 0x83 /* Total number of available HW breakpoint registers */ #define HBP_NUM 4 -extern struct hw_breakpoint *hbp_kernel[HBP_NUM]; -DECLARE_PER_CPU(struct hw_breakpoint*, this_hbp_kernel[HBP_NUM]); -extern unsigned int hbp_user_refcount[HBP_NUM]; +struct perf_event; +struct pmu; -extern void arch_install_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_uninstall_thread_hw_breakpoint(void); extern int arch_check_va_in_userspace(unsigned long va, u8 hbp_len); -extern int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk); -extern void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk); -extern void arch_flush_thread_hw_breakpoint(struct task_struct *tsk); -extern void arch_update_kernel_hw_breakpoint(void *); +extern int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk); extern int hw_breakpoint_exceptions_notify(struct notifier_block *unused, - unsigned long val, void *data); + unsigned long val, void *data); + + +int arch_install_hw_breakpoint(struct perf_event *bp); +void arch_uninstall_hw_breakpoint(struct perf_event *bp); +void hw_breakpoint_pmu_read(struct perf_event *bp); +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp); + +extern void +arch_fill_perf_breakpoint(struct perf_event *bp); + +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type); +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type); + +extern int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type); + +extern struct pmu perf_ops_bp; + #endif /* __KERNEL__ */ #endif /* _I386_HW_BREAKPOINT_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 61aafb7..820f300 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -423,6 +423,8 @@ extern unsigned int xstate_size; extern void free_thread_xstate(struct task_struct *); extern struct kmem_cache *task_xstate_cachep; +struct perf_event; + struct thread_struct { /* Cached TLS descriptors: */ struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES]; @@ -444,12 +446,10 @@ struct thread_struct { unsigned long fs; #endif unsigned long gs; - /* Hardware debugging registers: */ - unsigned long debugreg[HBP_NUM]; - unsigned long debugreg6; - unsigned long debugreg7; - /* Hardware breakpoint info */ - struct hw_breakpoint *hbp[HBP_NUM]; + /* Save middle states of ptrace breakpoints */ + struct perf_event *ptrace_bps[HBP_NUM]; + /* Debug status used for traps, single steps, etc... */ + unsigned long debugreg6; /* Fault info: */ unsigned long cr2; unsigned long trap_no; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 9316a9d..e622620 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -15,6 +15,7 @@ * * Copyright (C) 2007 Alan Stern * Copyright (C) 2009 IBM Corporation + * Copyright (C) 2009 Frederic Weisbecker */ /* @@ -22,6 +23,8 @@ * using the CPU's debug registers. */ +#include +#include #include #include #include @@ -38,26 +41,24 @@ #include #include -/* Unmasked kernel DR7 value */ -static unsigned long kdr7; +/* Per cpu debug control register value */ +DEFINE_PER_CPU(unsigned long, dr7); + +/* Per cpu debug address registers values */ +static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); /* - * Masks for the bits corresponding to registers DR0 - DR3 in DR7 register. - * Used to clear and verify the status of bits corresponding to DR0 - DR3 + * Stores the breakpoints currently in use on each breakpoint address + * register for each cpus */ -static const unsigned long dr7_masks[HBP_NUM] = { - 0x000f0003, /* LEN0, R/W0, G0, L0 */ - 0x00f0000c, /* LEN1, R/W1, G1, L1 */ - 0x0f000030, /* LEN2, R/W2, G2, L2 */ - 0xf00000c0 /* LEN3, R/W3, G3, L3 */ -}; +static DEFINE_PER_CPU(struct perf_event *, bp_per_reg[HBP_NUM]); /* * Encode the length, type, Exact, and Enable bits for a particular breakpoint * as stored in debug register 7. */ -static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) +unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) { unsigned long bp_info; @@ -68,64 +69,89 @@ static unsigned long encode_dr7(int drnum, unsigned int len, unsigned int type) return bp_info; } -void arch_update_kernel_hw_breakpoint(void *unused) +/* + * Decode the length and type bits for a particular breakpoint as + * stored in debug register 7. Return the "enabled" status. + */ +int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, unsigned *type) { - struct hw_breakpoint *bp; - int i, cpu = get_cpu(); - unsigned long temp_kdr7 = 0; - - /* Don't allow debug exceptions while we update the registers */ - set_debugreg(0UL, 7); + int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - for (i = hbp_kernel_pos; i < HBP_NUM; i++) { - per_cpu(this_hbp_kernel[i], cpu) = bp = hbp_kernel[i]; - if (bp) { - temp_kdr7 |= encode_dr7(i, bp->info.len, bp->info.type); - set_debugreg(bp->info.address, i); - } - } + *len = (bp_info & 0xc) | 0x40; + *type = (bp_info & 0x3) | 0x80; - /* No need to set DR6. Update the debug registers with kernel-space - * breakpoint values from kdr7 and user-space requests from the - * current process - */ - kdr7 = temp_kdr7; - set_debugreg(kdr7 | current->thread.debugreg7, 7); - put_cpu(); + return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; } /* - * Install the thread breakpoints in their debug registers. + * Install a perf counter breakpoint. + * + * We seek a free debug address register and use it for this + * breakpoint. Eventually we enable it in the debug control register. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. */ -void arch_install_thread_hw_breakpoint(struct task_struct *tsk) +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct thread_struct *thread = &(tsk->thread); - - switch (hbp_kernel_pos) { - case 4: - set_debugreg(thread->debugreg[3], 3); - case 3: - set_debugreg(thread->debugreg[2], 2); - case 2: - set_debugreg(thread->debugreg[1], 1); - case 1: - set_debugreg(thread->debugreg[0], 0); - default: - break; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (!*slot) { + *slot = bp; + break; + } } - /* No need to set DR6 */ - set_debugreg((kdr7 | thread->debugreg7), 7); + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return -EBUSY; + + set_debugreg(info->address, i); + __get_cpu_var(cpu_debugreg[i]) = info->address; + + dr7 = &__get_cpu_var(dr7); + *dr7 |= encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); + + return 0; } /* - * Install the debug register values for just the kernel, no thread. + * Uninstall the breakpoint contained in the given counter. + * + * First we search the debug address register it uses and then we disable + * it. + * + * Atomic: we hold the counter->ctx->lock and we only handle variables + * and registers local to this cpu. */ -void arch_uninstall_thread_hw_breakpoint(void) +void arch_uninstall_hw_breakpoint(struct perf_event *bp) { - /* Clear the user-space portion of debugreg7 by setting only kdr7 */ - set_debugreg(kdr7, 7); + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned long *dr7; + int i; + + for (i = 0; i < HBP_NUM; i++) { + struct perf_event **slot = &__get_cpu_var(bp_per_reg[i]); + + if (*slot == bp) { + *slot = NULL; + break; + } + } + + if (WARN_ONCE(i == HBP_NUM, "Can't find any breakpoint slot")) + return; + dr7 = &__get_cpu_var(dr7); + *dr7 &= ~encode_dr7(i, info->len, info->type); + + set_debugreg(*dr7, 7); } static int get_hbp_len(u8 hbp_len) @@ -133,17 +159,17 @@ static int get_hbp_len(u8 hbp_len) unsigned int len_in_bytes = 0; switch (hbp_len) { - case HW_BREAKPOINT_LEN_1: + case X86_BREAKPOINT_LEN_1: len_in_bytes = 1; break; - case HW_BREAKPOINT_LEN_2: + case X86_BREAKPOINT_LEN_2: len_in_bytes = 2; break; - case HW_BREAKPOINT_LEN_4: + case X86_BREAKPOINT_LEN_4: len_in_bytes = 4; break; #ifdef CONFIG_X86_64 - case HW_BREAKPOINT_LEN_8: + case X86_BREAKPOINT_LEN_8: len_in_bytes = 8; break; #endif @@ -178,67 +204,146 @@ static int arch_check_va_in_kernelspace(unsigned long va, u8 hbp_len) /* * Store a breakpoint's encoded address, length, and type. */ -static int arch_store_info(struct hw_breakpoint *bp, struct task_struct *tsk) +static int arch_store_info(struct perf_event *bp) { - /* - * User-space requests will always have the address field populated - * Symbol names from user-space are rejected - */ - if (tsk && bp->info.name) - return -EINVAL; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); /* * For kernel-addresses, either the address or symbol name can be * specified. */ - if (bp->info.name) - bp->info.address = (unsigned long) - kallsyms_lookup_name(bp->info.name); - if (bp->info.address) + if (info->name) + info->address = (unsigned long) + kallsyms_lookup_name(info->name); + if (info->address) return 0; + return -EINVAL; } -/* - * Validate the arch-specific HW Breakpoint register settings - */ -int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, - struct task_struct *tsk) +int arch_bp_generic_fields(int x86_len, int x86_type, + int *gen_len, int *gen_type) { - unsigned int align; - int ret = -EINVAL; + /* Len */ + switch (x86_len) { + case X86_BREAKPOINT_LEN_1: + *gen_len = HW_BREAKPOINT_LEN_1; + break; + case X86_BREAKPOINT_LEN_2: + *gen_len = HW_BREAKPOINT_LEN_2; + break; + case X86_BREAKPOINT_LEN_4: + *gen_len = HW_BREAKPOINT_LEN_4; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: + *gen_len = HW_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } - switch (bp->info.type) { - /* - * Ptrace-refactoring code - * For now, we'll allow instruction breakpoint only for user-space - * addresses - */ - case HW_BREAKPOINT_EXECUTE: - if ((!arch_check_va_in_userspace(bp->info.address, - bp->info.len)) && - bp->info.len != HW_BREAKPOINT_LEN_EXECUTE) - return ret; + /* Type */ + switch (x86_type) { + case X86_BREAKPOINT_EXECUTE: + *gen_type = HW_BREAKPOINT_X; break; - case HW_BREAKPOINT_WRITE: + case X86_BREAKPOINT_WRITE: + *gen_type = HW_BREAKPOINT_W; break; - case HW_BREAKPOINT_RW: + case X86_BREAKPOINT_RW: + *gen_type = HW_BREAKPOINT_W | HW_BREAKPOINT_R; break; default: - return ret; + return -EINVAL; } - switch (bp->info.len) { + return 0; +} + + +static int arch_build_bp_info(struct perf_event *bp) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + + info->address = bp->attr.bp_addr; + + /* Len */ + switch (bp->attr.bp_len) { case HW_BREAKPOINT_LEN_1: - align = 0; + info->len = X86_BREAKPOINT_LEN_1; break; case HW_BREAKPOINT_LEN_2: - align = 1; + info->len = X86_BREAKPOINT_LEN_2; break; case HW_BREAKPOINT_LEN_4: - align = 3; + info->len = X86_BREAKPOINT_LEN_4; break; #ifdef CONFIG_X86_64 case HW_BREAKPOINT_LEN_8: + info->len = X86_BREAKPOINT_LEN_8; + break; +#endif + default: + return -EINVAL; + } + + /* Type */ + switch (bp->attr.bp_type) { + case HW_BREAKPOINT_W: + info->type = X86_BREAKPOINT_WRITE; + break; + case HW_BREAKPOINT_W | HW_BREAKPOINT_R: + info->type = X86_BREAKPOINT_RW; + break; + case HW_BREAKPOINT_X: + info->type = X86_BREAKPOINT_EXECUTE; + break; + default: + return -EINVAL; + } + + return 0; +} +/* + * Validate the arch-specific HW Breakpoint register settings + */ +int arch_validate_hwbkpt_settings(struct perf_event *bp, + struct task_struct *tsk) +{ + struct arch_hw_breakpoint *info = counter_arch_bp(bp); + unsigned int align; + int ret; + + + ret = arch_build_bp_info(bp); + if (ret) + return ret; + + ret = -EINVAL; + + if (info->type == X86_BREAKPOINT_EXECUTE) + /* + * Ptrace-refactoring code + * For now, we'll allow instruction breakpoint only for user-space + * addresses + */ + if ((!arch_check_va_in_userspace(info->address, info->len)) && + info->len != X86_BREAKPOINT_EXECUTE) + return ret; + + switch (info->len) { + case X86_BREAKPOINT_LEN_1: + align = 0; + break; + case X86_BREAKPOINT_LEN_2: + align = 1; + break; + case X86_BREAKPOINT_LEN_4: + align = 3; + break; +#ifdef CONFIG_X86_64 + case X86_BREAKPOINT_LEN_8: align = 7; break; #endif @@ -246,8 +351,8 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, return ret; } - if (bp->triggered) - ret = arch_store_info(bp, tsk); + if (bp->callback) + ret = arch_store_info(bp); if (ret < 0) return ret; @@ -255,44 +360,47 @@ int arch_validate_hwbkpt_settings(struct hw_breakpoint *bp, * Check that the low-order bits of the address are appropriate * for the alignment implied by len. */ - if (bp->info.address & align) + if (info->address & align) return -EINVAL; /* Check that the virtual address is in the proper range */ if (tsk) { - if (!arch_check_va_in_userspace(bp->info.address, bp->info.len)) + if (!arch_check_va_in_userspace(info->address, info->len)) return -EFAULT; } else { - if (!arch_check_va_in_kernelspace(bp->info.address, - bp->info.len)) + if (!arch_check_va_in_kernelspace(info->address, info->len)) return -EFAULT; } + return 0; } -void arch_update_user_hw_breakpoint(int pos, struct task_struct *tsk) +/* + * Release the user breakpoints used by ptrace + */ +void flush_ptrace_hw_breakpoint(struct task_struct *tsk) { - struct thread_struct *thread = &(tsk->thread); - struct hw_breakpoint *bp = thread->hbp[pos]; - - thread->debugreg7 &= ~dr7_masks[pos]; - if (bp) { - thread->debugreg[pos] = bp->info.address; - thread->debugreg7 |= encode_dr7(pos, bp->info.len, - bp->info.type); - } else - thread->debugreg[pos] = 0; + int i; + struct thread_struct *t = &tsk->thread; + + for (i = 0; i < HBP_NUM; i++) { + unregister_hw_breakpoint(t->ptrace_bps[i]); + t->ptrace_bps[i] = NULL; + } } -void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) +#ifdef CONFIG_KVM +void hw_breakpoint_restore(void) { - int i; - struct thread_struct *thread = &(tsk->thread); - - thread->debugreg7 = 0; - for (i = 0; i < HBP_NUM; i++) - thread->debugreg[i] = 0; + set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); + set_debugreg(__get_cpu_var(cpu_debugreg[1]), 1); + set_debugreg(__get_cpu_var(cpu_debugreg[2]), 2); + set_debugreg(__get_cpu_var(cpu_debugreg[3]), 3); + set_debugreg(current->thread.debugreg6, 6); + set_debugreg(__get_cpu_var(dr7), 7); } +EXPORT_SYMBOL_GPL(hw_breakpoint_restore); +#endif /* * Handle debug exception notifications. @@ -313,7 +421,7 @@ void arch_flush_thread_hw_breakpoint(struct task_struct *tsk) static int __kprobes hw_breakpoint_handler(struct die_args *args) { int i, cpu, rc = NOTIFY_STOP; - struct hw_breakpoint *bp; + struct perf_event *bp; unsigned long dr7, dr6; unsigned long *dr6_p; @@ -325,10 +433,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) if ((dr6 & DR_TRAP_BITS) == 0) return NOTIFY_DONE; - /* Lazy debug register switching */ - if (!test_tsk_thread_flag(current, TIF_DEBUG)) - arch_uninstall_thread_hw_breakpoint(); - get_debugreg(dr7, 7); /* Disable breakpoints during exception handling */ set_debugreg(0UL, 7); @@ -344,17 +448,18 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) for (i = 0; i < HBP_NUM; ++i) { if (likely(!(dr6 & (DR_TRAP0 << i)))) continue; + /* - * Find the corresponding hw_breakpoint structure and - * invoke its triggered callback. + * The counter may be concurrently released but that can only + * occur from a call_rcu() path. We can then safely fetch + * the breakpoint, use its callback, touch its counter + * while we are in an rcu_read_lock() path. */ - if (i >= hbp_kernel_pos) - bp = per_cpu(this_hbp_kernel[i], cpu); - else { - bp = current->thread.hbp[i]; - if (bp) - rc = NOTIFY_DONE; - } + rcu_read_lock(); + + bp = per_cpu(bp_per_reg[i], cpu); + if (bp) + rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -362,19 +467,23 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) (*dr6_p) &= ~(DR_TRAP0 << i); /* * bp can be NULL due to lazy debug register switching - * or due to the delay between updates of hbp_kernel_pos - * and this_hbp_kernel. + * or due to concurrent perf counter removing. */ - if (!bp) - continue; + if (!bp) { + rcu_read_unlock(); + break; + } + + (bp->callback)(bp, args->regs); - (bp->triggered)(bp, args->regs); + rcu_read_unlock(); } if (dr6 & (~DR_TRAP_BITS)) rc = NOTIFY_DONE; set_debugreg(dr7, 7); put_cpu(); + return rc; } @@ -389,3 +498,13 @@ int __kprobes hw_breakpoint_exceptions_notify( return hw_breakpoint_handler(data); } + +void hw_breakpoint_pmu_read(struct perf_event *bp) +{ + /* TODO */ +} + +void hw_breakpoint_pmu_unthrottle(struct perf_event *bp) +{ + /* TODO */ +} diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cf8ee00..744508e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include unsigned long idle_halt; EXPORT_SYMBOL(idle_halt); @@ -47,8 +47,6 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - flush_thread_hw_breakpoint(tsk); WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } @@ -107,8 +105,7 @@ void flush_thread(void) } #endif - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - flush_thread_hw_breakpoint(tsk); + flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* * Forget coprocessor state.. diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 209e748..d5bd313 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,7 +59,6 @@ #include #include #include -#include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -264,9 +263,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, p->thread.io_bitmap_ptr = NULL; tsk = current; err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(tsk, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(tsk, p, clone_flags)) - goto out; + + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr, @@ -287,13 +285,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, err = do_set_thread_area(p, -1, (struct user_desc __user *)childregs->si, 0); -out: if (err && p->thread.io_bitmap_ptr) { kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - if (err) - flush_thread_hw_breakpoint(p); clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); p->thread.ds_ctx = NULL; @@ -437,23 +432,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) lazy_load_gs(next->gs); percpu_write(current_task, next_p); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 72edac0..5bafdec 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -53,7 +53,6 @@ #include #include #include -#include asmlinkage extern void ret_from_fork(void); @@ -244,8 +243,6 @@ void release_thread(struct task_struct *dead_task) BUG(); } } - if (unlikely(dead_task->thread.debugreg7)) - flush_thread_hw_breakpoint(dead_task); } static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr) @@ -309,9 +306,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, savesegment(ds, p->thread.ds); err = -ENOMEM; - if (unlikely(test_tsk_thread_flag(me, TIF_DEBUG))) - if (copy_thread_hw_breakpoint(me, p, clone_flags)) - goto out; + memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); @@ -351,8 +346,6 @@ out: kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - if (err) - flush_thread_hw_breakpoint(p); return err; } @@ -508,23 +501,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ if (preload_fpu) __math_state_restore(); - /* - * There's a problem with moving the arch_install_thread_hw_breakpoint() - * call before current is updated. Suppose a kernel breakpoint is - * triggered in between the two, the hw-breakpoint handler will see that - * the 'current' task does not have TIF_DEBUG flag set and will think it - * is leftover from an old task (lazy switching) and will erase it. Then - * until the next context switch, no user-breakpoints will be installed. - * - * The real problem is that it's impossible to update both current and - * physical debug registers at the same instant, so there will always be - * a window in which they disagree and a breakpoint might get triggered. - * Since we use lazy switching, we are forced to assume that a - * disagreement means that current is correct and the exception is due - * to lazy debug register switching. - */ - if (unlikely(test_tsk_thread_flag(next_p, TIF_DEBUG))) - arch_install_thread_hw_breakpoint(next_p); return prev_p; } diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 267cb85..e79610d 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include @@ -441,54 +443,59 @@ static int genregs_set(struct task_struct *target, return ret; } -/* - * Decode the length and type bits for a particular breakpoint as - * stored in debug register 7. Return the "enabled" status. - */ -static int decode_dr7(unsigned long dr7, int bpnum, unsigned *len, - unsigned *type) -{ - int bp_info = dr7 >> (DR_CONTROL_SHIFT + bpnum * DR_CONTROL_SIZE); - - *len = (bp_info & 0xc) | 0x40; - *type = (bp_info & 0x3) | 0x80; - return (dr7 >> (bpnum * DR_ENABLE_SIZE)) & 0x3; -} - -static void ptrace_triggered(struct hw_breakpoint *bp, struct pt_regs *regs) +static void ptrace_triggered(struct perf_event *bp, void *data) { - struct thread_struct *thread = &(current->thread); int i; + struct thread_struct *thread = &(current->thread); /* * Store in the virtual DR6 register the fact that the breakpoint * was hit so the thread's debugger will see it. */ - for (i = 0; i < hbp_kernel_pos; i++) - /* - * We will check bp->info.address against the address stored in - * thread's hbp structure and not debugreg[i]. This is to ensure - * that the corresponding bit for 'i' in DR7 register is enabled - */ - if (bp->info.address == thread->hbp[i]->info.address) + for (i = 0; i < HBP_NUM; i++) { + if (thread->ptrace_bps[i] == bp) break; + } thread->debugreg6 |= (DR_TRAP0 << i); } /* + * Walk through every ptrace breakpoints for this thread and + * build the dr7 value on top of their attributes. + * + */ +static unsigned long ptrace_get_dr7(struct perf_event *bp[]) +{ + int i; + int dr7 = 0; + struct arch_hw_breakpoint *info; + + for (i = 0; i < HBP_NUM; i++) { + if (bp[i] && !bp[i]->attr.disabled) { + info = counter_arch_bp(bp[i]); + dr7 |= encode_dr7(i, info->len, info->type); + } + } + + return dr7; +} + +/* * Handle ptrace writes to debug register 7. */ static int ptrace_write_dr7(struct task_struct *tsk, unsigned long data) { struct thread_struct *thread = &(tsk->thread); - unsigned long old_dr7 = thread->debugreg7; + unsigned long old_dr7; int i, orig_ret = 0, rc = 0; int enabled, second_pass = 0; unsigned len, type; - struct hw_breakpoint *bp; + int gen_len, gen_type; + struct perf_event *bp; data &= ~DR_CONTROL_RESERVED; + old_dr7 = ptrace_get_dr7(thread->ptrace_bps); restore: /* * Loop through all the hardware breakpoints, making the @@ -496,11 +503,12 @@ restore: */ for (i = 0; i < HBP_NUM; i++) { enabled = decode_dr7(data, i, &len, &type); - bp = thread->hbp[i]; + bp = thread->ptrace_bps[i]; if (!enabled) { if (bp) { - /* Don't unregister the breakpoints right-away, + /* + * Don't unregister the breakpoints right-away, * unless all register_user_hw_breakpoint() * requests have succeeded. This prevents * any window of opportunity for debug @@ -508,27 +516,45 @@ restore: */ if (!second_pass) continue; - unregister_user_hw_breakpoint(tsk, bp); - kfree(bp); + thread->ptrace_bps[i] = NULL; + unregister_hw_breakpoint(bp); } continue; } + + /* + * We shoud have at least an inactive breakpoint at this + * slot. It means the user is writing dr7 without having + * written the address register first + */ if (!bp) { - rc = -ENOMEM; - bp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL); - if (bp) { - bp->info.address = thread->debugreg[i]; - bp->triggered = ptrace_triggered; - bp->info.len = len; - bp->info.type = type; - rc = register_user_hw_breakpoint(tsk, bp); - if (rc) - kfree(bp); - } - } else - rc = modify_user_hw_breakpoint(tsk, bp); + rc = -EINVAL; + break; + } + + rc = arch_bp_generic_fields(len, type, &gen_len, &gen_type); if (rc) break; + + /* + * This is a temporary thing as bp is unregistered/registered + * to simulate modification + */ + bp = modify_user_hw_breakpoint(bp, bp->attr.bp_addr, gen_len, + gen_type, bp->callback, + tsk, true); + thread->ptrace_bps[i] = NULL; + + if (!bp) { /* incorrect bp, or we have a bug in bp API */ + rc = -EINVAL; + break; + } + if (IS_ERR(bp)) { + rc = PTR_ERR(bp); + bp = NULL; + break; + } + thread->ptrace_bps[i] = bp; } /* * Make a second pass to free the remaining unused breakpoints @@ -553,15 +579,63 @@ static unsigned long ptrace_get_debugreg(struct task_struct *tsk, int n) struct thread_struct *thread = &(tsk->thread); unsigned long val = 0; - if (n < HBP_NUM) - val = thread->debugreg[n]; - else if (n == 6) + if (n < HBP_NUM) { + struct perf_event *bp; + bp = thread->ptrace_bps[n]; + if (!bp) + return 0; + val = bp->hw.info.address; + } else if (n == 6) { val = thread->debugreg6; - else if (n == 7) - val = thread->debugreg7; + } else if (n == 7) { + val = ptrace_get_dr7(thread->ptrace_bps); + } return val; } +static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr, + unsigned long addr) +{ + struct perf_event *bp; + struct thread_struct *t = &tsk->thread; + + if (!t->ptrace_bps[nr]) { + /* + * Put stub len and type to register (reserve) an inactive but + * correct bp + */ + bp = register_user_hw_breakpoint(addr, HW_BREAKPOINT_LEN_1, + HW_BREAKPOINT_W, + ptrace_triggered, tsk, + false); + } else { + bp = t->ptrace_bps[nr]; + t->ptrace_bps[nr] = NULL; + bp = modify_user_hw_breakpoint(bp, addr, bp->attr.bp_len, + bp->attr.bp_type, + bp->callback, + tsk, + bp->attr.disabled); + } + + if (!bp) + return -EIO; + /* + * CHECKME: the previous code returned -EIO if the addr wasn't a + * valid task virtual addr. The new one will return -EINVAL in this + * case. + * -EINVAL may be what we want for in-kernel breakpoints users, but + * -EIO looks better for ptrace, since we refuse a register writing + * for the user. And anyway this is the previous behaviour. + */ + if (IS_ERR(bp)) + return PTR_ERR(bp); + + t->ptrace_bps[nr] = bp; + + return 0; +} + /* * Handle PTRACE_POKEUSR calls for the debug register area. */ @@ -575,19 +649,13 @@ int ptrace_set_debugreg(struct task_struct *tsk, int n, unsigned long val) return -EIO; if (n == 6) { - tsk->thread.debugreg6 = val; + thread->debugreg6 = val; goto ret_path; } if (n < HBP_NUM) { - if (thread->hbp[n]) { - if (arch_check_va_in_userspace(val, - thread->hbp[n]->info.len) == 0) { - rc = -EIO; - goto ret_path; - } - thread->hbp[n]->info.address = val; - } - thread->debugreg[n] = val; + rc = ptrace_set_breakpoint_addr(tsk, n, val); + if (rc) + return rc; } /* All that's left is DR7 */ if (n == 7) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 213a7a3..565ebc6 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -64,7 +64,6 @@ #include #include #include -#include #include #include @@ -328,7 +327,6 @@ notrace static void __cpuinit start_secondary(void *unused) x86_cpuinit.setup_percpu_clockev(); wmb(); - load_debug_registers(); cpu_idle(); } @@ -1269,7 +1267,6 @@ void cpu_disable_common(void) remove_cpu_from_maps(cpu); unlock_vector_lock(); fixup_irqs(); - hw_breakpoint_disable(); } int native_cpu_disable(void) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index fc2974a..22dee7a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -42,6 +42,7 @@ #define CREATE_TRACE_POINTS #include "trace.h" +#include #include #include #include @@ -3643,14 +3644,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) trace_kvm_entry(vcpu->vcpu_id); kvm_x86_ops->run(vcpu, kvm_run); - if (unlikely(vcpu->arch.switch_db_regs || test_thread_flag(TIF_DEBUG))) { - set_debugreg(current->thread.debugreg[0], 0); - set_debugreg(current->thread.debugreg[1], 1); - set_debugreg(current->thread.debugreg[2], 2); - set_debugreg(current->thread.debugreg[3], 3); - set_debugreg(current->thread.debugreg6, 6); - set_debugreg(current->thread.debugreg7, 7); - } + /* + * If the guest has used debug registers, at least dr7 + * will be disabled while returning to the host. + * If we don't have active breakpoints in the host, we don't + * care about the messed up debug address registers. But if + * we have some of them active, restore the old state. + */ + if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK) + hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); local_irq_enable(); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index e09a44f..0a979f3 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -105,7 +105,6 @@ static void __save_processor_state(struct saved_context *ctxt) ctxt->cr4 = read_cr4(); ctxt->cr8 = read_cr8(); #endif - hw_breakpoint_disable(); } /* Needed by apm.c */ @@ -144,11 +143,6 @@ static void fix_processor_context(void) #endif load_TR_desc(); /* This does ltr */ load_LDT(¤t->active_mm->context); /* This does lldt */ - - /* - * Now maybe reload the debug registers - */ - load_debug_registers(); } /** -- cgit v1.1 From 9f6b3c2c30cfbb1166ce7e74a8f9fd93ae19d2de Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 9 Nov 2009 21:03:43 +0100 Subject: hw-breakpoints: Fix broken a.out format dump Fix the broken a.out format dump. For now we only dump the ptrace breakpoints. TODO: Dump every perf breakpoints for the current thread, not only ptrace based ones. Reported-by: Ingo Molnar Signed-off-by: Frederic Weisbecker Cc: "K. Prasad" --- arch/x86/include/asm/a.out-core.h | 10 ++-------- arch/x86/include/asm/debugreg.h | 2 ++ arch/x86/kernel/hw_breakpoint.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index fc4685d..7a15588 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,7 @@ #include #include +#include /* * fill in the user structure for an a.out core dump @@ -32,14 +33,7 @@ static inline void aout_dump_thread(struct pt_regs *regs, struct user *dump) >> PAGE_SHIFT; dump->u_dsize -= dump->u_tsize; dump->u_ssize = 0; - dump->u_debugreg[0] = current->thread.debugreg[0]; - dump->u_debugreg[1] = current->thread.debugreg[1]; - dump->u_debugreg[2] = current->thread.debugreg[2]; - dump->u_debugreg[3] = current->thread.debugreg[3]; - dump->u_debugreg[4] = 0; - dump->u_debugreg[5] = 0; - dump->u_debugreg[6] = current->thread.debugreg6; - dump->u_debugreg[7] = current->thread.debugreg7; + aout_dump_debugregs(dump); if (dump->start_stack < TASK_SIZE) dump->u_ssize = ((unsigned long)(TASK_SIZE - dump->start_stack)) diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 9a3333c..f1b673f 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -89,6 +89,8 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +extern void aout_dump_debugregs(struct user *dump); + #ifdef CONFIG_KVM extern void hw_breakpoint_restore(void); #endif diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index e622620..57dcee5 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -376,6 +376,41 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, } /* + * Dump the debug register contents to the user. + * We can't dump our per cpu values because it + * may contain cpu wide breakpoint, something that + * doesn't belong to the current task. + * + * TODO: include non-ptrace user breakpoints (perf) + */ +void aout_dump_debugregs(struct user *dump) +{ + int i; + int dr7 = 0; + struct perf_event *bp; + struct arch_hw_breakpoint *info; + struct thread_struct *thread = ¤t->thread; + + for (i = 0; i < HBP_NUM; i++) { + bp = thread->ptrace_bps[i]; + + if (bp && !bp->attr.disabled) { + dump->u_debugreg[i] = bp->attr.bp_addr; + info = counter_arch_bp(bp); + dr7 |= encode_dr7(i, info->len, info->type); + } else { + dump->u_debugreg[i] = 0; + } + } + + dump->u_debugreg[4] = 0; + dump->u_debugreg[5] = 0; + dump->u_debugreg[6] = current->thread.debugreg6; + + dump->u_debugreg[7] = dr7; +} + +/* * Release the user breakpoints used by ptrace */ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) -- cgit v1.1 From 59d8eb53ea9947db7cad8ebc31b0fb54f23a9851 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 10 Nov 2009 11:03:12 +0100 Subject: hw-breakpoints: Wrap in the KVM breakpoint active state check Wrap in the cpu dr7 check that tells if we have active breakpoints that need to be restored in the cpu. This wrapper makes the check more self-explainable and also reusable for any further other uses. Reported-by: Jan Kiszka Signed-off-by: Frederic Weisbecker Cc: Avi Kivity Cc: "K. Prasad" --- arch/x86/include/asm/debugreg.h | 5 +++++ arch/x86/kvm/x86.c | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index f1b673f..0f6e92a 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -89,6 +89,11 @@ static inline void hw_breakpoint_disable(void) set_debugreg(0UL, 3); } +static inline int hw_breakpoint_active(void) +{ + return __get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK; +} + extern void aout_dump_debugregs(struct user *dump); #ifdef CONFIG_KVM diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 22dee7a..3817220 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3651,7 +3651,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) * care about the messed up debug address registers. But if * we have some of them active, restore the old state. */ - if (__get_cpu_var(dr7) & DR_GLOBAL_ENABLE_MASK) + if (hw_breakpoint_active()) hw_breakpoint_restore(); set_bit(KVM_REQ_KICK, &vcpu->requests); -- cgit v1.1 From 68efa37df779b3e04280598e8b5b3a1919b65fee Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Nov 2009 01:35:29 +0100 Subject: hw-breakpoints, x86: Fix modular KVM build This build error: arch/x86/kvm/x86.c:3655: error: implicit declaration of function 'hw_breakpoint_restore' Happens because in the CONFIG_KVM=m case there's no 'CONFIG_KVM' define in the kernel - it's CONFIG_KVM_MODULE in that case. Make the prototype available unconditionally. Cc: Frederic Weisbecker Cc: Prasad LKML-Reference: <1258114575-32655-1-git-send-email-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/debugreg.h | 2 -- arch/x86/kernel/hw_breakpoint.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 0f6e92a..fdabd84 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -96,9 +96,7 @@ static inline int hw_breakpoint_active(void) extern void aout_dump_debugregs(struct user *dump); -#ifdef CONFIG_KVM extern void hw_breakpoint_restore(void); -#endif #endif /* __KERNEL__ */ diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 57dcee5..752daeb 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -43,6 +43,7 @@ /* Per cpu debug control register value */ DEFINE_PER_CPU(unsigned long, dr7); +EXPORT_PER_CPU_SYMBOL(dr7); /* Per cpu debug address registers values */ static DEFINE_PER_CPU(unsigned long, cpu_debugreg[HBP_NUM]); @@ -409,6 +410,7 @@ void aout_dump_debugregs(struct user *dump) dump->u_debugreg[7] = dr7; } +EXPORT_SYMBOL_GPL(aout_dump_debugregs); /* * Release the user breakpoints used by ptrace @@ -424,7 +426,6 @@ void flush_ptrace_hw_breakpoint(struct task_struct *tsk) } } -#ifdef CONFIG_KVM void hw_breakpoint_restore(void) { set_debugreg(__get_cpu_var(cpu_debugreg[0]), 0); @@ -435,7 +436,6 @@ void hw_breakpoint_restore(void) set_debugreg(__get_cpu_var(dr7), 7); } EXPORT_SYMBOL_GPL(hw_breakpoint_restore); -#endif /* * Handle debug exception notifications. -- cgit v1.1