From 2f0993e0fb663c49e4d1e02654f6203246be4817 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 5 Dec 2009 07:06:10 +0100 Subject: hw-breakpoints: Drop callback and task parameters from modify helper Drop the callback and task parameters from modify_user_hw_breakpoint(). For now we have no user that need to modify a breakpoint to the point of changing its handler or its task context. Signed-off-by: Frederic Weisbecker Cc: "K. Prasad" --- kernel/hw_breakpoint.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index cf5ee16..2d10b01 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -312,9 +312,7 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); * @tsk: pointer to 'task_struct' of the process to which the address belongs */ struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, - perf_callback_t triggered, - struct task_struct *tsk) +modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { /* * FIXME: do it without unregistering @@ -323,7 +321,8 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr, */ unregister_hw_breakpoint(bp); - return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); + return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid, + bp->callback); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); -- cgit v1.1 From b326e9560a28fc3e950637ef51847ed8f05c1335 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 5 Dec 2009 09:44:31 +0100 Subject: hw-breakpoints: Use overflow handler instead of the event callback struct perf_event::event callback was called when a breakpoint triggers. But this is a rather opaque callback, pretty tied-only to the breakpoint API and not really integrated into perf as it triggers even when we don't overflow. We prefer to use overflow_handler() as it fits into the perf events rules, being called only when we overflow. Reported-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: "K. Prasad" --- kernel/hw_breakpoint.c | 17 +++++------------ kernel/perf_event.c | 24 +++++++++--------------- kernel/trace/trace_ksym.c | 5 +++-- 3 files changed, 17 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 2d10b01..b600fc2 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -259,7 +259,7 @@ void release_bp_slot(struct perf_event *bp) } -int __register_perf_hw_breakpoint(struct perf_event *bp) +int register_perf_hw_breakpoint(struct perf_event *bp) { int ret; @@ -276,19 +276,12 @@ int __register_perf_hw_breakpoint(struct perf_event *bp) * This is a quick hack that will be removed soon, once we remove * the tmp breakpoints from ptrace */ - if (!bp->attr.disabled || bp->callback == perf_bp_event) + if (!bp->attr.disabled || !bp->overflow_handler) ret = arch_validate_hwbkpt_settings(bp, bp->ctx->task); return ret; } -int register_perf_hw_breakpoint(struct perf_event *bp) -{ - bp->callback = perf_bp_event; - - return __register_perf_hw_breakpoint(bp); -} - /** * register_user_hw_breakpoint - register a hardware breakpoint for user space * @attr: breakpoint attributes @@ -297,7 +290,7 @@ int register_perf_hw_breakpoint(struct perf_event *bp) */ struct perf_event * register_user_hw_breakpoint(struct perf_event_attr *attr, - perf_callback_t triggered, + perf_overflow_handler_t triggered, struct task_struct *tsk) { return perf_event_create_kernel_counter(attr, -1, tsk->pid, triggered); @@ -322,7 +315,7 @@ modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) unregister_hw_breakpoint(bp); return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid, - bp->callback); + bp->overflow_handler); } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); @@ -347,7 +340,7 @@ EXPORT_SYMBOL_GPL(unregister_hw_breakpoint); */ struct perf_event ** register_wide_hw_breakpoint(struct perf_event_attr *attr, - perf_callback_t triggered) + perf_overflow_handler_t triggered) { struct perf_event **cpu_events, **pevent, *bp; long err; diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 6b7ddba..fd43ff4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4286,15 +4286,8 @@ static void bp_perf_event_destroy(struct perf_event *event) static const struct pmu *bp_perf_event_init(struct perf_event *bp) { int err; - /* - * The breakpoint is already filled if we haven't created the counter - * through perf syscall - * FIXME: manage to get trigerred to NULL if it comes from syscalls - */ - if (!bp->callback) - err = register_perf_hw_breakpoint(bp); - else - err = __register_perf_hw_breakpoint(bp); + + err = register_perf_hw_breakpoint(bp); if (err) return ERR_PTR(err); @@ -4390,7 +4383,7 @@ perf_event_alloc(struct perf_event_attr *attr, struct perf_event_context *ctx, struct perf_event *group_leader, struct perf_event *parent_event, - perf_callback_t callback, + perf_overflow_handler_t overflow_handler, gfp_t gfpflags) { const struct pmu *pmu; @@ -4433,10 +4426,10 @@ perf_event_alloc(struct perf_event_attr *attr, event->state = PERF_EVENT_STATE_INACTIVE; - if (!callback && parent_event) - callback = parent_event->callback; + if (!overflow_handler && parent_event) + overflow_handler = parent_event->overflow_handler; - event->callback = callback; + event->overflow_handler = overflow_handler; if (attr->disabled) event->state = PERF_EVENT_STATE_OFF; @@ -4776,7 +4769,8 @@ err_put_context: */ struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, - pid_t pid, perf_callback_t callback) + pid_t pid, + perf_overflow_handler_t overflow_handler) { struct perf_event *event; struct perf_event_context *ctx; @@ -4793,7 +4787,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu, } event = perf_event_alloc(attr, cpu, ctx, NULL, - NULL, callback, GFP_KERNEL); + NULL, overflow_handler, GFP_KERNEL); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_put_context; diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c index ddfa0fd..acb87d4 100644 --- a/kernel/trace/trace_ksym.c +++ b/kernel/trace/trace_ksym.c @@ -79,11 +79,12 @@ void ksym_collect_stats(unsigned long hbp_hit_addr) } #endif /* CONFIG_PROFILE_KSYM_TRACER */ -void ksym_hbp_handler(struct perf_event *hbp, void *data) +void ksym_hbp_handler(struct perf_event *hbp, int nmi, + struct perf_sample_data *data, + struct pt_regs *regs) { struct ring_buffer_event *event; struct ksym_trace_entry *entry; - struct pt_regs *regs = data; struct ring_buffer *buffer; int pc; -- cgit v1.1 From 56053170ea2a2c0dc17420e9b94aa3ca51d80408 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 7 Dec 2009 06:46:48 +0100 Subject: hw-breakpoints: Fix task-bound breakpoint slot allocation Whatever the context nature of a breakpoint, we always perform the following constraint checks before allocating it a slot: - Check the number of pinned breakpoint bound the concerned cpus - Check the max number of task-bound breakpoints that are belonging to a task. - Add both and see if we have a reamining slot for the new breakpoint This is the right thing to do when we are about to register a cpu-only bound breakpoint. But not if we are dealing with a task bound breakpoint. What we want in this case is: - Check the number of pinned breakpoint bound the concerned cpus - Check the number of breakpoints that already belong to the task in which the breakpoint to register is bound to. - Add both This fixes a regression that makes the "firefox -g" command fail to register breakpoints once we deal with a secondary thread. Reported-by: Walt Signed-off-by: Frederic Weisbecker Cc: Prasad --- kernel/hw_breakpoint.c | 74 ++++++++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 29 deletions(-) (limited to 'kernel') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index b600fc2..02b4925 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -83,15 +83,51 @@ static unsigned int max_task_bp_pinned(int cpu) return 0; } +static int task_bp_pinned(struct task_struct *tsk) +{ + struct perf_event_context *ctx = tsk->perf_event_ctxp; + struct list_head *list; + struct perf_event *bp; + unsigned long flags; + int count = 0; + + if (WARN_ONCE(!ctx, "No perf context for this task")) + return 0; + + list = &ctx->event_list; + + spin_lock_irqsave(&ctx->lock, flags); + + /* + * The current breakpoint counter is not included in the list + * at the open() callback time + */ + list_for_each_entry(bp, list, event_entry) { + if (bp->attr.type == PERF_TYPE_BREAKPOINT) + count++; + } + + spin_unlock_irqrestore(&ctx->lock, flags); + + return count; +} + /* * Report the number of pinned/un-pinned breakpoints we have in * a given cpu (cpu > -1) or in all of them (cpu = -1). */ -static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) +static void +fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp) { + int cpu = bp->cpu; + struct task_struct *tsk = bp->ctx->task; + if (cpu >= 0) { slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu); - slots->pinned += max_task_bp_pinned(cpu); + if (!tsk) + slots->pinned += max_task_bp_pinned(cpu); + else + slots->pinned += task_bp_pinned(tsk); slots->flexible = per_cpu(nr_bp_flexible, cpu); return; @@ -101,7 +137,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) unsigned int nr; nr = per_cpu(nr_cpu_bp_pinned, cpu); - nr += max_task_bp_pinned(cpu); + if (!tsk) + nr += max_task_bp_pinned(cpu); + else + nr += task_bp_pinned(tsk); if (nr > slots->pinned) slots->pinned = nr; @@ -118,33 +157,10 @@ static void fetch_bp_busy_slots(struct bp_busy_slots *slots, int cpu) */ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) { - int count = 0; - struct perf_event *bp; - struct perf_event_context *ctx = tsk->perf_event_ctxp; unsigned int *tsk_pinned; - struct list_head *list; - unsigned long flags; - - if (WARN_ONCE(!ctx, "No perf context for this task")) - return; - - list = &ctx->event_list; - - spin_lock_irqsave(&ctx->lock, flags); - - /* - * The current breakpoint counter is not included in the list - * at the open() callback time - */ - list_for_each_entry(bp, list, event_entry) { - if (bp->attr.type == PERF_TYPE_BREAKPOINT) - count++; - } - - spin_unlock_irqrestore(&ctx->lock, flags); + int count = 0; - if (WARN_ONCE(count < 0, "No breakpoint counter found in the counter list")) - return; + count = task_bp_pinned(tsk); tsk_pinned = per_cpu(task_bp_pinned, cpu); if (enable) { @@ -233,7 +249,7 @@ int reserve_bp_slot(struct perf_event *bp) mutex_lock(&nr_bp_mutex); - fetch_bp_busy_slots(&slots, bp->cpu); + fetch_bp_busy_slots(&slots, bp); /* Flexible counters need to keep at least one slot */ if (slots.pinned + (!!slots.flexible) == HBP_NUM) { -- cgit v1.1 From 6ab8886326a1b9a3a8d164d8174e3c20703a03a2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 8 Dec 2009 18:25:15 +1100 Subject: perf: hw_breakpoints: Fix percpu namespace clash Today's linux-next build failed with: kernel/hw_breakpoint.c:86: error: 'task_bp_pinned' redeclared as different kind of symbol ... Caused by commit dd17c8f72993f9461e9c19250e3f155d6d99df22 ("percpu: remove per_cpu__ prefix") from the percpu tree interacting with commit 56053170ea2a2c0dc17420e9b94aa3ca51d80408 ("hw-breakpoints: Fix task-bound breakpoint slot allocation") from the tip tree. Signed-off-by: Stephen Rothwell Acked-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Tejun Heo Cc: Rusty Russell Cc: Christoph Lameter Cc: Andrew Morton Cc: Linus Torvalds LKML-Reference: <20091208182515.bb6dda4a.sfr@canb.auug.org.au> Signed-off-by: Ingo Molnar --- kernel/hw_breakpoint.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 02b4925..03a0773 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -52,7 +52,7 @@ static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned); /* Number of pinned task breakpoints in a cpu */ -static DEFINE_PER_CPU(unsigned int, task_bp_pinned[HBP_NUM]); +static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]); /* Number of non-pinned cpu/task breakpoints in a cpu */ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible); @@ -73,7 +73,7 @@ static DEFINE_MUTEX(nr_bp_mutex); static unsigned int max_task_bp_pinned(int cpu) { int i; - unsigned int *tsk_pinned = per_cpu(task_bp_pinned, cpu); + unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); for (i = HBP_NUM -1; i >= 0; i--) { if (tsk_pinned[i] > 0) @@ -162,7 +162,7 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable) count = task_bp_pinned(tsk); - tsk_pinned = per_cpu(task_bp_pinned, cpu); + tsk_pinned = per_cpu(nr_task_bp_pinned, cpu); if (enable) { tsk_pinned[count]++; if (count > 0) @@ -209,7 +209,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * - If attached to a single cpu, check: * * (per_cpu(nr_bp_flexible, cpu) || (per_cpu(nr_cpu_bp_pinned, cpu) - * + max(per_cpu(task_bp_pinned, cpu)))) < HBP_NUM + * + max(per_cpu(nr_task_bp_pinned, cpu)))) < HBP_NUM * * -> If there are already non-pinned counters in this cpu, it means * there is already a free slot for them. @@ -220,7 +220,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * - If attached to every cpus, check: * * (per_cpu(nr_bp_flexible, *) || (max(per_cpu(nr_cpu_bp_pinned, *)) - * + max(per_cpu(task_bp_pinned, *)))) < HBP_NUM + * + max(per_cpu(nr_task_bp_pinned, *)))) < HBP_NUM * * -> This is roughly the same, except we check the number of per cpu * bp for every cpu and we keep the max one. Same for the per tasks @@ -232,7 +232,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * - If attached to a single cpu, check: * * ((per_cpu(nr_bp_flexible, cpu) > 1) + per_cpu(nr_cpu_bp_pinned, cpu) - * + max(per_cpu(task_bp_pinned, cpu))) < HBP_NUM + * + max(per_cpu(nr_task_bp_pinned, cpu))) < HBP_NUM * * -> Same checks as before. But now the nr_bp_flexible, if any, must keep * one register at least (or they will never be fed). @@ -240,7 +240,7 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable) * - If attached to every cpus, check: * * ((per_cpu(nr_bp_flexible, *) > 1) + max(per_cpu(nr_cpu_bp_pinned, *)) - * + max(per_cpu(task_bp_pinned, *))) < HBP_NUM + * + max(per_cpu(nr_task_bp_pinned, *))) < HBP_NUM */ int reserve_bp_slot(struct perf_event *bp) { -- cgit v1.1 From a7c312bed772c11138409c3a98531e85d690302e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 8 Dec 2009 17:03:16 -0500 Subject: trace-kprobe: Support delete probe syntax Support delete probe syntax. The syntax is "-:[group/]event". Signed-off-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE LKML-Reference: <20091208220316.10142.39192.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar Cc: Steven Rostedt Cc: Jim Keniston Cc: Ananth N Mavinakayanahalli Cc: Christoph Hellwig Cc: Frank Ch. Eigler Cc: Frederic Weisbecker Cc: Jason Baron Cc: K.Prasad Cc: Peter Zijlstra Cc: Srikar Dronamraju Cc: Arnaldo Carvalho de Melo --- kernel/trace/trace_kprobe.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index aff5f80..bf05fb4 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -606,23 +606,22 @@ static int create_trace_probe(int argc, char **argv) */ struct trace_probe *tp; int i, ret = 0; - int is_return = 0; + int is_return = 0, is_delete = 0; char *symbol = NULL, *event = NULL, *arg = NULL, *group = NULL; unsigned long offset = 0; void *addr = NULL; char buf[MAX_EVENT_NAME_LEN]; - if (argc < 2) { - pr_info("Probe point is not specified.\n"); - return -EINVAL; - } - + /* argc must be >= 1 */ if (argv[0][0] == 'p') is_return = 0; else if (argv[0][0] == 'r') is_return = 1; + else if (argv[0][0] == '-') + is_delete = 1; else { - pr_info("Probe definition must be started with 'p' or 'r'.\n"); + pr_info("Probe definition must be started with 'p', 'r' or" + " '-'.\n"); return -EINVAL; } @@ -642,7 +641,29 @@ static int create_trace_probe(int argc, char **argv) return -EINVAL; } } + if (!group) + group = KPROBE_EVENT_SYSTEM; + if (is_delete) { + if (!event) { + pr_info("Delete command needs an event name.\n"); + return -EINVAL; + } + tp = find_probe_event(event, group); + if (!tp) { + pr_info("Event %s/%s doesn't exist.\n", group, event); + return -ENOENT; + } + /* delete an event */ + unregister_trace_probe(tp); + free_trace_probe(tp); + return 0; + } + + if (argc < 2) { + pr_info("Probe point is not specified.\n"); + return -EINVAL; + } if (isdigit(argv[1][0])) { if (is_return) { pr_info("Return probe point must be a symbol.\n"); @@ -671,8 +692,6 @@ static int create_trace_probe(int argc, char **argv) argc -= 2; argv += 2; /* setup a probe */ - if (!group) - group = KPROBE_EVENT_SYSTEM; if (!event) { /* Make a new event name */ if (symbol) -- cgit v1.1 From 44234adcdce38f83c56e05f808ce656175b4beeb Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 9 Dec 2009 09:25:48 +0100 Subject: hw-breakpoints: Modify breakpoints without unregistering them Currently, when ptrace needs to modify a breakpoint, like disabling it, changing its address, type or len, it calls modify_user_hw_breakpoint(). This latter will perform the heavy and racy task of unregistering the old breakpoint and registering a new one. This is racy as someone else might steal the reserved breakpoint slot under us, which is undesired as the breakpoint is only supposed to be modified, sometimes in the middle of a debugging workflow. We don't want our slot to be stolen in the middle. So instead of unregistering/registering the breakpoint, just disable it while we modify its breakpoint fields and re-enable it after if necessary. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Prasad LKML-Reference: <1260347148-5519-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/hw_breakpoint.c | 42 ++++++++++++++++++++++++++++++++---------- kernel/perf_event.c | 4 ++-- 2 files changed, 34 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index 03a0773..366eedf 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -320,18 +320,40 @@ EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); * @triggered: callback to trigger when we hit the breakpoint * @tsk: pointer to 'task_struct' of the process to which the address belongs */ -struct perf_event * -modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) +int modify_user_hw_breakpoint(struct perf_event *bp, struct perf_event_attr *attr) { - /* - * FIXME: do it without unregistering - * - We don't want to lose our slot - * - If the new bp is incorrect, don't lose the older one - */ - unregister_hw_breakpoint(bp); + u64 old_addr = bp->attr.bp_addr; + int old_type = bp->attr.bp_type; + int old_len = bp->attr.bp_len; + int err = 0; + + perf_event_disable(bp); + + bp->attr.bp_addr = attr->bp_addr; + bp->attr.bp_type = attr->bp_type; + bp->attr.bp_len = attr->bp_len; + + if (attr->disabled) + goto end; - return perf_event_create_kernel_counter(attr, -1, bp->ctx->task->pid, - bp->overflow_handler); + err = arch_validate_hwbkpt_settings(bp, bp->ctx->task); + if (!err) + perf_event_enable(bp); + + if (err) { + bp->attr.bp_addr = old_addr; + bp->attr.bp_type = old_type; + bp->attr.bp_len = old_len; + if (!bp->attr.disabled) + perf_event_enable(bp); + + return err; + } + +end: + bp->attr.disabled = attr->disabled; + + return 0; } EXPORT_SYMBOL_GPL(modify_user_hw_breakpoint); diff --git a/kernel/perf_event.c b/kernel/perf_event.c index fd43ff4..3b0cf86 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -567,7 +567,7 @@ static void __perf_event_disable(void *info) * is the current context on this CPU and preemption is disabled, * hence we can't get into perf_event_task_sched_out for this context. */ -static void perf_event_disable(struct perf_event *event) +void perf_event_disable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; @@ -971,7 +971,7 @@ static void __perf_event_enable(void *info) * perf_event_for_each_child or perf_event_for_each as described * for perf_event_disable. */ -static void perf_event_enable(struct perf_event *event) +void perf_event_enable(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; struct task_struct *task = ctx->task; -- cgit v1.1 From aa5452d70c0d559310598b243b8b1033c10056e7 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 9 Dec 2009 11:28:13 +0800 Subject: perf_event: Clean up __perf_event_init_context() Clean up the code a bit: - define 'perf_cpu_context' variable with 'static' - use kzalloc() instead of kmalloc() and memset() Signed-off-by: Xiao Guangrong Reviewed-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4B1F194D.7080306@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3b0cf86..2b06c45 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -36,7 +36,7 @@ /* * Each CPU has a list of per CPU events: */ -DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); +static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); int perf_max_events __read_mostly = 1; static int perf_reserved_percpu __read_mostly; @@ -1579,7 +1579,6 @@ static void __perf_event_init_context(struct perf_event_context *ctx, struct task_struct *task) { - memset(ctx, 0, sizeof(*ctx)); spin_lock_init(&ctx->lock); mutex_init(&ctx->mutex); INIT_LIST_HEAD(&ctx->group_list); @@ -1654,7 +1653,7 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu) } if (!ctx) { - ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); + ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); err = -ENOMEM; if (!ctx) goto errout; @@ -5105,7 +5104,7 @@ int perf_event_init_task(struct task_struct *child) * First allocate and initialize a context for the child. */ - child_ctx = kmalloc(sizeof(struct perf_event_context), GFP_KERNEL); + child_ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); if (!child_ctx) return -ENOMEM; -- cgit v1.1 From b93f7978ad6b46133e9453b90ccc057dc2429e75 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 9 Dec 2009 11:29:44 +0800 Subject: perf_event: Allocate children's perf_event_ctxp at the right time In current code, children task will allocate memory for 'child->perf_event_ctxp' if the parent is counted, we can do it only if the parent allowed children inherit it. It can save memory and reduce overhead. Signed-off-by: Xiao Guangrong Reviewed-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4B1F19A8.5040805@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 2b06c45..77641ae 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5083,7 +5083,7 @@ again: */ int perf_event_init_task(struct task_struct *child) { - struct perf_event_context *child_ctx, *parent_ctx; + struct perf_event_context *child_ctx = NULL, *parent_ctx; struct perf_event_context *cloned_ctx; struct perf_event *event; struct task_struct *parent = current; @@ -5099,20 +5099,6 @@ int perf_event_init_task(struct task_struct *child) return 0; /* - * This is executed from the parent task context, so inherit - * events that have been marked for cloning. - * First allocate and initialize a context for the child. - */ - - child_ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL); - if (!child_ctx) - return -ENOMEM; - - __perf_event_init_context(child_ctx, child); - child->perf_event_ctxp = child_ctx; - get_task_struct(child); - - /* * If the parent's context is a clone, pin it so it won't get * swapped under us. */ @@ -5142,6 +5128,26 @@ int perf_event_init_task(struct task_struct *child) continue; } + if (!child->perf_event_ctxp) { + /* + * This is executed from the parent task context, so + * inherit events that have been marked for cloning. + * First allocate and initialize a context for the + * child. + */ + + child_ctx = kzalloc(sizeof(struct perf_event_context), + GFP_KERNEL); + if (!child_ctx) { + ret = -ENOMEM; + goto exit; + } + + __perf_event_init_context(child_ctx, child); + child->perf_event_ctxp = child_ctx; + get_task_struct(child); + } + ret = inherit_group(event, parent, parent_ctx, child, child_ctx); if (ret) { @@ -5170,6 +5176,7 @@ int perf_event_init_task(struct task_struct *child) get_ctx(child_ctx->parent_ctx); } +exit: mutex_unlock(&parent_ctx->mutex); perf_unpin_context(parent_ctx); -- cgit v1.1 From ec89a06fd4e12301f11ab039ee07d2353a18addc Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Wed, 9 Dec 2009 11:30:36 +0800 Subject: perf_event: Cleanup for cpu_clock_perf_event_update() Using atomic64_xchg() instead of atomic64_read() and atomic64_set(). Signed-off-by: Xiao Guangrong Reviewed-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Paul Mackerras LKML-Reference: <4B1F19DC.90204@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 77641ae..94e1b28 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4079,8 +4079,7 @@ static void cpu_clock_perf_event_update(struct perf_event *event) u64 now; now = cpu_clock(cpu); - prev = atomic64_read(&event->hw.prev_count); - atomic64_set(&event->hw.prev_count, now); + prev = atomic64_xchg(&event->hw.prev_count, now); atomic64_add(now - prev, &event->count); } -- cgit v1.1 From 822a6961112f0c9101d3359d8524604c3309ee6c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 8 Dec 2009 10:00:04 +0100 Subject: tracing/kprobes: Fix field creation's bad error handling When we define the common event fields in kprobe, we invert the error handling and return immediately in case of success. Then we omit to define specific kprobes fields (ip and nargs), and specific kretprobes fields (func, ret_ip, nargs). And we only define them when we fail to create common fields. The most visible consequence is that we can't create filter for k(ret)probes specific fields. This patch re-invert the success/error handling to fix it. Reported-by: Lai Jiangshan Signed-off-by: Frederic Weisbecker Acked-by: Masami Hiramatsu Cc: Steven Rostedt Cc: Li Zefan LKML-Reference: <1260263815-5167-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- kernel/trace/trace_kprobe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index bf05fb4..b52d397 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1133,7 +1133,7 @@ static int kprobe_event_define_fields(struct ftrace_event_call *event_call) struct trace_probe *tp = (struct trace_probe *)event_call->data; ret = trace_define_common_fields(event_call); - if (!ret) + if (ret) return ret; DEFINE_FIELD(unsigned long, ip, FIELD_STRING_IP, 0); @@ -1151,7 +1151,7 @@ static int kretprobe_event_define_fields(struct ftrace_event_call *event_call) struct trace_probe *tp = (struct trace_probe *)event_call->data; ret = trace_define_common_fields(event_call); - if (!ret) + if (ret) return ret; DEFINE_FIELD(unsigned long, func, FIELD_STRING_FUNC, 0); -- cgit v1.1 From 21140f4d3387aa2213f1deea0128df1dbf924379 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 10 Dec 2009 14:00:51 +0800 Subject: perf_event: Fix perf_swevent_hrtimer() variable initialization fix: [] ? printk+0x1d/0x24 [] ? perf_prepare_sample+0x269/0x280 [] warn_slowpath_common+0x71/0xd0 [] ? perf_prepare_sample+0x269/0x280 [] warn_slowpath_null+0x1a/0x20 [] perf_prepare_sample+0x269/0x280 [] ? cpu_clock+0x53/0x90 [] __perf_event_overflow+0x2a8/0x300 [] perf_event_overflow+0x1b/0x30 [] perf_swevent_hrtimer+0x7f/0x120 This is because 'data.raw' variable not initialize. Signed-off-by: Xiao Guangrong Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <4B208E93.1010801@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 94e1b28..3a5d6c4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4010,6 +4010,7 @@ static enum hrtimer_restart perf_swevent_hrtimer(struct hrtimer *hrtimer) event->pmu->read(event); data.addr = 0; + data.raw = NULL; data.period = event->hw.last_period; regs = get_irq_regs(); /* -- cgit v1.1 From 5e855db5d8fec44e6604eb245aa9077bbd3f0d05 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Thu, 10 Dec 2009 17:08:54 +0800 Subject: perf_event: Fix variable initialization in other codepaths Signed-off-by: Xiao Guangrong Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras LKML-Reference: <4B20BAA6.7010609@cn.fujitsu.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 3a5d6c4..d891ec4 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -4300,6 +4300,7 @@ void perf_bp_event(struct perf_event *bp, void *data) struct perf_sample_data sample; struct pt_regs *regs = data; + sample.raw = NULL; sample.addr = bp->attr.bp_addr; if (!perf_exclude_event(bp, regs)) -- cgit v1.1