From e9b3cc1b3779fe10a80de4c3e7404bd308d0eae3 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Thu, 13 Aug 2009 05:19:44 +0000 Subject: net: skb ftracer - add tracepoint to skb_copy_datagram_iovec (v3) skb allocation / cosumption tracer - Add consumption tracepoint This patch adds a tracepoint to skb_copy_datagram_iovec, which is called each time a userspace process copies a frame from a socket receive queue to a user space buffer. It allows us to hook in and examine each sk_buff that the system receives on a per-socket bases, and can be use to compile a list of which skb's were received by which processes. Signed-off-by: Neil Horman include/trace/events/skb.h | 20 ++++++++++++++++++++ net/core/datagram.c | 3 +++ 2 files changed, 23 insertions(+) Signed-off-by: David S. Miller --- include/trace/events/skb.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index e499863..4b2be6d 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -5,6 +5,7 @@ #define _TRACE_SKB_H #include +#include #include /* @@ -34,6 +35,25 @@ TRACE_EVENT(kfree_skb, __entry->skbaddr, __entry->protocol, __entry->location) ); +TRACE_EVENT(skb_copy_datagram_iovec, + + TP_PROTO(const struct sk_buff *skb, int len), + + TP_ARGS(skb, len), + + TP_STRUCT__entry( + __field( const void *, skbaddr ) + __field( int, len ) + ), + + TP_fast_assign( + __entry->skbaddr = skb; + __entry->len = len; + ), + + TP_printk("skbaddr=%p len=%d", __entry->skbaddr, __entry->len) +); + #endif /* _TRACE_SKB_H */ /* This part must be outside protection */ -- cgit v1.1 From 2b022e3d4bf9885f781221c59d86283a2cdfc2ed Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:48:59 +0800 Subject: timers: Add tracepoints for timer_list timers Add tracepoints which cover the timer life cycle. The tracepoints are integrated with the already existing debug_object debug points as far as possible. Based on patches from Mathieu: http://marc.info/?l=linux-kernel&m=123791201816247&w=2 and Anton: http://marc.info/?l=linux-kernel&m=124331396919301&w=2 [ tglx: Fixed timeout value in timer_start tracepoint, massaged comments and made the printk's more readable ] Signed-off-by: Xiao Guangrong Cc: Anton Blanchard Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8A9B.3040201@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 137 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) create mode 100644 include/trace/events/timer.h (limited to 'include/trace') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h new file mode 100644 index 0000000..725892a --- /dev/null +++ b/include/trace/events/timer.h @@ -0,0 +1,137 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM timer + +#if !defined(_TRACE_TIMER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_TIMER_H + +#include +#include + +/** + * timer_init - called when the timer is initialized + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_init, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_start - called when the timer is started + * @timer: pointer to struct timer_list + * @expires: the timers expiry time + */ +TRACE_EVENT(timer_start, + + TP_PROTO(struct timer_list *timer, unsigned long expires), + + TP_ARGS(timer, expires), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( unsigned long, expires ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = expires; + __entry->now = jiffies; + ), + + TP_printk("timer %p: func %pf, expires %lu, timeout %ld", + __entry->timer, __entry->function, __entry->expires, + (long)__entry->expires - __entry->now) +); + +/** + * timer_expire_entry - called immediately before the timer callback + * @timer: pointer to struct timer_list + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(timer_expire_entry, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( unsigned long, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = jiffies; + ), + + TP_printk("timer %p: now %lu", __entry->timer, __entry->now) +); + +/** + * timer_expire_exit - called immediately after the timer callback returns + * @timer: pointer to struct timer_list + * + * When used in combination with the timer_expire_entry tracepoint we can + * determine the runtime of the timer callback function. + * + * NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might + * be invalid. We solely track the pointer. + */ +TRACE_EVENT(timer_expire_exit, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field(void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +/** + * timer_cancel - called when the timer is canceled + * @timer: pointer to struct timer_list + */ +TRACE_EVENT(timer_cancel, + + TP_PROTO(struct timer_list *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("timer %p", __entry->timer) +); + +#endif /* _TRACE_TIMER_H */ + +/* This part must be outside protection */ +#include -- cgit v1.1 From c6a2a1770245f654f35f60e1458d4356680f9519 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:51:23 +0800 Subject: hrtimer: Add tracepoint for hrtimers Add tracepoints which cover the life cycle of a hrtimer. The tracepoints are integrated with the already existing debug_object debug points as far as possible. [ tglx: Fixed comments, made output conistent, easier to read and parse. Fixed output for 32bit archs which do not use the scalar representation of ktime_t. Hand current time to trace_hrtimer_expiry_entry instead of calling get_time() inside of the trace assignment. ] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B2B.5020908@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 139 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 139 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index 725892a..df3c07f 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -5,6 +5,7 @@ #define _TRACE_TIMER_H #include +#include #include /** @@ -131,6 +132,144 @@ TRACE_EVENT(timer_cancel, TP_printk("timer %p", __entry->timer) ); +/** + * hrtimer_init - called when the hrtimer is initialized + * @timer: pointer to struct hrtimer + * @clockid: the hrtimers clock + * @mode: the hrtimers mode + */ +TRACE_EVENT(hrtimer_init, + + TP_PROTO(struct hrtimer *timer, clockid_t clockid, + enum hrtimer_mode mode), + + TP_ARGS(timer, clockid, mode), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( clockid_t, clockid ) + __field( enum hrtimer_mode, mode ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->clockid = clockid; + __entry->mode = mode; + ), + + TP_printk("hrtimer %p, clockid %s, mode %s", __entry->timer, + __entry->clockid == CLOCK_REALTIME ? + "CLOCK_REALTIME" : "CLOCK_MONOTONIC", + __entry->mode == HRTIMER_MODE_ABS ? + "HRTIMER_MODE_ABS" : "HRTIMER_MODE_REL") +); + +/** + * hrtimer_start - called when the hrtimer is started + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_start, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( void *, function ) + __field( s64, expires ) + __field( s64, softexpires ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->function = timer->function; + __entry->expires = hrtimer_get_expires(timer).tv64; + __entry->softexpires = hrtimer_get_softexpires(timer).tv64; + ), + + TP_printk("hrtimer %p, func %pf, expires %llu, softexpires %llu", + __entry->timer, __entry->function, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->expires }), + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->softexpires })) +); + +/** + * htimmer_expire_entry - called immediately before the hrtimer callback + * @timer: pointer to struct hrtimer + * @now: pointer to variable which contains current time of the + * timers base. + * + * Allows to determine the timer latency. + */ +TRACE_EVENT(hrtimer_expire_entry, + + TP_PROTO(struct hrtimer *timer, ktime_t *now), + + TP_ARGS(timer, now), + + TP_STRUCT__entry( + __field( void *, timer ) + __field( s64, now ) + ), + + TP_fast_assign( + __entry->timer = timer; + __entry->now = now->tv64; + ), + + TP_printk("hrtimer %p, now %llu", __entry->timer, + (unsigned long long)ktime_to_ns((ktime_t) { + .tv64 = __entry->now })) + ); + +/** + * hrtimer_expire_exit - called immediately after the hrtimer callback returns + * @timer: pointer to struct hrtimer + * + * When used in combination with the hrtimer_expire_entry tracepoint we can + * determine the runtime of the callback function. + */ +TRACE_EVENT(hrtimer_expire_exit, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + +/** + * hrtimer_cancel - called when the hrtimer is canceled + * @timer: pointer to struct hrtimer + */ +TRACE_EVENT(hrtimer_cancel, + + TP_PROTO(struct hrtimer *timer), + + TP_ARGS(timer), + + TP_STRUCT__entry( + __field( void *, timer ) + ), + + TP_fast_assign( + __entry->timer = timer; + ), + + TP_printk("hrtimer %p", __entry->timer) +); + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ -- cgit v1.1 From 3f0a525ebf4b8ef041a332bbe4a73aee94bb064b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 10 Aug 2009 10:52:30 +0800 Subject: itimers: Add tracepoints for itimer Add tracepoints for all itimer variants: ITIMER_REAL, ITIMER_VIRTUAL and ITIMER_PROF. [ tglx: Fixed comments and made the output more readable, parseable and consistent. Replaced pid_vnr by pid_nr because the hrtimer callback can happen in any namespace ] Signed-off-by: Xiao Guangrong Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Mathieu Desnoyers Cc: Anton Blanchard Cc: Peter Zijlstra Cc: KOSAKI Motohiro Cc: Zhaolei LKML-Reference: <4A7F8B6E.2010109@cn.fujitsu.com> Signed-off-by: Thomas Gleixner --- include/trace/events/timer.h | 66 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h index df3c07f..1844c48 100644 --- a/include/trace/events/timer.h +++ b/include/trace/events/timer.h @@ -270,6 +270,72 @@ TRACE_EVENT(hrtimer_cancel, TP_printk("hrtimer %p", __entry->timer) ); +/** + * itimer_state - called when itimer is started or canceled + * @which: name of the interval timer + * @value: the itimers value, itimer is canceled if value->it_value is + * zero, otherwise it is started + * @expires: the itimers expiry time + */ +TRACE_EVENT(itimer_state, + + TP_PROTO(int which, const struct itimerval *const value, + cputime_t expires), + + TP_ARGS(which, value, expires), + + TP_STRUCT__entry( + __field( int, which ) + __field( cputime_t, expires ) + __field( long, value_sec ) + __field( long, value_usec ) + __field( long, interval_sec ) + __field( long, interval_usec ) + ), + + TP_fast_assign( + __entry->which = which; + __entry->expires = expires; + __entry->value_sec = value->it_value.tv_sec; + __entry->value_usec = value->it_value.tv_usec; + __entry->interval_sec = value->it_interval.tv_sec; + __entry->interval_usec = value->it_interval.tv_usec; + ), + + TP_printk("which %d, expires %lu, it_value %lu.%lu, it_interval %lu.%lu", + __entry->which, __entry->expires, + __entry->value_sec, __entry->value_usec, + __entry->interval_sec, __entry->interval_usec) +); + +/** + * itimer_expire - called when itimer expires + * @which: type of the interval timer + * @pid: pid of the process which owns the timer + * @now: current time, used to calculate the latency of itimer + */ +TRACE_EVENT(itimer_expire, + + TP_PROTO(int which, struct pid *pid, cputime_t now), + + TP_ARGS(which, pid, now), + + TP_STRUCT__entry( + __field( int , which ) + __field( pid_t, pid ) + __field( cputime_t, now ) + ), + + TP_fast_assign( + __entry->which = which; + __entry->now = now; + __entry->pid = pid_nr(pid); + ), + + TP_printk("which %d, pid %d, now %lu", __entry->which, + (int) __entry->pid, __entry->now) +); + #endif /* _TRACE_TIMER_H */ /* This part must be outside protection */ -- cgit v1.1 From 98a56ab382079f777e261e14512cbd4fb2107af4 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Sep 2009 08:48:28 -0400 Subject: ext4: Fix spelling typo in the trace format for trace_ext4_da_writepages() Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 8d433c4e..15051d2 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -243,7 +243,7 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic = wbc->range_cyclic; ), - TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", + TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, -- cgit v1.1 From b3a3ca8ca0c3c29abc5b2bfe94bb14f3f4590df9 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 31 Aug 2009 23:13:11 -0400 Subject: ext4: Add new tracepoint: trace_ext4_da_write_pages() Add a new tracepoint which shows the pages that will be written using write_cache_pages() by ext4_da_writepages(). Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 15051d2..dd43399 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -251,6 +251,40 @@ TRACE_EVENT(ext4_da_writepages, __entry->range_cyclic) ); +TRACE_EVENT(ext4_da_write_pages, + TP_PROTO(struct inode *inode, struct mpage_da_data *mpd), + + TP_ARGS(inode, mpd), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( __u64, b_blocknr ) + __field( __u32, b_size ) + __field( __u32, b_state ) + __field( unsigned long, first_page ) + __field( int, io_done ) + __field( int, pages_written ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->b_blocknr = mpd->b_blocknr; + __entry->b_size = mpd->b_size; + __entry->b_state = mpd->b_state; + __entry->first_page = mpd->first_page; + __entry->io_done = mpd->io_done; + __entry->pages_written = mpd->pages_written; + ), + + TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", + jbd2_dev_to_name(__entry->dev), __entry->ino, + __entry->b_blocknr, __entry->b_size, + __entry->b_state, __entry->first_page, + __entry->io_done, __entry->pages_written) +); + TRACE_EVENT(ext4_da_writepages_result, TP_PROTO(struct inode *inode, struct writeback_control *wbc, int ret, int pages_written), -- cgit v1.1 From 768d0c27226e6587cad2fcf543f9711da3f3774e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2009 20:13:26 +0200 Subject: sched: Add wait, sleep and iowait accounting tracepoints Add 3 schedstat tracepoints to help account for wait-time, sleep-time and iowait-time. They can also be used as a perf-counter source to profile tasks on these clocks. Signed-off-by: Peter Zijlstra Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: [ build fix for the !CONFIG_SCHEDSTATS case ] Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 95 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 95 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index 8949bb7..a4c369e 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -340,6 +340,101 @@ TRACE_EVENT(sched_signal_send, __entry->sig, __entry->comm, __entry->pid) ); +/* + * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE + * adding sched_stat support to SCHED_FIFO/RR would be welcome. + */ + +/* + * Tracepoint for accounting wait time (time the task is runnable + * but not actually running due to scheduler contention). + */ +TRACE_EVENT(sched_stat_wait, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d wait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + +/* + * Tracepoint for accounting sleep time (time the task is not runnable, + * including iowait, see below). + */ +TRACE_EVENT(sched_stat_sleep, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d sleep: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + +/* + * Tracepoint for accounting iowait time (time the task is not runnable + * due to waiting on IO to complete). + */ +TRACE_EVENT(sched_stat_iowait, + + TP_PROTO(struct task_struct *tsk, u64 delay), + + TP_ARGS(tsk, delay), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, delay ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->delay = delay; + ) + TP_perf_assign( + __perf_count(delay); + ), + + TP_printk("task: %s:%d iowait: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->delay) +); + #endif /* _TRACE_SCHED_H */ /* This part must be outside protection */ -- cgit v1.1 From a3710fd1ee8cd542c5de63cf2c39f8912031f867 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 17 Sep 2009 08:50:18 -0400 Subject: ext4: fix tracepoint format string warnings Unlike on some other architectures ino_t is an unsigned int on s390. So add an explicit cast to avoid lots of compile warnings: In file included from include/trace/ftrace.h:285, from include/trace/define_trace.h:61, from include/trace/events/ext4.h:711, from fs/ext4/super.c:50: include/trace/events/ext4.h: In function 'ftrace_raw_output_ext4_free_inode': include/trace/events/ext4.h:12: warning: format '%lu' expects type 'long unsigned int', but argument 4 has type 'ino_t' Signed-off-by: Heiko Carstens Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 75 ++++++++++++++++++++++++--------------------- include/trace/events/jbd2.h | 2 +- 2 files changed, 41 insertions(+), 36 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index dd43399..68b53c7 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -33,8 +33,8 @@ TRACE_EVENT(ext4_free_inode, ), TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode, - __entry->uid, __entry->gid, + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->mode, __entry->uid, __entry->gid, (unsigned long long) __entry->blocks) ); @@ -56,7 +56,8 @@ TRACE_EVENT(ext4_request_inode, ), TP_printk("dev %s dir %lu mode %d", - jbd2_dev_to_name(__entry->dev), __entry->dir, __entry->mode) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir, + __entry->mode) ); TRACE_EVENT(ext4_allocate_inode, @@ -79,7 +80,8 @@ TRACE_EVENT(ext4_allocate_inode, ), TP_printk("dev %s ino %lu dir %lu mode %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->dir, __entry->mode) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + (unsigned long) __entry->dir, __entry->mode) ); TRACE_EVENT(ext4_write_begin, @@ -106,8 +108,8 @@ TRACE_EVENT(ext4_write_begin, ), TP_printk("dev %s ino %lu pos %llu len %u flags %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->flags) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->flags) ); TRACE_EVENT(ext4_ordered_write_end, @@ -133,8 +135,8 @@ TRACE_EVENT(ext4_ordered_write_end, ), TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->copied) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->copied) ); TRACE_EVENT(ext4_writeback_write_end, @@ -160,8 +162,8 @@ TRACE_EVENT(ext4_writeback_write_end, ), TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->copied) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->copied) ); TRACE_EVENT(ext4_journalled_write_end, @@ -186,8 +188,8 @@ TRACE_EVENT(ext4_journalled_write_end, ), TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->copied) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->copied) ); TRACE_EVENT(ext4_writepage, @@ -209,7 +211,8 @@ TRACE_EVENT(ext4_writepage, ), TP_printk("dev %s ino %lu page_index %lu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->index) ); TRACE_EVENT(ext4_da_writepages, @@ -244,7 +247,8 @@ TRACE_EVENT(ext4_da_writepages, ), TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, + jbd2_dev_to_name(__entry->dev), + (unsigned long) __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, __entry->for_kupdate, __entry->for_reclaim, @@ -279,7 +283,7 @@ TRACE_EVENT(ext4_da_write_pages, ), TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, __entry->b_blocknr, __entry->b_size, __entry->b_state, __entry->first_page, __entry->io_done, __entry->pages_written) @@ -314,7 +318,8 @@ TRACE_EVENT(ext4_da_writepages_result, ), TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld congestion %d more_io %d no_nrwrite_index_update %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->ret, + jbd2_dev_to_name(__entry->dev), + (unsigned long) __entry->ino, __entry->ret, __entry->pages_written, __entry->pages_skipped, __entry->encountered_congestion, __entry->more_io, __entry->no_nrwrite_index_update) @@ -343,8 +348,8 @@ TRACE_EVENT(ext4_da_write_begin, ), TP_printk("dev %s ino %lu pos %llu len %u flags %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->flags) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->flags) ); TRACE_EVENT(ext4_da_write_end, @@ -370,8 +375,8 @@ TRACE_EVENT(ext4_da_write_end, ), TP_printk("dev %s ino %lu pos %llu len %u copied %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pos, __entry->len, - __entry->copied) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pos, __entry->len, __entry->copied) ); TRACE_EVENT(ext4_discard_blocks, @@ -421,8 +426,8 @@ TRACE_EVENT(ext4_mb_new_inode_pa, ), TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, - __entry->pa_len, __entry->pa_lstart) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) ); TRACE_EVENT(ext4_mb_new_group_pa, @@ -449,8 +454,8 @@ TRACE_EVENT(ext4_mb_new_group_pa, ), TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->pa_pstart, - __entry->pa_len, __entry->pa_lstart) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart) ); TRACE_EVENT(ext4_mb_release_inode_pa, @@ -476,8 +481,8 @@ TRACE_EVENT(ext4_mb_release_inode_pa, ), TP_printk("dev %s ino %lu block %llu count %u", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, - __entry->count) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->block, __entry->count) ); TRACE_EVENT(ext4_mb_release_group_pa, @@ -522,7 +527,7 @@ TRACE_EVENT(ext4_discard_preallocations, ), TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), __entry->ino) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) ); TRACE_EVENT(ext4_mb_discard_preallocations, @@ -577,8 +582,8 @@ TRACE_EVENT(ext4_request_blocks, ), TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, - __entry->len, + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->flags, __entry->len, (unsigned long long) __entry->logical, (unsigned long long) __entry->goal, (unsigned long long) __entry->lleft, @@ -621,8 +626,8 @@ TRACE_EVENT(ext4_allocate_blocks, ), TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->flags, - __entry->len, __entry->block, + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->flags, __entry->len, __entry->block, (unsigned long long) __entry->logical, (unsigned long long) __entry->goal, (unsigned long long) __entry->lleft, @@ -655,8 +660,8 @@ TRACE_EVENT(ext4_free_blocks, ), TP_printk("dev %s ino %lu block %llu count %lu metadata %d", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->block, - __entry->count, __entry->metadata) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->block, __entry->count, __entry->metadata) ); TRACE_EVENT(ext4_sync_file, @@ -679,8 +684,8 @@ TRACE_EVENT(ext4_sync_file, ), TP_printk("dev %s ino %ld parent %ld datasync %d ", - jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->parent, - __entry->datasync) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + (unsigned long) __entry->parent, __entry->datasync) ); TRACE_EVENT(ext4_sync_fs, diff --git a/include/trace/events/jbd2.h b/include/trace/events/jbd2.h index 10813fa..b851f0b 100644 --- a/include/trace/events/jbd2.h +++ b/include/trace/events/jbd2.h @@ -159,7 +159,7 @@ TRACE_EVENT(jbd2_submit_inode_data, ), TP_printk("dev %s ino %lu", - jbd2_dev_to_name(__entry->dev), __entry->ino) + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino) ); #endif /* _TRACE_JBD2_H */ -- cgit v1.1 From 229456fc34b1c9031b04f7581e7b755d1cebfe9c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Wed, 17 Jun 2009 09:22:14 -0300 Subject: KVM: convert custom marker based tracing to event traces This allows use of the powerful ftrace infrastructure. See Documentation/trace/ for usage information. [avi, stephen: various build fixes] [sheng: fix control register breakage] Signed-off-by: Marcelo Tosatti Signed-off-by: Stephen Rothwell Signed-off-by: Sheng Yang Signed-off-by: Avi Kivity --- include/trace/events/kvm.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 include/trace/events/kvm.h (limited to 'include/trace') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h new file mode 100644 index 0000000..d74b23d --- /dev/null +++ b/include/trace/events/kvm.h @@ -0,0 +1,57 @@ +#if !defined(_TRACE_KVM_MAIN_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_MAIN_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm +#define TRACE_INCLUDE_FILE kvm + +#if defined(__KVM_HAVE_IOAPIC) +TRACE_EVENT(kvm_set_irq, + TP_PROTO(unsigned int gsi), + TP_ARGS(gsi), + + TP_STRUCT__entry( + __field( unsigned int, gsi ) + ), + + TP_fast_assign( + __entry->gsi = gsi; + ), + + TP_printk("gsi %u", __entry->gsi) +); + + +#define kvm_irqchips \ + {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ + {KVM_IRQCHIP_PIC_SLAVE, "PIC slave"}, \ + {KVM_IRQCHIP_IOAPIC, "IOAPIC"} + +TRACE_EVENT(kvm_ack_irq, + TP_PROTO(unsigned int irqchip, unsigned int pin), + TP_ARGS(irqchip, pin), + + TP_STRUCT__entry( + __field( unsigned int, irqchip ) + __field( unsigned int, pin ) + ), + + TP_fast_assign( + __entry->irqchip = irqchip; + __entry->pin = pin; + ), + + TP_printk("irqchip %s pin %u", + __print_symbolic(__entry->irqchip, kvm_irqchips), + __entry->pin) +); + + + +#endif /* defined(__KVM_HAVE_IOAPIC) */ +#endif /* _TRACE_KVM_MAIN_H */ + +/* This part must be outside protection */ +#include -- cgit v1.1 From ae8c1c4025c2b780616586c3f86a3374a154ef90 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 1 Jul 2009 12:09:41 +0300 Subject: KVM: Trace irq level and source id Signed-off-by: Avi Kivity --- include/trace/events/kvm.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index d74b23d..035232d 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -9,18 +9,23 @@ #if defined(__KVM_HAVE_IOAPIC) TRACE_EVENT(kvm_set_irq, - TP_PROTO(unsigned int gsi), - TP_ARGS(gsi), + TP_PROTO(unsigned int gsi, int level, int irq_source_id), + TP_ARGS(gsi, level, irq_source_id), TP_STRUCT__entry( __field( unsigned int, gsi ) + __field( int, level ) + __field( int, irq_source_id ) ), TP_fast_assign( __entry->gsi = gsi; + __entry->level = level; + __entry->irq_source_id = irq_source_id; ), - TP_printk("gsi %u", __entry->gsi) + TP_printk("gsi %u level %d source %d", + __entry->gsi, __entry->level, __entry->irq_source_id) ); -- cgit v1.1 From aec51dc4f1584018d7e35269e04e3dde3d2033e6 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 1 Jul 2009 16:01:02 +0300 Subject: KVM: Trace mmio Signed-off-by: Avi Kivity --- include/trace/events/kvm.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 035232d..77022af 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -56,6 +56,39 @@ TRACE_EVENT(kvm_ack_irq, #endif /* defined(__KVM_HAVE_IOAPIC) */ + +#define KVM_TRACE_MMIO_READ_UNSATISFIED 0 +#define KVM_TRACE_MMIO_READ 1 +#define KVM_TRACE_MMIO_WRITE 2 + +#define kvm_trace_symbol_mmio \ + { KVM_TRACE_MMIO_READ_UNSATISFIED, "unsatisfied-read" }, \ + { KVM_TRACE_MMIO_READ, "read" }, \ + { KVM_TRACE_MMIO_WRITE, "write" } + +TRACE_EVENT(kvm_mmio, + TP_PROTO(int type, int len, u64 gpa, u64 val), + TP_ARGS(type, len, gpa, val), + + TP_STRUCT__entry( + __field( u32, type ) + __field( u32, len ) + __field( u64, gpa ) + __field( u64, val ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->len = len; + __entry->gpa = gpa; + __entry->val = val; + ), + + TP_printk("mmio %s len %u gpa 0x%llx val 0x%llx", + __print_symbolic(__entry->type, kvm_trace_symbol_mmio), + __entry->len, __entry->gpa, __entry->val) +); + #endif /* _TRACE_KVM_MAIN_H */ /* This part must be outside protection */ -- cgit v1.1 From 1000ff8d893765d7b56e32fe16dbe4814f172588 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 7 Jul 2009 16:00:57 +0300 Subject: KVM: Add trace points in irqchip code Add tracepoint in msi/ioapic/pic set_irq() functions, in IPI sending and in the point where IRQ is placed into apic's IRR. Signed-off-by: Gleb Natapov Signed-off-by: Avi Kivity --- include/trace/events/kvm.h | 56 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 77022af..dbe1084 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -28,6 +28,62 @@ TRACE_EVENT(kvm_set_irq, __entry->gsi, __entry->level, __entry->irq_source_id) ); +#define kvm_deliver_mode \ + {0x0, "Fixed"}, \ + {0x1, "LowPrio"}, \ + {0x2, "SMI"}, \ + {0x3, "Res3"}, \ + {0x4, "NMI"}, \ + {0x5, "INIT"}, \ + {0x6, "SIPI"}, \ + {0x7, "ExtINT"} + +TRACE_EVENT(kvm_ioapic_set_irq, + TP_PROTO(__u64 e, int pin, bool coalesced), + TP_ARGS(e, pin, coalesced), + + TP_STRUCT__entry( + __field( __u64, e ) + __field( int, pin ) + __field( bool, coalesced ) + ), + + TP_fast_assign( + __entry->e = e; + __entry->pin = pin; + __entry->coalesced = coalesced; + ), + + TP_printk("pin %u dst %x vec=%u (%s|%s|%s%s)%s", + __entry->pin, (u8)(__entry->e >> 56), (u8)__entry->e, + __print_symbolic((__entry->e >> 8 & 0x7), kvm_deliver_mode), + (__entry->e & (1<<11)) ? "logical" : "physical", + (__entry->e & (1<<15)) ? "level" : "edge", + (__entry->e & (1<<16)) ? "|masked" : "", + __entry->coalesced ? " (coalesced)" : "") +); + +TRACE_EVENT(kvm_msi_set_irq, + TP_PROTO(__u64 address, __u64 data), + TP_ARGS(address, data), + + TP_STRUCT__entry( + __field( __u64, address ) + __field( __u64, data ) + ), + + TP_fast_assign( + __entry->address = address; + __entry->data = data; + ), + + TP_printk("dst %u vec %x (%s|%s|%s%s)", + (u8)(__entry->address >> 12), (u8)__entry->data, + __print_symbolic((__entry->data >> 8 & 0x7), kvm_deliver_mode), + (__entry->address & (1<<2)) ? "logical" : "physical", + (__entry->data & (1<<15)) ? "level" : "edge", + (__entry->address & (1<<3)) ? "|rh" : "") +); #define kvm_irqchips \ {KVM_IRQCHIP_PIC_MASTER, "PIC master"}, \ -- cgit v1.1 From fe63b94a430c0c8058de317f0a8ce921c69dbee4 Mon Sep 17 00:00:00 2001 From: Carsten Emde Date: Sat, 12 Sep 2009 00:05:37 +0200 Subject: tracing: prevent NULL pointer dereference in ftrace_raw_event_block_bio_bounce Booting 2.6.31 and executing echo 1 >/sys/kernel/debug/tracing/events/enable leads to BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] ftrace_raw_event_block_bio_bounce+0x4b/0xb9 Apparently, bio = bio_map_user(q, NULL, uaddr, len, reading, gfp_mask); is called in block/blk-map.c:58 where bio->bi_bdev in set to NULL and still is NULL when an attempt is made to evaluate bio->bi_bdev->bd_dev in include/trace/events/block.h:189. The tracepoint should ensure bio->bi_bdev is not dereferenced, if NULL. Signed-off-by: Carsten Emde LKML-Reference: <4AAAC9B1.9060505@osadl.org> Signed-off-by: Steven Rostedt --- include/trace/events/block.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/block.h b/include/trace/events/block.h index 9a74b46..d86af94 100644 --- a/include/trace/events/block.h +++ b/include/trace/events/block.h @@ -171,6 +171,7 @@ TRACE_EVENT(block_rq_complete, (unsigned long long)__entry->sector, __entry->nr_sector, __entry->errors) ); + TRACE_EVENT(block_bio_bounce, TP_PROTO(struct request_queue *q, struct bio *bio), @@ -186,7 +187,8 @@ TRACE_EVENT(block_bio_bounce, ), TP_fast_assign( - __entry->dev = bio->bi_bdev->bd_dev; + __entry->dev = bio->bi_bdev ? + bio->bi_bdev->bd_dev : 0; __entry->sector = bio->bi_sector; __entry->nr_sector = bio->bi_size >> 9; blk_fill_rwbs(__entry->rwbs, bio->bi_rw, bio->bi_size); -- cgit v1.1 From f977bb4937857994312fff4f9c2cad336a36a932 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Sep 2009 18:15:54 +0200 Subject: perf_counter, sched: Add sched_stat_runtime tracepoint This allows more precise tracking of how the scheduler accounts (and acts upon) a task having spent N nanoseconds of CPU time. Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index b48f1ad..4069c43 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -380,6 +380,39 @@ TRACE_EVENT(sched_stat_wait, ); /* + * Tracepoint for accounting runtime (time the task is executing + * on a CPU). + */ +TRACE_EVENT(sched_stat_runtime, + + TP_PROTO(struct task_struct *tsk, u64 runtime, u64 vruntime), + + TP_ARGS(tsk, runtime, vruntime), + + TP_STRUCT__entry( + __array( char, comm, TASK_COMM_LEN ) + __field( pid_t, pid ) + __field( u64, runtime ) + __field( u64, vruntime ) + ), + + TP_fast_assign( + memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); + __entry->pid = tsk->pid; + __entry->runtime = runtime; + __entry->vruntime = vruntime; + ) + TP_perf_assign( + __perf_count(runtime); + ), + + TP_printk("task: %s:%d runtime: %Lu [ns], vruntime: %Lu [ns]", + __entry->comm, __entry->pid, + (unsigned long long)__entry->runtime, + (unsigned long long)__entry->vruntime) +); + +/* * Tracepoint for accounting sleep time (time the task is not runnable, * including iowait, see below). */ -- cgit v1.1 From ec827c7ece8901044e6b3f92aeea489be9e1bcf7 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Sep 2009 10:50:23 -0400 Subject: tracing: add static to generated TRACE_EVENT functions Some of the generated functions used in the TRACE_EVENT macros are not declared static, but they are not global. Discovered by sparse. Reported-by: Jaswinder Singh Rajput Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 308bafd..fa8ce03 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -254,7 +254,7 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \ -enum print_line_t \ +static enum print_line_t \ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ { \ struct trace_seq *s = &iter->seq; \ @@ -317,7 +317,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #undef TRACE_EVENT #define TRACE_EVENT(call, proto, args, tstruct, func, print) \ -int \ +static int \ ftrace_define_fields_##call(struct ftrace_event_call *event_call) \ { \ struct ftrace_raw_##call field; \ -- cgit v1.1 From a48f494e1dbdcf4fb7c02100ae3208c4c1daecbf Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 14 Sep 2009 11:18:02 -0400 Subject: tracing: have TRACE_EVENT macro use __flags to not shadow parameter The generated functions of TRACE_EVENT uses "flags" in one of the sub macros which shadows a parameter in the outside macro. Simple fix is to make the submacro use __flags instead. Discovered by sparse. Reported-by: Jaswinder Singh Rajput Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index fa8ce03..72a3b43 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -239,9 +239,9 @@ ftrace_format_##call(struct ftrace_event_call *unused, \ #undef __print_flags #define __print_flags(flag, delim, flag_array...) \ ({ \ - static const struct trace_print_flags flags[] = \ + static const struct trace_print_flags __flags[] = \ { flag_array, { -1, NULL }}; \ - ftrace_print_flags_seq(p, delim, flag, flags); \ + ftrace_print_flags_seq(p, delim, flag, __flags); \ }) #undef __print_symbolic -- cgit v1.1 From 3661d28615ea580c1db02a972fd4d3898df1cb01 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 14 Sep 2009 22:59:50 -0400 Subject: ext4: Fix include/trace/events/ext4.h to work with Systemtap Using relative pathnames in #include statements interacts badly with SystemTap, since the fs/ext4/*.h header files are not packaged up as part of a distribution kernel's header files. Since systemtap doesn't use TP_fast_assign(), we can use a blind structure definition and then make sure the needed header files are defined before the ext4 source files #include the trace/events/ext4.h header file. https://bugzilla.redhat.com/show_bug.cgi?id=512478 Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 68b53c7..6fe6ce9 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -5,10 +5,12 @@ #define _TRACE_EXT4_H #include -#include "../../../fs/ext4/ext4.h" -#include "../../../fs/ext4/mballoc.h" #include +struct ext4_allocation_context; +struct ext4_allocation_request; +struct ext4_prealloc_space; + TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), -- cgit v1.1 From 1fe06ad89255c211fe100d7f690d10b161398df8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 15 Sep 2009 15:10:20 +0200 Subject: writeback: get rid of wbc->for_writepages It's only set, it's never checked. Kill it. Acked-by: Jan Kara Signed-off-by: Jens Axboe --- include/trace/events/ext4.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 7d8b5bc..8d433c4e 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -227,7 +227,6 @@ TRACE_EVENT(ext4_da_writepages, __field( char, nonblocking ) __field( char, for_kupdate ) __field( char, for_reclaim ) - __field( char, for_writepages ) __field( char, range_cyclic ) ), @@ -241,16 +240,15 @@ TRACE_EVENT(ext4_da_writepages, __entry->nonblocking = wbc->nonblocking; __entry->for_kupdate = wbc->for_kupdate; __entry->for_reclaim = wbc->for_reclaim; - __entry->for_writepages = wbc->for_writepages; __entry->range_cyclic = wbc->range_cyclic; ), - TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d for_writepages %d range_cyclic %d", + TP_printk("dev %s ino %lu nr_t_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d", jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->nr_to_write, __entry->pages_skipped, __entry->range_start, __entry->range_end, __entry->nonblocking, __entry->for_kupdate, __entry->for_reclaim, - __entry->for_writepages, __entry->range_cyclic) + __entry->range_cyclic) ); TRACE_EVENT(ext4_da_writepages_result, -- cgit v1.1 From fb40ba0d98968bc3454731360363d725b4f1064c Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 16 Sep 2009 19:30:40 -0400 Subject: ext4: Add a tracepoint for ext4_alloc_da_blocks() Signed-off-by: "Theodore Ts'o" --- include/trace/events/ext4.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6fe6ce9..c1bd8f1 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -10,6 +10,9 @@ struct ext4_allocation_context; struct ext4_allocation_request; struct ext4_prealloc_space; +struct ext4_inode_info; + +#define EXT4_I(inode) (container_of(inode, struct ext4_inode_info, vfs_inode)) TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), @@ -710,6 +713,30 @@ TRACE_EVENT(ext4_sync_fs, __entry->wait) ); +TRACE_EVENT(ext4_alloc_da_blocks, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( unsigned int, data_blocks ) + __field( unsigned int, meta_blocks ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks; + __entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; + ), + + TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u", + jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, + __entry->data_blocks, __entry->meta_blocks) +); + #endif /* _TRACE_EXT4_H */ /* This part must be outside protection */ -- cgit v1.1 From 5dd4de587fd9c25cb32a7a0fe9feec3647509b6f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 17 Sep 2009 17:38:32 +0800 Subject: softirq: add BLOCK_IOPOLL to softirq_to_name With BLOCK_IOPOLL_SOFTIRQ added, softirq_to_name[] and show_softirq_name() needs to be updated. Signed-off-by: Li Zefan LKML-Reference: <4AB20398.8070209@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/events/irq.h | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'include/trace') diff --git a/include/trace/events/irq.h b/include/trace/events/irq.h index 1cb0c3a..b89f9db 100644 --- a/include/trace/events/irq.h +++ b/include/trace/events/irq.h @@ -8,16 +8,17 @@ #include #define softirq_name(sirq) { sirq##_SOFTIRQ, #sirq } -#define show_softirq_name(val) \ - __print_symbolic(val, \ - softirq_name(HI), \ - softirq_name(TIMER), \ - softirq_name(NET_TX), \ - softirq_name(NET_RX), \ - softirq_name(BLOCK), \ - softirq_name(TASKLET), \ - softirq_name(SCHED), \ - softirq_name(HRTIMER), \ +#define show_softirq_name(val) \ + __print_symbolic(val, \ + softirq_name(HI), \ + softirq_name(TIMER), \ + softirq_name(NET_TX), \ + softirq_name(NET_RX), \ + softirq_name(BLOCK), \ + softirq_name(BLOCK_IOPOLL), \ + softirq_name(TASKLET), \ + softirq_name(SCHED), \ + softirq_name(HRTIMER), \ softirq_name(RCU)) /** -- cgit v1.1 From e5e25cf47b0bdd1f7e9b8bb6368ee48e16de0c87 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 18 Sep 2009 00:54:43 +0200 Subject: tracing: Factorize the events profile accounting Factorize the events enabling accounting in a common tracing core helper. This reduces the size of the profile_enable() and profile_disable() callbacks for each trace events. Signed-off-by: Frederic Weisbecker Acked-by: Steven Rostedt Acked-by: Li Zefan Cc: Peter Zijlstra Cc: Jason Baron Cc: Masami Hiramatsu Cc: Ingo Molnar --- include/trace/ftrace.h | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 72a3b43..a822087 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -382,20 +382,14 @@ static inline int ftrace_get_offsets_##call( \ * * NOTE: The insertion profile callback (ftrace_profile_) is defined later * - * static int ftrace_profile_enable_(struct ftrace_event_call *event_call) + * static int ftrace_profile_enable_(void) * { - * int ret = 0; - * - * if (!atomic_inc_return(&event_call->profile_count)) - * ret = register_trace_(ftrace_profile_); - * - * return ret; + * return register_trace_(ftrace_profile_); * } * - * static void ftrace_profile_disable_(struct ftrace_event_call *event_call) + * static void ftrace_profile_disable_(void) * { - * if (atomic_add_negative(-1, &event->call->profile_count)) - * unregister_trace_(ftrace_profile_); + * unregister_trace_(ftrace_profile_); * } * */ @@ -405,20 +399,14 @@ static inline int ftrace_get_offsets_##call( \ \ static void ftrace_profile_##call(proto); \ \ -static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \ +static int ftrace_profile_enable_##call(void) \ { \ - int ret = 0; \ - \ - if (!atomic_inc_return(&event_call->profile_count)) \ - ret = register_trace_##call(ftrace_profile_##call); \ - \ - return ret; \ + return register_trace_##call(ftrace_profile_##call); \ } \ \ -static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\ +static void ftrace_profile_disable_##call(void) \ { \ - if (atomic_add_negative(-1, &event_call->profile_count)) \ - unregister_trace_##call(ftrace_profile_##call); \ + unregister_trace_##call(ftrace_profile_##call); \ } #include TRACE_INCLUDE(TRACE_INCLUDE_FILE) -- cgit v1.1 From 20ab4425a77a1f34028cc6ce57053c22c184ba5f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 18 Sep 2009 06:10:28 +0200 Subject: tracing: Allocate the ftrace event profile buffer dynamically Currently the trace event profile buffer is allocated in the stack. But this may be too much for the stack, as the events can have large statically defined field size and can also grow with dynamic arrays. Allocate two per cpu buffer for all profiled events. The first cpu buffer is used to host every non-nmi context traces. It is protected by disabling the interrupts while writing and committing the trace. The second buffer is reserved for nmi. So that there is no race between them and the first buffer. The whole write/commit section is rcu protected because we release these buffers while deactivating the last profiling trace event. v2: Move the buffers from trace_event to be global, as pointed by Steven Rostedt. v3: Fix the syscall events to handle the profiling buffer races by disabling interrupts, now that the buffers are globals. Suggested-by: Steven Rostedt Signed-off-by: Frederic Weisbecker Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Li Zefan Cc: Jason Baron Cc: Masami Hiramatsu Cc: Ingo Molnar --- include/trace/ftrace.h | 83 +++++++++++++++++++++++++++++++++----------------- 1 file changed, 55 insertions(+), 28 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index a822087..a0361cb 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -648,11 +648,12 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * struct ftrace_raw_##call *entry; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; + * struct trace_entry *ent; * int __entry_size; * int __data_size; + * int __cpu * int pc; * - * local_save_flags(irq_flags); * pc = preempt_count(); * * __data_size = ftrace_get_offsets_(&__data_offsets, args); @@ -663,25 +664,34 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * sizeof(u64)); * __entry_size -= sizeof(u32); * - * do { - * char raw_data[__entry_size]; <- allocate our sample in the stack - * struct trace_entry *ent; + * // Protect the non nmi buffer + * // This also protects the rcu read side + * local_irq_save(irq_flags); + * __cpu = smp_processor_id(); + * + * if (in_nmi()) + * raw_data = rcu_dereference(trace_profile_buf_nmi); + * else + * raw_data = rcu_dereference(trace_profile_buf); + * + * if (!raw_data) + * goto end; * - * zero dead bytes from alignment to avoid stack leak to userspace: + * raw_data = per_cpu_ptr(raw_data, __cpu); * - * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; - * entry = (struct ftrace_raw_ *)raw_data; - * ent = &entry->ent; - * tracing_generic_entry_update(ent, irq_flags, pc); - * ent->type = event_call->id; + * //zero dead bytes from alignment to avoid stack leak to userspace: + * *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; + * entry = (struct ftrace_raw_ *)raw_data; + * ent = &entry->ent; + * tracing_generic_entry_update(ent, irq_flags, pc); + * ent->type = event_call->id; * - * <- do some jobs with dynamic arrays + * <- do some jobs with dynamic arrays * - * <- affect our values + * <- affect our values * - * perf_tpcounter_event(event_call->id, __addr, __count, entry, - * __entry_size); <- submit them to perf counter - * } while (0); + * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * __entry_size); <- submit them to perf counter * * } */ @@ -704,11 +714,13 @@ static void ftrace_profile_##call(proto) \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ + struct trace_entry *ent; \ int __entry_size; \ int __data_size; \ + char *raw_data; \ + int __cpu; \ int pc; \ \ - local_save_flags(irq_flags); \ pc = preempt_count(); \ \ __data_size = ftrace_get_offsets_##call(&__data_offsets, args); \ @@ -716,23 +728,38 @@ static void ftrace_profile_##call(proto) \ sizeof(u64)); \ __entry_size -= sizeof(u32); \ \ - do { \ - char raw_data[__entry_size]; \ - struct trace_entry *ent; \ + if (WARN_ONCE(__entry_size > FTRACE_MAX_PROFILE_SIZE, \ + "profile buffer not large enough")) \ + return; \ + \ + local_irq_save(irq_flags); \ + __cpu = smp_processor_id(); \ \ - *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ - entry = (struct ftrace_raw_##call *)raw_data; \ - ent = &entry->ent; \ - tracing_generic_entry_update(ent, irq_flags, pc); \ - ent->type = event_call->id; \ + if (in_nmi()) \ + raw_data = rcu_dereference(trace_profile_buf_nmi); \ + else \ + raw_data = rcu_dereference(trace_profile_buf); \ \ - tstruct \ + if (!raw_data) \ + goto end; \ \ - { assign; } \ + raw_data = per_cpu_ptr(raw_data, __cpu); \ \ - perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + *(u64 *)(&raw_data[__entry_size - sizeof(u64)]) = 0ULL; \ + entry = (struct ftrace_raw_##call *)raw_data; \ + ent = &entry->ent; \ + tracing_generic_entry_update(ent, irq_flags, pc); \ + ent->type = event_call->id; \ + \ + tstruct \ + \ + { assign; } \ + \ + perf_tpcounter_event(event_call->id, __addr, __count, entry, \ __entry_size); \ - } while (0); \ + \ +end: \ + local_irq_restore(irq_flags); \ \ } -- cgit v1.1 From 6161352142d5fed4cd753b32e5ccde66e705b14e Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Thu, 17 Sep 2009 16:11:28 +0200 Subject: tracing, perf: Convert the power tracer into an event tracer This patch converts the existing power tracer into an event tracer, so that power events (C states and frequency changes) can be tracked via "perf". This also removes the perl script that was used to demo the tracer; its functionality is being replaced entirely with timechart. Signed-off-by: Arjan van de Ven Acked-by: Peter Zijlstra Cc: Paul Mackerras Cc: Frederic Weisbecker LKML-Reference: <20090912130542.6d314860@infradead.org> Signed-off-by: Ingo Molnar --- include/trace/events/power.h | 81 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 include/trace/events/power.h (limited to 'include/trace') diff --git a/include/trace/events/power.h b/include/trace/events/power.h new file mode 100644 index 0000000..ea6d579 --- /dev/null +++ b/include/trace/events/power.h @@ -0,0 +1,81 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM power + +#if !defined(_TRACE_POWER_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_POWER_H + +#include +#include + +#ifndef _TRACE_POWER_ENUM_ +#define _TRACE_POWER_ENUM_ +enum { + POWER_NONE = 0, + POWER_CSTATE = 1, + POWER_PSTATE = 2, +}; +#endif + + + +TRACE_EVENT(power_start, + + TP_PROTO(unsigned int type, unsigned int state), + + TP_ARGS(type, state), + + TP_STRUCT__entry( + __field( u64, type ) + __field( u64, state ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->state = state; + ), + + TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long)__entry->state) +); + +TRACE_EVENT(power_end, + + TP_PROTO(int dummy), + + TP_ARGS(dummy), + + TP_STRUCT__entry( + __field( u64, dummy ) + ), + + TP_fast_assign( + __entry->dummy = 0xffff; + ), + + TP_printk("dummy=%lu", (unsigned long)__entry->dummy) + +); + + +TRACE_EVENT(power_frequency, + + TP_PROTO(unsigned int type, unsigned int state), + + TP_ARGS(type, state), + + TP_STRUCT__entry( + __field( u64, type ) + __field( u64, state ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->state = state; + ), + + TP_printk("type=%lu state=%lu", (unsigned long)__entry->type, (unsigned long) __entry->state) +); + +#endif /* _TRACE_POWER_H */ + +/* This part must be outside protection */ +#include -- cgit v1.1 From cdd6c482c9ff9c55475ee7392ec8f672eddb7be6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 21 Sep 2009 12:02:48 +0200 Subject: perf: Do the big rename: Performance Counters -> Performance Events Bye-bye Performance Counters, welcome Performance Events! In the past few months the perfcounters subsystem has grown out its initial role of counting hardware events, and has become (and is becoming) a much broader generic event enumeration, reporting, logging, monitoring, analysis facility. Naming its core object 'perf_counter' and naming the subsystem 'perfcounters' has become more and more of a misnomer. With pending code like hw-breakpoints support the 'counter' name is less and less appropriate. All in one, we've decided to rename the subsystem to 'performance events' and to propagate this rename through all fields, variables and API names. (in an ABI compatible fashion) The word 'event' is also a bit shorter than 'counter' - which makes it slightly more convenient to write/handle as well. Thanks goes to Stephane Eranian who first observed this misnomer and suggested a rename. User-space tooling and ABI compatibility is not affected - this patch should be function-invariant. (Also, defconfigs were not touched to keep the size down.) This patch has been generated via the following script: FILES=$(find * -type f | grep -vE 'oprofile|[^K]config') sed -i \ -e 's/PERF_EVENT_/PERF_RECORD_/g' \ -e 's/PERF_COUNTER/PERF_EVENT/g' \ -e 's/perf_counter/perf_event/g' \ -e 's/nb_counters/nb_events/g' \ -e 's/swcounter/swevent/g' \ -e 's/tpcounter_event/tp_event/g' \ $FILES for N in $(find . -name perf_counter.[ch]); do M=$(echo $N | sed 's/perf_counter/perf_event/g') mv $N $M done FILES=$(find . -name perf_event.*) sed -i \ -e 's/COUNTER_MASK/REG_MASK/g' \ -e 's/COUNTER/EVENT/g' \ -e 's/\/event_id/g' \ -e 's/counter/event/g' \ -e 's/Counter/Event/g' \ $FILES ... to keep it as correct as possible. This script can also be used by anyone who has pending perfcounters patches - it converts a Linux kernel tree over to the new naming. We tried to time this change to the point in time where the amount of pending patches is the smallest: the end of the merge window. Namespace clashes were fixed up in a preparatory patch - and some stylistic fallout will be fixed up in a subsequent patch. ( NOTE: 'counters' are still the proper terminology when we deal with hardware registers - and these sed scripts are a bit over-eager in renaming them. I've undone some of that, but in case there's something left where 'counter' would be better than 'event' we can undo that on an individual basis instead of touching an otherwise nicely automated patch. ) Suggested-by: Stephane Eranian Acked-by: Peter Zijlstra Acked-by: Paul Mackerras Reviewed-by: Arjan van de Ven Cc: Mike Galbraith Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Steven Rostedt Cc: Benjamin Herrenschmidt Cc: David Howells Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: "David S. Miller" Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: LKML-Reference: Signed-off-by: Ingo Molnar --- include/trace/ftrace.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/trace') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 72a3b43..ec91e78 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -378,7 +378,7 @@ static inline int ftrace_get_offsets_##call( \ #ifdef CONFIG_EVENT_PROFILE /* - * Generate the functions needed for tracepoint perf_counter support. + * Generate the functions needed for tracepoint perf_event support. * * NOTE: The insertion profile callback (ftrace_profile_) is defined later * @@ -656,7 +656,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * { * struct ftrace_data_offsets_ __maybe_unused __data_offsets; * struct ftrace_event_call *event_call = &event_; - * extern void perf_tpcounter_event(int, u64, u64, void *, int); + * extern void perf_tp_event(int, u64, u64, void *, int); * struct ftrace_raw_##call *entry; * u64 __addr = 0, __count = 1; * unsigned long irq_flags; @@ -691,7 +691,7 @@ __attribute__((section("_ftrace_events"))) event_##call = { \ * * <- affect our values * - * perf_tpcounter_event(event_call->id, __addr, __count, entry, + * perf_tp_event(event_call->id, __addr, __count, entry, * __entry_size); <- submit them to perf counter * } while (0); * @@ -712,7 +712,7 @@ static void ftrace_profile_##call(proto) \ { \ struct ftrace_data_offsets_##call __maybe_unused __data_offsets;\ struct ftrace_event_call *event_call = &event_##call; \ - extern void perf_tpcounter_event(int, u64, u64, void *, int); \ + extern void perf_tp_event(int, u64, u64, void *, int); \ struct ftrace_raw_##call *entry; \ u64 __addr = 0, __count = 1; \ unsigned long irq_flags; \ @@ -742,7 +742,7 @@ static void ftrace_profile_##call(proto) \ \ { assign; } \ \ - perf_tpcounter_event(event_call->id, __addr, __count, entry,\ + perf_tp_event(event_call->id, __addr, __count, entry,\ __entry_size); \ } while (0); \ \ -- cgit v1.1 From 4b4f278c030aa4b6ee0915f396e9a9478d92d610 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:41 -0700 Subject: tracing, page-allocator: add trace events for page allocation and page freeing This patch adds trace events for the allocation and freeing of pages, including the freeing of pagevecs. Using the events, it will be known what struct page and pfns are being allocated and freed and what the call site was in many cases. The page alloc tracepoints be used as an indicator as to whether the workload was heavily dependant on the page allocator or not. You can make a guess based on vmstat but you can't get a per-process breakdown. Depending on the call path, the call_site for page allocation may be __get_free_pages() instead of a useful callsite. Instead of passing down a return address similar to slab debugging, the user should enable the stacktrace and seg-addr options to get a proper stack trace. The pagevec free tracepoint has a different usecase. It can be used to get a idea of how many pages are being dumped off the LRU and whether it is kswapd doing the work or a process doing direct reclaim. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 74 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 1493c54..0d358a0 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -225,6 +225,80 @@ TRACE_EVENT(kmem_cache_free, TP_printk("call_site=%lx ptr=%p", __entry->call_site, __entry->ptr) ); + +TRACE_EVENT(mm_page_free_direct, + + TP_PROTO(struct page *page, unsigned int order), + + TP_ARGS(page, order), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + ), + + TP_printk("page=%p pfn=%lu order=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order) +); + +TRACE_EVENT(mm_pagevec_free, + + TP_PROTO(struct page *page, int cold), + + TP_ARGS(page, cold), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, cold ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->cold = cold; + ), + + TP_printk("page=%p pfn=%lu order=0 cold=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->cold) +); + +TRACE_EVENT(mm_page_alloc, + + TP_PROTO(struct page *page, unsigned int order, + gfp_t gfp_flags, int migratetype), + + TP_ARGS(page, order, gfp_flags, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + __field( gfp_t, gfp_flags ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->gfp_flags = gfp_flags; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype, + show_gfp_flags(__entry->gfp_flags)) +); + #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ -- cgit v1.1 From e0fff1bd12469c45dab088e353d8882761387bb6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:42 -0700 Subject: tracing, page-allocator: add trace events for anti-fragmentation falling back to other migratetypes Fragmentation avoidance depends on being able to use free pages from lists of the appropriate migrate type. In the event this is not possible, __rmqueue_fallback() selects a different list and in some circumstances change the migratetype of the pageblock. Simplistically, the more times this event occurs, the more likely that fragmentation will be a problem later for hugepage allocation at least but there are other considerations such as the order of page being split to satisfy the allocation. This patch adds a trace event for __rmqueue_fallback() that reports what page is being used for the fallback, the orders of relevant pages, the desired migratetype and the migratetype of the lists being used, whether the pageblock changed type and whether this event is important with respect to fragmentation avoidance or not. This information can be used to help analyse fragmentation avoidance and help decide whether min_free_kbytes should be increased or not. Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index 0d358a0..aae16ee 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -299,6 +299,44 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); +TRACE_EVENT(mm_page_alloc_extfrag, + + TP_PROTO(struct page *page, + int alloc_order, int fallback_order, + int alloc_migratetype, int fallback_migratetype), + + TP_ARGS(page, + alloc_order, fallback_order, + alloc_migratetype, fallback_migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, alloc_order ) + __field( int, fallback_order ) + __field( int, alloc_migratetype ) + __field( int, fallback_migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->alloc_order = alloc_order; + __entry->fallback_order = fallback_order; + __entry->alloc_migratetype = alloc_migratetype; + __entry->fallback_migratetype = fallback_migratetype; + ), + + TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->alloc_order, + __entry->fallback_order, + pageblock_order, + __entry->alloc_migratetype, + __entry->fallback_migratetype, + __entry->fallback_order < pageblock_order, + __entry->alloc_migratetype == __entry->fallback_migratetype) +); + #endif /* _TRACE_KMEM_H */ /* This part must be outside protection */ -- cgit v1.1 From 0d3d062a6e289e065bd0aa537a6806a1806bf8aa Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 21 Sep 2009 17:02:44 -0700 Subject: tracing, page-allocator: add trace event for page traffic related to the buddy lists The page allocation trace event reports that a page was successfully allocated but it does not specify where it came from. When analysing performance, it can be important to distinguish between pages coming from the per-cpu allocator and pages coming from the buddy lists as the latter requires the zone lock to the taken and more data structures to be examined. This patch adds a trace event for __rmqueue reporting when a page is being allocated from the buddy lists. It distinguishes between being called to refill the per-cpu lists or whether it is a high-order allocation. Similarly, this patch adds an event to catch when the PCP lists are being drained a little and pages are going back to the buddy lists. This is trickier to draw conclusions from but high activity on those events could explain why there were a large number of cache misses on a page-allocator-intensive workload. The coalescing and splitting of buddies involves a lot of writing of page metadata and cache line bounces not to mention the acquisition of an interrupt-safe lock necessary to enter this path. [akpm@linux-foundation.org: fix build] Signed-off-by: Mel Gorman Acked-by: Rik van Riel Reviewed-by: Ingo Molnar Cc: Larry Woodman Cc: Peter Zijlstra Cc: Li Ming Chun Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/kmem.h | 51 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index aae16ee..eaf46bd 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -299,6 +299,57 @@ TRACE_EVENT(mm_page_alloc, show_gfp_flags(__entry->gfp_flags)) ); +TRACE_EVENT(mm_page_alloc_zone_locked, + + TP_PROTO(struct page *page, unsigned int order, int migratetype), + + TP_ARGS(page, order, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( unsigned int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype, + __entry->order == 0) +); + +TRACE_EVENT(mm_page_pcpu_drain, + + TP_PROTO(struct page *page, int order, int migratetype), + + TP_ARGS(page, order, migratetype), + + TP_STRUCT__entry( + __field( struct page *, page ) + __field( int, order ) + __field( int, migratetype ) + ), + + TP_fast_assign( + __entry->page = page; + __entry->order = order; + __entry->migratetype = migratetype; + ), + + TP_printk("page=%p pfn=%lu order=%d migratetype=%d", + __entry->page, + page_to_pfn(__entry->page), + __entry->order, + __entry->migratetype) +); + TRACE_EVENT(mm_page_alloc_extfrag, TP_PROTO(struct page *page, -- cgit v1.1