aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/events/core.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/events/core.c')
-rw-r--r--kernel/events/core.c300
1 files changed, 258 insertions, 42 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index acdc087..466213f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -604,6 +604,77 @@ static void put_ctx(struct perf_event_context *ctx)
}
}
+/*
+ * Because of perf_event::ctx migration in sys_perf_event_open::move_group and
+ * perf_pmu_migrate_context() we need some magic.
+ *
+ * Those places that change perf_event::ctx will hold both
+ * perf_event_ctx::mutex of the 'old' and 'new' ctx value.
+ *
+ * Lock ordering is by mutex address. There is one other site where
+ * perf_event_context::mutex nests and that is put_event(). But remember that
+ * that is a parent<->child context relation, and migration does not affect
+ * children, therefore these two orderings should not interact.
+ *
+ * The change in perf_event::ctx does not affect children (as claimed above)
+ * because the sys_perf_event_open() case will install a new event and break
+ * the ctx parent<->child relation, and perf_pmu_migrate_context() is only
+ * concerned with cpuctx and that doesn't have children.
+ *
+ * The places that change perf_event::ctx will issue:
+ *
+ * perf_remove_from_context();
+ * synchronize_rcu();
+ * perf_install_in_context();
+ *
+ * to affect the change. The remove_from_context() + synchronize_rcu() should
+ * quiesce the event, after which we can install it in the new location. This
+ * means that only external vectors (perf_fops, prctl) can perturb the event
+ * while in transit. Therefore all such accessors should also acquire
+ * perf_event_context::mutex to serialize against this.
+ *
+ * However; because event->ctx can change while we're waiting to acquire
+ * ctx->mutex we must be careful and use the below perf_event_ctx_lock()
+ * function.
+ *
+ * Lock order:
+ * task_struct::perf_event_mutex
+ * perf_event_context::mutex
+ * perf_event_context::lock
+ * perf_event::child_mutex;
+ * perf_event::mmap_mutex
+ * mmap_sem
+ */
+static struct perf_event_context *perf_event_ctx_lock(struct perf_event *event)
+{
+ struct perf_event_context *ctx;
+
+again:
+ rcu_read_lock();
+ ctx = ACCESS_ONCE(event->ctx);
+ if (!atomic_inc_not_zero(&ctx->refcount)) {
+ rcu_read_unlock();
+ goto again;
+ }
+ rcu_read_unlock();
+
+ mutex_lock(&ctx->mutex);
+ if (event->ctx != ctx) {
+ mutex_unlock(&ctx->mutex);
+ put_ctx(ctx);
+ goto again;
+ }
+
+ return ctx;
+}
+
+static void perf_event_ctx_unlock(struct perf_event *event,
+ struct perf_event_context *ctx)
+{
+ mutex_unlock(&ctx->mutex);
+ put_ctx(ctx);
+}
+
static void unclone_ctx(struct perf_event_context *ctx)
{
if (ctx->parent_ctx) {
@@ -1245,7 +1316,7 @@ static int __perf_event_disable(void *info)
* is the current context on this CPU and preemption is disabled,
* hence we can't get into perf_event_task_sched_out for this context.
*/
-void perf_event_disable(struct perf_event *event)
+static void _perf_event_disable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
@@ -1287,6 +1358,19 @@ retry:
raw_spin_unlock_irq(&ctx->lock);
}
+/*
+ * Strictly speaking kernel users cannot create groups and therefore this
+ * interface does not need the perf_event_ctx_lock() magic.
+ */
+void perf_event_disable(struct perf_event *event)
+{
+ struct perf_event_context *ctx;
+
+ ctx = perf_event_ctx_lock(event);
+ _perf_event_disable(event);
+ perf_event_ctx_unlock(event, ctx);
+}
+
static void perf_set_shadow_time(struct perf_event *event,
struct perf_event_context *ctx,
u64 tstamp)
@@ -1580,6 +1664,8 @@ perf_install_in_context(struct perf_event_context *ctx,
lockdep_assert_held(&ctx->mutex);
event->ctx = ctx;
+ if (event->cpu != -1)
+ event->cpu = cpu;
if (!task) {
/*
@@ -1720,7 +1806,7 @@ unlock:
* perf_event_for_each_child or perf_event_for_each as described
* for perf_event_disable.
*/
-void perf_event_enable(struct perf_event *event)
+static void _perf_event_enable(struct perf_event *event)
{
struct perf_event_context *ctx = event->ctx;
struct task_struct *task = ctx->task;
@@ -1777,7 +1863,19 @@ out:
raw_spin_unlock_irq(&ctx->lock);
}
-static int perf_event_refresh(struct perf_event *event, int refresh)
+/*
+ * See perf_event_disable();
+ */
+void perf_event_enable(struct perf_event *event)
+{
+ struct perf_event_context *ctx;
+
+ ctx = perf_event_ctx_lock(event);
+ _perf_event_enable(event);
+ perf_event_ctx_unlock(event, ctx);
+}
+
+static int _perf_event_refresh(struct perf_event *event, int refresh)
{
/*
* not supported on inherited events
@@ -1786,11 +1884,26 @@ static int perf_event_refresh(struct perf_event *event, int refresh)
return -EINVAL;
atomic_add(refresh, &event->event_limit);
- perf_event_enable(event);
+ _perf_event_enable(event);
return 0;
}
+/*
+ * See perf_event_disable()
+ */
+int perf_event_refresh(struct perf_event *event, int refresh)
+{
+ struct perf_event_context *ctx;
+ int ret;
+
+ ctx = perf_event_ctx_lock(event);
+ ret = _perf_event_refresh(event, refresh);
+ perf_event_ctx_unlock(event, ctx);
+
+ return ret;
+}
+
static void ctx_sched_out(struct perf_event_context *ctx,
struct perf_cpu_context *cpuctx,
enum event_type_t event_type)
@@ -3034,7 +3147,16 @@ static void put_event(struct perf_event *event)
rcu_read_unlock();
if (owner) {
- mutex_lock(&owner->perf_event_mutex);
+ /*
+ * If we're here through perf_event_exit_task() we're already
+ * holding ctx->mutex which would be an inversion wrt. the
+ * normal lock order.
+ *
+ * However we can safely take this lock because its the child
+ * ctx->mutex.
+ */
+ mutex_lock_nested(&owner->perf_event_mutex, SINGLE_DEPTH_NESTING);
+
/*
* We have to re-check the event->owner field, if it is cleared
* we raced with perf_event_exit_task(), acquiring the mutex
@@ -3086,12 +3208,13 @@ static int perf_event_read_group(struct perf_event *event,
u64 read_format, char __user *buf)
{
struct perf_event *leader = event->group_leader, *sub;
- int n = 0, size = 0, ret = -EFAULT;
struct perf_event_context *ctx = leader->ctx;
- u64 values[5];
+ int n = 0, size = 0, ret;
u64 count, enabled, running;
+ u64 values[5];
+
+ lockdep_assert_held(&ctx->mutex);
- mutex_lock(&ctx->mutex);
count = perf_event_read_value(leader, &enabled, &running);
values[n++] = 1 + leader->nr_siblings;
@@ -3106,7 +3229,7 @@ static int perf_event_read_group(struct perf_event *event,
size = n * sizeof(u64);
if (copy_to_user(buf, values, size))
- goto unlock;
+ return -EFAULT;
ret = size;
@@ -3120,14 +3243,12 @@ static int perf_event_read_group(struct perf_event *event,
size = n * sizeof(u64);
if (copy_to_user(buf + ret, values, size)) {
- ret = -EFAULT;
- goto unlock;
+ return -EFAULT;
}
ret += size;
}
-unlock:
- mutex_unlock(&ctx->mutex);
+
return ret;
}
@@ -3186,8 +3307,14 @@ static ssize_t
perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
{
struct perf_event *event = file->private_data;
+ struct perf_event_context *ctx;
+ int ret;
+
+ ctx = perf_event_ctx_lock(event);
+ ret = perf_read_hw(event, buf, count);
+ perf_event_ctx_unlock(event, ctx);
- return perf_read_hw(event, buf, count);
+ return ret;
}
static unsigned int perf_poll(struct file *file, poll_table *wait)
@@ -3207,7 +3334,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
return events;
}
-static void perf_event_reset(struct perf_event *event)
+static void _perf_event_reset(struct perf_event *event)
{
(void)perf_event_read(event);
local64_set(&event->count, 0);
@@ -3306,25 +3433,24 @@ static int perf_event_set_output(struct perf_event *event,
struct perf_event *output_event);
static int perf_event_set_filter(struct perf_event *event, void __user *arg);
-static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
{
- struct perf_event *event = file->private_data;
void (*func)(struct perf_event *);
u32 flags = arg;
switch (cmd) {
case PERF_EVENT_IOC_ENABLE:
- func = perf_event_enable;
+ func = _perf_event_enable;
break;
case PERF_EVENT_IOC_DISABLE:
- func = perf_event_disable;
+ func = _perf_event_disable;
break;
case PERF_EVENT_IOC_RESET:
- func = perf_event_reset;
+ func = _perf_event_reset;
break;
case PERF_EVENT_IOC_REFRESH:
- return perf_event_refresh(event, arg);
+ return _perf_event_refresh(event, arg);
case PERF_EVENT_IOC_PERIOD:
return perf_event_period(event, (u64 __user *)arg);
@@ -3365,13 +3491,30 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return 0;
}
+static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ struct perf_event *event = file->private_data;
+ struct perf_event_context *ctx;
+ long ret;
+
+ ctx = perf_event_ctx_lock(event);
+ ret = _perf_ioctl(event, cmd, arg);
+ perf_event_ctx_unlock(event, ctx);
+
+ return ret;
+}
+
int perf_event_task_enable(void)
{
+ struct perf_event_context *ctx;
struct perf_event *event;
mutex_lock(&current->perf_event_mutex);
- list_for_each_entry(event, &current->perf_event_list, owner_entry)
- perf_event_for_each_child(event, perf_event_enable);
+ list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+ ctx = perf_event_ctx_lock(event);
+ perf_event_for_each_child(event, _perf_event_enable);
+ perf_event_ctx_unlock(event, ctx);
+ }
mutex_unlock(&current->perf_event_mutex);
return 0;
@@ -3379,11 +3522,15 @@ int perf_event_task_enable(void)
int perf_event_task_disable(void)
{
+ struct perf_event_context *ctx;
struct perf_event *event;
mutex_lock(&current->perf_event_mutex);
- list_for_each_entry(event, &current->perf_event_list, owner_entry)
- perf_event_for_each_child(event, perf_event_disable);
+ list_for_each_entry(event, &current->perf_event_list, owner_entry) {
+ ctx = perf_event_ctx_lock(event);
+ perf_event_for_each_child(event, _perf_event_disable);
+ perf_event_ctx_unlock(event, ctx);
+ }
mutex_unlock(&current->perf_event_mutex);
return 0;
@@ -5416,7 +5563,6 @@ static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
int err = 0;
mutex_lock(&swhash->hlist_mutex);
-
if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
struct swevent_hlist *hlist;
@@ -6216,6 +6362,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
if (!group_leader)
group_leader = event;
+ mutex_init(&event->group_leader_mutex);
mutex_init(&event->child_mutex);
INIT_LIST_HEAD(&event->child_list);
@@ -6444,6 +6591,15 @@ out:
return ret;
}
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+ if (b < a)
+ swap(a, b);
+
+ mutex_lock(a);
+ mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
/**
* sys_perf_event_open - open a performance event, associate it to a task/cpu
*
@@ -6459,7 +6615,7 @@ SYSCALL_DEFINE5(perf_event_open,
struct perf_event *group_leader = NULL, *output_event = NULL;
struct perf_event *event, *sibling;
struct perf_event_attr attr;
- struct perf_event_context *ctx;
+ struct perf_event_context *ctx, *uninitialized_var(gctx);
struct file *event_file = NULL;
struct file *group_file = NULL;
struct task_struct *task = NULL;
@@ -6513,6 +6669,16 @@ SYSCALL_DEFINE5(perf_event_open,
group_leader = NULL;
}
+ /*
+ * Take the group_leader's group_leader_mutex before observing
+ * anything in the group leader that leads to changes in ctx,
+ * many of which may be changing on another thread.
+ * In particular, we want to take this lock before deciding
+ * whether we need to move_group.
+ */
+ if (group_leader)
+ mutex_lock(&group_leader->group_leader_mutex);
+
if (pid != -1 && !(flags & PERF_FLAG_PID_CGROUP)) {
task = find_lively_task_by_vpid(pid);
if (IS_ERR(task)) {
@@ -6572,7 +6738,7 @@ SYSCALL_DEFINE5(perf_event_open,
/*
* Get the target context (task or percpu):
*/
- ctx = find_get_context(pmu, task, cpu);
+ ctx = find_get_context(pmu, task, event->cpu);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
goto err_alloc;
@@ -6627,9 +6793,14 @@ SYSCALL_DEFINE5(perf_event_open,
}
if (move_group) {
- struct perf_event_context *gctx = group_leader->ctx;
+ gctx = group_leader->ctx;
+
+ /*
+ * See perf_event_ctx_lock() for comments on the details
+ * of swizzling perf_event::ctx.
+ */
+ mutex_lock_double(&gctx->mutex, &ctx->mutex);
- mutex_lock(&gctx->mutex);
perf_remove_from_context(group_leader);
/*
@@ -6644,28 +6815,41 @@ SYSCALL_DEFINE5(perf_event_open,
perf_event__state_init(sibling);
put_ctx(gctx);
}
- mutex_unlock(&gctx->mutex);
- put_ctx(gctx);
+ } else {
+ mutex_lock(&ctx->mutex);
}
WARN_ON_ONCE(ctx->parent_ctx);
- mutex_lock(&ctx->mutex);
if (move_group) {
- perf_install_in_context(ctx, group_leader, cpu);
+ /*
+ * Wait for everybody to stop referencing the events through
+ * the old lists, before installing it on new lists.
+ */
+ synchronize_rcu();
+
+ perf_install_in_context(ctx, group_leader, event->cpu);
get_ctx(ctx);
list_for_each_entry(sibling, &group_leader->sibling_list,
group_entry) {
- perf_install_in_context(ctx, sibling, cpu);
+ perf_install_in_context(ctx, sibling, event->cpu);
get_ctx(ctx);
}
}
- perf_install_in_context(ctx, event, cpu);
+ perf_install_in_context(ctx, event, event->cpu);
++ctx->generation;
perf_unpin_context(ctx);
+
+ if (move_group) {
+ mutex_unlock(&gctx->mutex);
+ put_ctx(gctx);
+ }
mutex_unlock(&ctx->mutex);
+ if (group_leader)
+ mutex_unlock(&group_leader->group_leader_mutex);
+
event->owner = current;
mutex_lock(&current->perf_event_mutex);
@@ -6697,6 +6881,8 @@ err_task:
if (task)
put_task_struct(task);
err_group_fd:
+ if (group_leader)
+ mutex_unlock(&group_leader->group_leader_mutex);
fput_light(group_file, fput_needed);
err_fd:
put_unused_fd(event_fd);
@@ -6751,6 +6937,42 @@ err:
}
EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
+void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+{
+ struct perf_event_context *src_ctx;
+ struct perf_event_context *dst_ctx;
+ struct perf_event *event, *tmp;
+ LIST_HEAD(events);
+
+ src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
+ dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+
+ /*
+ * See perf_event_ctx_lock() for comments on the details
+ * of swizzling perf_event::ctx.
+ */
+ mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
+ list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
+ event_entry) {
+ perf_remove_from_context(event);
+ put_ctx(src_ctx);
+ list_add(&event->event_entry, &events);
+ }
+
+ synchronize_rcu();
+
+ list_for_each_entry_safe(event, tmp, &events, event_entry) {
+ list_del(&event->event_entry);
+ if (event->state >= PERF_EVENT_STATE_OFF)
+ event->state = PERF_EVENT_STATE_INACTIVE;
+ perf_install_in_context(dst_ctx, event, dst_cpu);
+ get_ctx(dst_ctx);
+ }
+ mutex_unlock(&dst_ctx->mutex);
+ mutex_unlock(&src_ctx->mutex);
+}
+EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
+
static void sync_child_event(struct perf_event *child_event,
struct task_struct *child)
{
@@ -7309,12 +7531,6 @@ static void perf_event_exit_cpu_context(int cpu)
static void perf_event_exit_cpu(int cpu)
{
- struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
-
- mutex_lock(&swhash->hlist_mutex);
- swevent_hlist_release(swhash);
- mutex_unlock(&swhash->hlist_mutex);
-
perf_event_exit_cpu_context(cpu);
}
#else