From 9ec2690758a5467f24beb301cca5098078073bba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 20 May 2011 16:18:50 +0200 Subject: timerfd: Manage cancelable timers in timerfd Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with a rcu based list the list walk it's less horrible than the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra Signed-off-by: Thomas Gleixner Reviewed-by: Peter Zijlstra --- fs/timerfd.c | 105 +++++++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 74 insertions(+), 31 deletions(-) (limited to 'fs/timerfd.c') diff --git a/fs/timerfd.c b/fs/timerfd.c index 7e14c9e..f67acbd 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -22,6 +22,7 @@ #include #include #include +#include struct timerfd_ctx { struct hrtimer tmr; @@ -31,9 +32,14 @@ struct timerfd_ctx { u64 ticks; int expired; int clockid; + struct rcu_head rcu; + struct list_head clist; bool might_cancel; }; +static LIST_HEAD(cancel_list); +static DEFINE_SPINLOCK(cancel_lock); + /* * This gets called when the timer event triggers. We set the "expired" * flag, but we do not re-arm the timer (in case it's necessary, @@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) return HRTIMER_NORESTART; } -static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +/* + * Called when the clock was set to cancel the timers in the cancel + * list. + */ +void timerfd_clock_was_set(void) { - ktime_t remaining; + ktime_t moffs = ktime_get_monotonic_offset(); + struct timerfd_ctx *ctx; + unsigned long flags; - remaining = hrtimer_expires_remaining(&ctx->tmr); - return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; + rcu_read_lock(); + list_for_each_entry_rcu(ctx, &cancel_list, clist) { + if (!ctx->might_cancel) + continue; + spin_lock_irqsave(&ctx->wqh.lock, flags); + if (ctx->moffs.tv64 != moffs.tv64) { + ctx->moffs.tv64 = KTIME_MAX; + wake_up_locked(&ctx->wqh); + } + spin_unlock_irqrestore(&ctx->wqh.lock, flags); + } + rcu_read_unlock(); } -static bool timerfd_canceled(struct timerfd_ctx *ctx) +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) { - ktime_t moffs; + if (ctx->might_cancel) { + ctx->might_cancel = false; + spin_lock(&cancel_lock); + list_del_rcu(&ctx->clist); + spin_unlock(&cancel_lock); + } +} - if (!ctx->might_cancel) +static bool timerfd_canceled(struct timerfd_ctx *ctx) +{ + if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) return false; + ctx->moffs = ktime_get_monotonic_offset(); + return true; +} - moffs = ktime_get_monotonic_offset(); +static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) +{ + if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && + (flags & TFD_TIMER_CANCEL_ON_SET)) { + if (!ctx->might_cancel) { + ctx->might_cancel = true; + spin_lock(&cancel_lock); + list_add_rcu(&ctx->clist, &cancel_list); + spin_unlock(&cancel_lock); + } + } else if (ctx->might_cancel) { + timerfd_remove_cancel(ctx); + } +} - if (moffs.tv64 == ctx->moffs.tv64) - return false; +static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) +{ + ktime_t remaining; - ctx->moffs = moffs; - return true; + remaining = hrtimer_expires_remaining(&ctx->tmr); + return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; } static int timerfd_setup(struct timerfd_ctx *ctx, int flags, @@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, htmode = (flags & TFD_TIMER_ABSTIME) ? HRTIMER_MODE_ABS: HRTIMER_MODE_REL; - ctx->might_cancel = false; - if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && - (flags & TFD_TIMER_CANCELON_SET)) { - clockid = CLOCK_REALTIME_COS; - ctx->might_cancel = true; - } - texp = timespec_to_ktime(ktmr->it_value); ctx->expired = 0; ctx->ticks = 0; @@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file) { struct timerfd_ctx *ctx = file->private_data; + timerfd_remove_cancel(ctx); hrtimer_cancel(&ctx->tmr); - kfree(ctx); + kfree_rcu(ctx, rcu); return 0; } @@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, else res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); + /* + * If clock has changed, we do not care about the + * ticks and we do not rearm the timer. Userspace must + * reevaluate anyway. + */ + if (timerfd_canceled(ctx)) { + ctx->ticks = 0; + ctx->expired = 0; + res = -ECANCELED; + } + if (ctx->ticks) { ticks = ctx->ticks; - /* - * If clock has changed, we do not care about the - * ticks and we do not rearm the timer. Userspace must - * reevaluate anyway. - */ - if (timerfd_canceled(ctx)) { - ticks = 0; - ctx->expired = 0; - res = -ECANCELED; - } - if (ctx->expired && ctx->tintv.tv64) { /* * If tintv.tv64 != 0, this is a periodic timer that @@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, return PTR_ERR(file); ctx = file->private_data; + timerfd_setup_cancel(ctx, flags); + /* * We need to stop the existing timer before reprogramming * it to the new values. -- cgit v1.1