diff options
Diffstat (limited to 'kernel/futex.c')
-rw-r--r-- | kernel/futex.c | 213 |
1 files changed, 52 insertions, 161 deletions
diff --git a/kernel/futex.c b/kernel/futex.c index 7517c78..c7c19cb 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -588,55 +588,6 @@ void exit_pi_state_list(struct task_struct *curr) raw_spin_unlock_irq(&curr->pi_lock); } -/* - * We need to check the following states: - * - * Waiter | pi_state | pi->owner | uTID | uODIED | ? - * - * [1] NULL | --- | --- | 0 | 0/1 | Valid - * [2] NULL | --- | --- | >0 | 0/1 | Valid - * - * [3] Found | NULL | -- | Any | 0/1 | Invalid - * - * [4] Found | Found | NULL | 0 | 1 | Valid - * [5] Found | Found | NULL | >0 | 1 | Invalid - * - * [6] Found | Found | task | 0 | 1 | Valid - * - * [7] Found | Found | NULL | Any | 0 | Invalid - * - * [8] Found | Found | task | ==taskTID | 0/1 | Valid - * [9] Found | Found | task | 0 | 0 | Invalid - * [10] Found | Found | task | !=taskTID | 0/1 | Invalid - * - * [1] Indicates that the kernel can acquire the futex atomically. We - * came came here due to a stale FUTEX_WAITERS/FUTEX_OWNER_DIED bit. - * - * [2] Valid, if TID does not belong to a kernel thread. If no matching - * thread is found then it indicates that the owner TID has died. - * - * [3] Invalid. The waiter is queued on a non PI futex - * - * [4] Valid state after exit_robust_list(), which sets the user space - * value to FUTEX_WAITERS | FUTEX_OWNER_DIED. - * - * [5] The user space value got manipulated between exit_robust_list() - * and exit_pi_state_list() - * - * [6] Valid state after exit_pi_state_list() which sets the new owner in - * the pi_state but cannot access the user space value. - * - * [7] pi_state->owner can only be NULL when the OWNER_DIED bit is set. - * - * [8] Owner and user space value match - * - * [9] There is no transient state which sets the user space TID to 0 - * except exit_robust_list(), but this is indicated by the - * FUTEX_OWNER_DIED bit. See [4] - * - * [10] There is no transient state which leaves owner and user space - * TID out of sync. - */ static int lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, union futex_key *key, struct futex_pi_state **ps) @@ -652,13 +603,12 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, plist_for_each_entry_safe(this, next, head, list) { if (match_futex(&this->key, key)) { /* - * Sanity check the waiter before increasing - * the refcount and attaching to it. + * Another waiter already exists - bump up + * the refcount and return its pi_state: */ pi_state = this->pi_state; /* - * Userspace might have messed up non-PI and - * PI futexes [3] + * Userspace might have messed up non-PI and PI futexes */ if (unlikely(!pi_state)) return -EINVAL; @@ -666,70 +616,34 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, WARN_ON(!atomic_read(&pi_state->refcount)); /* - * Handle the owner died case: + * When pi_state->owner is NULL then the owner died + * and another waiter is on the fly. pi_state->owner + * is fixed up by the task which acquires + * pi_state->rt_mutex. + * + * We do not check for pid == 0 which can happen when + * the owner died and robust_list_exit() cleared the + * TID. */ - if (uval & FUTEX_OWNER_DIED) { + if (pid && pi_state->owner) { /* - * exit_pi_state_list sets owner to NULL and - * wakes the topmost waiter. The task which - * acquires the pi_state->rt_mutex will fixup - * owner. + * Bail out if user space manipulated the + * futex value. */ - if (!pi_state->owner) { - /* - * No pi state owner, but the user - * space TID is not 0. Inconsistent - * state. [5] - */ - if (pid) - return -EINVAL; - /* - * Take a ref on the state and - * return. [4] - */ - goto out_state; - } - - /* - * If TID is 0, then either the dying owner - * has not yet executed exit_pi_state_list() - * or some waiter acquired the rtmutex in the - * pi state, but did not yet fixup the TID in - * user space. - * - * Take a ref on the state and return. [6] - */ - if (!pid) - goto out_state; - } else { - /* - * If the owner died bit is not set, - * then the pi_state must have an - * owner. [7] - */ - if (!pi_state->owner) + if (pid != task_pid_vnr(pi_state->owner)) return -EINVAL; } - /* - * Bail out if user space manipulated the - * futex value. If pi state exists then the - * owner TID must be the same as the user - * space TID. [9/10] - */ - if (pid != task_pid_vnr(pi_state->owner)) - return -EINVAL; - - out_state: atomic_inc(&pi_state->refcount); *ps = pi_state; + return 0; } } /* * We are the first waiter - try to look up the real owner and attach - * the new pi_state to it, but bail out when TID = 0 [1] + * the new pi_state to it, but bail out when TID = 0 */ if (!pid) return -ESRCH; @@ -737,11 +651,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, if (!p) return -ESRCH; - if (!p->mm) { - put_task_struct(p); - return -EPERM; - } - /* * We need to look at the task state flags to figure out, * whether the task is exiting. To protect against the do_exit @@ -762,9 +671,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, return ret; } - /* - * No existing pi state. First waiter. [2] - */ pi_state = alloc_pi_state(); /* @@ -836,18 +742,10 @@ retry: return -EDEADLK; /* - * Surprise - we got the lock, but we do not trust user space at all. + * Surprise - we got the lock. Just return to userspace: */ - if (unlikely(!curval)) { - /* - * We verify whether there is kernel state for this - * futex. If not, we can safely assume, that the 0 -> - * TID transition is correct. If state exists, we do - * not bother to fixup the user space state as it was - * corrupted already. - */ - return futex_top_waiter(hb, key) ? -EINVAL : 1; - } + if (unlikely(!curval)) + return 1; uval = curval; @@ -977,7 +875,6 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) struct task_struct *new_owner; struct futex_pi_state *pi_state = this->pi_state; u32 curval, newval; - int ret = 0; if (!pi_state) return -EINVAL; @@ -1001,19 +898,23 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this) new_owner = this->task; /* - * We pass it to the next owner. The WAITERS bit is always - * kept enabled while there is PI state around. We cleanup the - * owner died bit, because we are the owner. + * We pass it to the next owner. (The WAITERS bit is always + * kept enabled while there is PI state around. We must also + * preserve the owner died bit.) */ - newval = FUTEX_WAITERS | task_pid_vnr(new_owner); + if (!(uval & FUTEX_OWNER_DIED)) { + int ret = 0; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) - ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; - if (ret) { - raw_spin_unlock(&pi_state->pi_mutex.wait_lock); - return ret; + newval = FUTEX_WAITERS | task_pid_vnr(new_owner); + + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + ret = -EFAULT; + else if (curval != uval) + ret = -EINVAL; + if (ret) { + raw_spin_unlock(&pi_state->pi_mutex.wait_lock); + return ret; + } } raw_spin_lock_irq(&pi_state->owner->pi_lock); @@ -1292,7 +1193,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key, * * Returns: * 0 - failed to acquire the lock atomicly - * >0 - acquired the lock, return value is vpid of the top_waiter + * 1 - acquired the lock * <0 - error */ static int futex_proxy_trylock_atomic(u32 __user *pifutex, @@ -1303,7 +1204,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, { struct futex_q *top_waiter = NULL; u32 curval; - int ret, vpid; + int ret; if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; @@ -1331,14 +1232,11 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, * the contended case or if set_waiters is 1. The pi_state is returned * in ps in contended cases. */ - vpid = task_pid_vnr(top_waiter->task); ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task, set_waiters); - if (ret == 1) { + if (ret == 1) requeue_pi_wake_futex(top_waiter, key2, hb2); - return vpid; - } return ret; } @@ -1370,6 +1268,7 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags, struct futex_hash_bucket *hb1, *hb2; struct plist_head *head1; struct futex_q *this, *next; + u32 curval2; if (requeue_pi) { /* @@ -1471,25 +1370,16 @@ retry_private: * At this point the top_waiter has either taken uaddr2 or is * waiting on it. If the former, then the pi_state will not * exist yet, look it up one more time to ensure we have a - * reference to it. If the lock was taken, ret contains the - * vpid of the top waiter task. + * reference to it. */ - if (ret > 0) { + if (ret == 1) { WARN_ON(pi_state); drop_count++; task_count++; - /* - * If we acquired the lock, then the user - * space value of uaddr2 should be vpid. It - * cannot be changed by the top waiter as it - * is blocked on hb2 lock if it tries to do - * so. If something fiddled with it behind our - * back the pi state lookup might unearth - * it. So we rather use the known value than - * rereading and handing potential crap to - * lookup_pi_state. - */ - ret = lookup_pi_state(ret, hb2, &key2, &pi_state); + ret = get_futex_value_locked(&curval2, uaddr2); + if (!ret) + ret = lookup_pi_state(curval2, hb2, &key2, + &pi_state); } switch (ret) { @@ -2259,10 +2149,9 @@ retry: /* * To avoid races, try to do the TID -> 0 atomic transition * again. If it succeeds then we can return without waking - * anyone else up. We only try this if neither the waiters nor - * the owner died bit are set. + * anyone else up: */ - if (!(uval & ~FUTEX_TID_MASK) && + if (!(uval & FUTEX_OWNER_DIED) && cmpxchg_futex_value_locked(&uval, uaddr, vpid, 0)) goto pi_faulted; /* @@ -2294,9 +2183,11 @@ retry: /* * No waiters - kernel unlocks the futex: */ - ret = unlock_futex_pi(uaddr, uval); - if (ret == -EFAULT) - goto pi_faulted; + if (!(uval & FUTEX_OWNER_DIED)) { + ret = unlock_futex_pi(uaddr, uval); + if (ret == -EFAULT) + goto pi_faulted; + } out_unlock: spin_unlock(&hb->lock); |