Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cpu.c           75
-rw-r--r--  kernel/cpuset.c        24
-rw-r--r--  kernel/futex.c        121
-rw-r--r--  kernel/futex_compat.c  34
4 files changed, 165 insertions, 89 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 70fbf2e..f230f9a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -16,56 +16,48 @@
 #include <linux/mutex.h>
 
 /* This protects CPUs going up and down... */
-static DEFINE_MUTEX(cpucontrol);
+static DEFINE_MUTEX(cpu_add_remove_lock);
+static DEFINE_MUTEX(cpu_bitmask_lock);
 
 static __cpuinitdata BLOCKING_NOTIFIER_HEAD(cpu_chain);
 
 #ifdef CONFIG_HOTPLUG_CPU
-static struct task_struct *lock_cpu_hotplug_owner;
-static int lock_cpu_hotplug_depth;
 
-static int __lock_cpu_hotplug(int interruptible)
-{
-        int ret = 0;
-
-        if (lock_cpu_hotplug_owner != current) {
-                if (interruptible)
-                        ret = mutex_lock_interruptible(&cpucontrol);
-                else
-                        mutex_lock(&cpucontrol);
-        }
-
-        /*
-         * Set only if we succeed in locking
-         */
-        if (!ret) {
-                lock_cpu_hotplug_depth++;
-                lock_cpu_hotplug_owner = current;
-        }
-
-        return ret;
-}
+/* Crappy recursive lock-takers in cpufreq! Complain loudly about idiots */
+static struct task_struct *recursive;
+static int recursive_depth;
 
 void lock_cpu_hotplug(void)
 {
-        __lock_cpu_hotplug(0);
+        struct task_struct *tsk = current;
+
+        if (tsk == recursive) {
+                static int warnings = 10;
+                if (warnings) {
+                        printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
+                        WARN_ON(1);
+                        warnings--;
+                }
+                recursive_depth++;
+                return;
+        }
+        mutex_lock(&cpu_bitmask_lock);
+        recursive = tsk;
 }
 EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
 
 void unlock_cpu_hotplug(void)
 {
-        if (--lock_cpu_hotplug_depth == 0) {
-                lock_cpu_hotplug_owner = NULL;
-                mutex_unlock(&cpucontrol);
+        WARN_ON(recursive != current);
+        if (recursive_depth) {
+                recursive_depth--;
+                return;
         }
+        mutex_unlock(&cpu_bitmask_lock);
+        recursive = NULL;
 }
 EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
 
-int lock_cpu_hotplug_interruptible(void)
-{
-        return __lock_cpu_hotplug(1);
-}
-EXPORT_SYMBOL_GPL(lock_cpu_hotplug_interruptible);
 #endif  /* CONFIG_HOTPLUG_CPU */
 
 /* Need to know about CPUs going up/down? */
@@ -122,9 +114,7 @@ int cpu_down(unsigned int cpu)
         struct task_struct *p;
         cpumask_t old_allowed, tmp;
 
-        if ((err = lock_cpu_hotplug_interruptible()) != 0)
-                return err;
-
+        mutex_lock(&cpu_add_remove_lock);
         if (num_online_cpus() == 1) {
                 err = -EBUSY;
                 goto out;
         }
@@ -150,7 +140,10 @@ int cpu_down(unsigned int cpu)
         cpu_clear(cpu, tmp);
         set_cpus_allowed(current, tmp);
 
+        mutex_lock(&cpu_bitmask_lock);
         p = __stop_machine_run(take_cpu_down, NULL, cpu);
+        mutex_unlock(&cpu_bitmask_lock);
+
         if (IS_ERR(p)) {
                 /* CPU didn't die: tell everyone.  Can't complain. */
                 if (blocking_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED,
@@ -187,7 +180,7 @@ out_thread:
 out_allowed:
         set_cpus_allowed(current, old_allowed);
 out:
-        unlock_cpu_hotplug();
+        mutex_unlock(&cpu_add_remove_lock);
         return err;
 }
 #endif /*CONFIG_HOTPLUG_CPU*/
@@ -197,9 +190,7 @@ int __devinit cpu_up(unsigned int cpu)
         int ret;
         void *hcpu = (void *)(long)cpu;
 
-        if ((ret = lock_cpu_hotplug_interruptible()) != 0)
-                return ret;
-
+        mutex_lock(&cpu_add_remove_lock);
         if (cpu_online(cpu) || !cpu_present(cpu)) {
                 ret = -EINVAL;
                 goto out;
         }
@@ -214,7 +205,9 @@ int __devinit cpu_up(unsigned int cpu)
         }
 
         /* Arch-specific enabling code. */
+        mutex_lock(&cpu_bitmask_lock);
         ret = __cpu_up(cpu);
+        mutex_unlock(&cpu_bitmask_lock);
         if (ret != 0)
                 goto out_notify;
         BUG_ON(!cpu_online(cpu));
@@ -227,6 +220,6 @@ out_notify:
                 blocking_notifier_call_chain(&cpu_chain,
                                 CPU_UP_CANCELED, hcpu);
 out:
-        unlock_cpu_hotplug();
+        mutex_unlock(&cpu_add_remove_lock);
         return ret;
 }
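The cpu.c change above splits the old cpucontrol mutex in two: cpu_add_remove_lock serializes whole cpu_up()/cpu_down() operations, while cpu_bitmask_lock is what lock_cpu_hotplug() now takes, with the recursive/recursive_depth pair letting a task that already holds the lock (the cpufreq callers the comment complains about) re-enter without deadlocking against itself. Below is a minimal userspace sketch of that recursion-tolerant pattern, rebuilt on pthreads; every name in it is illustrative, not kernel API.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t bitmask_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_t holder;        /* thread currently holding the lock */
    static int holder_valid;        /* holder field is meaningful        */
    static int depth;               /* extra recursive acquisitions      */

    static void hotplug_lock(void)
    {
        /*
         * Like the kernel's "tsk == recursive" test, this unlocked read
         * is only reliable when it matches the calling thread itself.
         */
        if (holder_valid && pthread_equal(holder, pthread_self())) {
            fprintf(stderr, "recursive hotplug lock\n"); /* the WARN_ON */
            depth++;
            return;
        }
        pthread_mutex_lock(&bitmask_lock);
        holder = pthread_self();
        holder_valid = 1;
    }

    static void hotplug_unlock(void)
    {
        if (depth) {            /* undo one recursive acquisition */
            depth--;
            return;
        }
        holder_valid = 0;
        pthread_mutex_unlock(&bitmask_lock);
    }

    int main(void)
    {
        hotplug_lock();
        hotplug_lock();         /* warns and bumps depth instead of hanging */
        hotplug_unlock();
        hotplug_unlock();       /* really drops the mutex */
        return 0;
    }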
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index c232dc0..1a649f2 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -762,6 +762,8 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
  *
  * Call with manage_mutex held.  May nest a call to the
  * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
+ * Must not be called holding callback_mutex, because we must
+ * not call lock_cpu_hotplug() while holding callback_mutex.
  */
 
 static void update_cpu_domains(struct cpuset *cur)
@@ -781,7 +783,7 @@ static void update_cpu_domains(struct cpuset *cur)
                 if (is_cpu_exclusive(c))
                         cpus_andnot(pspan, pspan, c->cpus_allowed);
         }
-        if (is_removed(cur) || !is_cpu_exclusive(cur)) {
+        if (!is_cpu_exclusive(cur)) {
                 cpus_or(pspan, pspan, cur->cpus_allowed);
                 if (cpus_equal(pspan, cur->cpus_allowed))
                         return;
@@ -1917,6 +1919,17 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
         return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
 }
 
+/*
+ * Locking note on the strange update_flag() call below:
+ *
+ * If the cpuset being removed is marked cpu_exclusive, then simulate
+ * turning cpu_exclusive off, which will call update_cpu_domains().
+ * The lock_cpu_hotplug() call in update_cpu_domains() must not be
+ * made while holding callback_mutex.  Elsewhere the kernel nests
+ * callback_mutex inside lock_cpu_hotplug() calls.  So the reverse
+ * nesting would risk an ABBA deadlock.
+ */
+
 static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
         struct cpuset *cs = dentry->d_fsdata;
@@ -1936,11 +1949,16 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
                 mutex_unlock(&manage_mutex);
                 return -EBUSY;
         }
+        if (is_cpu_exclusive(cs)) {
+                int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
+                if (retval < 0) {
+                        mutex_unlock(&manage_mutex);
+                        return retval;
+                }
+        }
         parent = cs->parent;
         mutex_lock(&callback_mutex);
         set_bit(CS_REMOVED, &cs->flags);
-        if (is_cpu_exclusive(cs))
-                update_cpu_domains(cs);
         list_del(&cs->sibling); /* delete my sibling from parent->children */
         spin_lock(&cs->dentry->d_lock);
         d = dget(cs->dentry);
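The cpuset.c change is purely a lock-ordering fix: update_cpu_domains() calls lock_cpu_hotplug(), so cpuset_rmdir() now simulates turning cpu_exclusive off via update_flag() before taking callback_mutex, instead of calling update_cpu_domains() inside it. Here is a self-contained pthread sketch of the ABBA pattern the comment warns about; mutex A stands in for the hotplug lock and B for callback_mutex, and all names are placeholders, not the kernel's.

    #include <pthread.h>

    static pthread_mutex_t A = PTHREAD_MUTEX_INITIALIZER; /* hotplug lock   */
    static pthread_mutex_t B = PTHREAD_MUTEX_INITIALIZER; /* callback_mutex */

    /* The order the rest of the kernel uses: B nests inside A. */
    static void *kernel_path(void *arg)
    {
        pthread_mutex_lock(&A);
        pthread_mutex_lock(&B);
        /* ... read/update cpuset state ... */
        pthread_mutex_unlock(&B);
        pthread_mutex_unlock(&A);
        return arg;
    }

    /* The order the old cpuset_rmdir() risked: A inside B. */
    static void *rmdir_path(void *arg)
    {
        pthread_mutex_lock(&B);
        pthread_mutex_lock(&A);     /* blocks forever if kernel_path holds
                                       A and is waiting for B */
        pthread_mutex_unlock(&A);
        pthread_mutex_unlock(&B);
        return arg;
    }

    int main(void)
    {
        /* Run sequentially here; run concurrently on two threads, the
           two orders can stop each other forever - the ABBA deadlock. */
        kernel_path(0);
        rmdir_path(0);
        return 0;
    }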
diff --git a/kernel/futex.c b/kernel/futex.c
index cf0c8e2..dda2049 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -415,15 +415,15 @@ out_unlock:
  */
 void exit_pi_state_list(struct task_struct *curr)
 {
-        struct futex_hash_bucket *hb;
         struct list_head *next, *head = &curr->pi_state_list;
         struct futex_pi_state *pi_state;
+        struct futex_hash_bucket *hb;
         union futex_key key;
 
         /*
          * We are a ZOMBIE and nobody can enqueue itself on
          * pi_state_list anymore, but we have to be careful
-         * versus waiters unqueueing themselfs
+         * versus waiters unqueueing themselves:
          */
         spin_lock_irq(&curr->pi_lock);
         while (!list_empty(head)) {
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr)
 
                 next = head->next;
                 pi_state = list_entry(next, struct futex_pi_state, list);
                 key = pi_state->key;
+                hb = hash_futex(&key);
                 spin_unlock_irq(&curr->pi_lock);
 
-                hb = hash_futex(&key);
                 spin_lock(&hb->lock);
 
                 spin_lock_irq(&curr->pi_lock);
+                /*
+                 * We dropped the pi-lock, so re-check whether this
+                 * task still owns the PI-state:
+                 */
                 if (head->next != next) {
                         spin_unlock(&hb->lock);
                         continue;
                 }
-                list_del_init(&pi_state->list);
-
                 WARN_ON(pi_state->owner != curr);
-
+                WARN_ON(list_empty(&pi_state->list));
+                list_del_init(&pi_state->list);
                 pi_state->owner = NULL;
                 spin_unlock_irq(&curr->pi_lock);
 
@@ -470,7 +473,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
         head = &hb->chain;
 
         list_for_each_entry_safe(this, next, head, list) {
-                if (match_futex (&this->key, &me->key)) {
+                if (match_futex(&this->key, &me->key)) {
                         /*
                          * Another waiter already exists - bump up
                          * the refcount and return its pi_state:
@@ -482,6 +485,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
                         if (unlikely(!pi_state))
                                 return -EINVAL;
 
+                        WARN_ON(!atomic_read(&pi_state->refcount));
+
                         atomic_inc(&pi_state->refcount);
                         me->pi_state = pi_state;
 
@@ -490,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
         }
 
         /*
-         * We are the first waiter - try to look up the real owner and
-         * attach the new pi_state to it:
+         * We are the first waiter - try to look up the real owner and attach
+         * the new pi_state to it, but bail out when the owner died bit is set
+         * and TID = 0:
          */
         pid = uval & FUTEX_TID_MASK;
+        if (!pid && (uval & FUTEX_OWNER_DIED))
+                return -ESRCH;
         p = futex_find_get_task(pid);
         if (!p)
                 return -ESRCH;
@@ -510,6 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
         pi_state->key = me->key;
 
         spin_lock_irq(&p->pi_lock);
+        WARN_ON(!list_empty(&pi_state->list));
         list_add(&pi_state->list, &p->pi_state_list);
         pi_state->owner = p;
         spin_unlock_irq(&p->pi_lock);
@@ -573,20 +582,29 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
          * kept enabled while there is PI state around. We must also
          * preserve the owner died bit.)
          */
-        newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
+        if (!(uval & FUTEX_OWNER_DIED)) {
+                newval = FUTEX_WAITERS | new_owner->pid;
 
-        inc_preempt_count();
-        curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-        dec_preempt_count();
+                inc_preempt_count();
+                curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+                dec_preempt_count();
+                if (curval == -EFAULT)
+                        return -EFAULT;
+                if (curval != uval)
+                        return -EINVAL;
+        }
 
-        if (curval == -EFAULT)
-                return -EFAULT;
-        if (curval != uval)
-                return -EINVAL;
+        spin_lock_irq(&pi_state->owner->pi_lock);
+        WARN_ON(list_empty(&pi_state->list));
+        list_del_init(&pi_state->list);
+        spin_unlock_irq(&pi_state->owner->pi_lock);
 
-        list_del_init(&pi_state->owner->pi_state_list);
+        spin_lock_irq(&new_owner->pi_lock);
+        WARN_ON(!list_empty(&pi_state->list));
         list_add(&pi_state->list, &new_owner->pi_state_list);
         pi_state->owner = new_owner;
+        spin_unlock_irq(&new_owner->pi_lock);
+
         rt_mutex_unlock(&pi_state->pi_mutex);
 
         return 0;
@@ -1236,6 +1254,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
                 /* Owner died? */
                 if (q.pi_state->owner != NULL) {
                         spin_lock_irq(&q.pi_state->owner->pi_lock);
+                        WARN_ON(list_empty(&q.pi_state->list));
                         list_del_init(&q.pi_state->list);
                         spin_unlock_irq(&q.pi_state->owner->pi_lock);
                 } else
@@ -1244,6 +1263,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
                 q.pi_state->owner = current;
 
                 spin_lock_irq(&current->pi_lock);
+                WARN_ON(!list_empty(&q.pi_state->list));
                 list_add(&q.pi_state->list, &current->pi_state_list);
                 spin_unlock_irq(&current->pi_lock);
 
@@ -1427,9 +1447,11 @@ retry_locked:
          * again. If it succeeds then we can return without waking
          * anyone else up:
          */
-        inc_preempt_count();
-        uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-        dec_preempt_count();
+        if (!(uval & FUTEX_OWNER_DIED)) {
+                inc_preempt_count();
+                uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+                dec_preempt_count();
+        }
 
         if (unlikely(uval == -EFAULT))
                 goto pi_faulted;
@@ -1462,9 +1484,11 @@ retry_locked:
         /*
          * No waiters - kernel unlocks the futex:
          */
-        ret = unlock_futex_pi(uaddr, uval);
-        if (ret == -EFAULT)
-                goto pi_faulted;
+        if (!(uval & FUTEX_OWNER_DIED)) {
+                ret = unlock_futex_pi(uaddr, uval);
+                if (ret == -EFAULT)
+                        goto pi_faulted;
+        }
 
 out_unlock:
         spin_unlock(&hb->lock);
@@ -1683,9 +1707,9 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-        u32 uval, nval;
+        u32 uval, nval, mval;
 
 retry:
         if (get_user(uval, uaddr))
@@ -1702,21 +1726,45 @@ retry:
                  * thread-death.) The rest of the cleanup is done in
                  * userspace.
                  */
-                nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-                                                     uval | FUTEX_OWNER_DIED);
+                mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+                nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
                 if (nval == -EFAULT)
                         return -1;
 
                 if (nval != uval)
                         goto retry;
 
-                if (uval & FUTEX_WAITERS)
-                        futex_wake(uaddr, 1);
+                /*
+                 * Wake robust non-PI futexes here. The wakeup of
+                 * PI futexes happens in exit_pi_state():
+                 */
+                if (!pi) {
+                        if (uval & FUTEX_WAITERS)
+                                futex_wake(uaddr, 1);
+                }
         }
         return 0;
 }
 
 /*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+                                     struct robust_list __user **head, int *pi)
+{
+        unsigned long uentry;
+
+        if (get_user(uentry, (unsigned long *)head))
+                return -EFAULT;
+
+        *entry = (void *)(uentry & ~1UL);
+        *pi = uentry & 1;
+
+        return 0;
+}
+
+/*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
  *
@@ -1726,14 +1774,14 @@ void exit_robust_list(struct task_struct *curr)
 {
         struct robust_list_head __user *head = curr->robust_list;
         struct robust_list __user *entry, *pending;
-        unsigned int limit = ROBUST_LIST_LIMIT;
+        unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
         unsigned long futex_offset;
 
         /*
          * Fetch the list head (which was registered earlier, via
          * sys_set_robust_list()):
          */
-        if (get_user(entry, &head->list.next))
+        if (fetch_robust_entry(&entry, &head->list.next, &pi))
                 return;
         /*
          * Fetch the relative futex offset:
@@ -1744,10 +1792,11 @@ void exit_robust_list(struct task_struct *curr)
          * Fetch any possibly pending lock-add first, and handle it
          * if it exists:
          */
-        if (get_user(pending, &head->list_op_pending))
+        if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
                 return;
+
         if (pending)
-                handle_futex_death((void *)pending + futex_offset, curr);
+                handle_futex_death((void *)pending + futex_offset, curr, pip);
 
         while (entry != &head->list) {
                 /*
@@ -1756,12 +1805,12 @@ void exit_robust_list(struct task_struct *curr)
                  */
                 if (entry != pending)
                         if (handle_futex_death((void *)entry + futex_offset,
-                                                curr))
+                                                curr, pi))
                                 return;
                 /*
                  * Fetch the next entry in the list:
                  */
-                if (get_user(entry, &entry->next))
+                if (fetch_robust_entry(&entry, &entry->next, &pi))
                         return;
                 /*
                  * Avoid excessively long or circular lists:
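fetch_robust_entry() above (and its compat twin below) decode the same trick: robust-list entries are at least word-aligned, so bit 0 of the userspace pointer is free to carry the "this is a PI futex" flag. A standalone illustration of the encoding follows; the struct is reduced to its next pointer, and only the bit arithmetic mirrors the kernel code.

    #include <stdint.h>
    #include <stdio.h>

    struct robust_list { struct robust_list *next; };

    /* Store a PI flag in bit 0 of an aligned pointer. */
    static uintptr_t tag_entry(struct robust_list *entry, int pi)
    {
        return (uintptr_t)entry | (pi ? 1UL : 0UL);
    }

    /* What fetch_robust_entry() does after get_user(): split the raw
       word back into the real pointer and the PI bit. */
    static struct robust_list *untag_entry(uintptr_t uentry, int *pi)
    {
        *pi = uentry & 1;
        return (struct robust_list *)(uentry & ~1UL);
    }

    int main(void)
    {
        struct robust_list node;
        int pi;
        struct robust_list *e = untag_entry(tag_entry(&node, 1), &pi);

        printf("pointer intact: %d, pi bit: %d\n", e == &node, pi);
        return 0;
    }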
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d1d92b4..d1aab1a 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -12,6 +12,23 @@
 
 #include <asm/uaccess.h>
 
+
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+                   compat_uptr_t *head, int *pi)
+{
+        if (get_user(*uentry, head))
+                return -EFAULT;
+
+        *entry = compat_ptr((*uentry) & ~1);
+        *pi = (unsigned int)(*uentry) & 1;
+
+        return 0;
+}
+
 /*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr)
 {
         struct compat_robust_list_head __user *head = curr->compat_robust_list;
         struct robust_list __user *entry, *pending;
+        unsigned int limit = ROBUST_LIST_LIMIT, pi;
         compat_uptr_t uentry, upending;
-        unsigned int limit = ROBUST_LIST_LIMIT;
         compat_long_t futex_offset;
 
         /*
          * Fetch the list head (which was registered earlier, via
          * sys_set_robust_list()):
          */
-        if (get_user(uentry, &head->list.next))
+        if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
                 return;
-        entry = compat_ptr(uentry);
         /*
          * Fetch the relative futex offset:
          */
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr)
          * Fetch any possibly pending lock-add first, and handle it
          * if it exists:
          */
-        if (get_user(upending, &head->list_op_pending))
+        if (fetch_robust_entry(&upending, &pending,
+                               &head->list_op_pending, &pi))
                 return;
-        pending = compat_ptr(upending);
         if (upending)
-                handle_futex_death((void *)pending + futex_offset, curr);
+                handle_futex_death((void *)pending + futex_offset, curr, pi);
 
         while (compat_ptr(uentry) != &head->list) {
                 /*
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr)
                  */
                 if (entry != pending)
                         if (handle_futex_death((void *)entry + futex_offset,
-                                                curr))
+                                                curr, pi))
                                 return;
 
                 /*
                  * Fetch the next entry in the list:
                  */
-                if (get_user(uentry, (compat_uptr_t *)&entry->next))
+                if (fetch_robust_entry(&uentry, &entry,
+                                       (compat_uptr_t *)&entry->next, &pi))
                         return;
-                entry = compat_ptr(uentry);
                 /*
                  * Avoid excessively long or circular lists:
                  */
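One detail worth pulling out of the futex.c hunks above: handle_futex_death() no longer just ORs in FUTEX_OWNER_DIED, it also clears the dead owner's TID while preserving FUTEX_WAITERS, so a later unlock still sees that someone is blocked in the kernel. The word transition can be checked in isolation using the constant values from <linux/futex.h>:

    #include <stdint.h>
    #include <assert.h>

    #define FUTEX_WAITERS     0x80000000
    #define FUTEX_OWNER_DIED  0x40000000
    #define FUTEX_TID_MASK    0x3fffffff

    /* The value handle_futex_death() installs with cmpxchg ("mval"). */
    static uint32_t mark_owner_dead(uint32_t uval)
    {
        return (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
    }

    int main(void)
    {
        uint32_t locked = FUTEX_WAITERS | 1234;  /* TID 1234, waiters queued */
        uint32_t dead = mark_owner_dead(locked);

        assert(dead == (FUTEX_WAITERS | FUTEX_OWNER_DIED));
        assert((dead & FUTEX_TID_MASK) == 0);    /* stale TID is gone */
        return 0;
    }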