From c672af35d54992b88d3c48133bd62cc3386fb2f9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:36:59 +0100 Subject: signal: Fix SIGCONT notification code After a task receives SIGCONT, its parent is notified via SIGCHLD with its siginfo describing what the notified event is. If SIGCONT is received while the child process is stopped, the code should be CLD_CONTINUED. If SIGCONT is recieved while the child process is in the process of being stopped, it should be CLD_STOPPED. Which code to use is determined in prepare_signal() and recorded in signal->flags using SIGNAL_CLD_CONTINUED|STOP flags. get_signal_deliver() should test these flags and then notify accoringly; however, it incorrectly tested SIGNAL_STOP_CONTINUED instead of SIGNAL_CLD_CONTINUED, thus incorrectly notifying CLD_CONTINUED if the signal is delivered before the task is wait(2)ed and CLD_STOPPED if the state was fetched already. Fix it by testing SIGNAL_CLD_CONTINUED. While at it, uncompress the ?: test into if/else clause for better readability. Signed-off-by: Tejun Heo Reviewed-by: Oleg Nesterov Acked-by: Roland McGrath --- kernel/signal.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 3175186..e26274a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1853,8 +1853,13 @@ relock: * the CLD_ si_code into SIGNAL_CLD_MASK bits. */ if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { - int why = (signal->flags & SIGNAL_STOP_CONTINUED) - ? CLD_CONTINUED : CLD_STOPPED; + int why; + + if (signal->flags & SIGNAL_CLD_CONTINUED) + why = CLD_CONTINUED; + else + why = CLD_STOPPED; + signal->flags &= ~SIGNAL_CLD_MASK; why = tracehook_notify_jctl(why, CLD_CONTINUED); -- cgit v1.1 From 71db5eb99c960e9c30e4b3ed04103c513b6251b5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Remove superflous try_to_freeze() loop in do_signal_stop() do_signal_stop() is used only by get_signal_to_deliver() and after a successful signal stop, it always calls try_to_freeze(), so the try_to_freeze() loop around schedule() in do_signal_stop() is superflous and confusing. Remove it. Signed-off-by: Tejun Heo Acked-by: Rafael J. Wysocki Acked-by: Oleg Nesterov Acked-by: Roland McGrath --- kernel/signal.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index e26274a..f4db769 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1781,9 +1781,7 @@ static int do_signal_stop(int signr) } /* Now we don't run again until woken by SIGCONT or SIGKILL */ - do { - schedule(); - } while (try_to_freeze()); + schedule(); tracehook_finish_jctl(); current->exit_code = 0; -- cgit v1.1 From edf2ed153bcae52de70db00a98b0e81a5668e563 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Kill tracehook_notify_jctl() tracehook_notify_jctl() aids in determining whether and what to report to the parent when a task is stopped or continued. The function also adds an extra requirement that siglock may be released across it, which is currently unused and quite difficult to satisfy in well-defined manner. As job control and the notifications are about to receive major overhaul, remove the tracehook and open code it. If ever necessary, let's factor it out after the overhaul. * Oleg spotted incorrect CLD_CONTINUED/STOPPED selection when ptraced. Fixed. Signed-off-by: Tejun Heo Cc: Oleg Nesterov Cc: Roland McGrath --- kernel/signal.c | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index f4db769..03d874e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1727,7 +1727,7 @@ void ptrace_notify(int exit_code) static int do_signal_stop(int signr) { struct signal_struct *sig = current->signal; - int notify; + int notify = 0; if (!sig->group_stop_count) { struct task_struct *t; @@ -1759,19 +1759,16 @@ static int do_signal_stop(int signr) * a group stop in progress and we are the last to stop, report * to the parent. When ptraced, every thread reports itself. */ - notify = sig->group_stop_count == 1 ? CLD_STOPPED : 0; - notify = tracehook_notify_jctl(notify, CLD_STOPPED); - /* - * tracehook_notify_jctl() can drop and reacquire siglock, so - * we keep ->group_stop_count != 0 before the call. If SIGCONT - * or SIGKILL comes in between ->group_stop_count == 0. - */ - if (sig->group_stop_count) { - if (!--sig->group_stop_count) - sig->flags = SIGNAL_STOP_STOPPED; - current->exit_code = sig->group_exit_code; - __set_current_state(TASK_STOPPED); + if (!--sig->group_stop_count) { + sig->flags = SIGNAL_STOP_STOPPED; + notify = CLD_STOPPED; } + if (task_ptrace(current)) + notify = CLD_STOPPED; + + current->exit_code = sig->group_exit_code; + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); if (notify) { @@ -1860,14 +1857,11 @@ relock: signal->flags &= ~SIGNAL_CLD_MASK; - why = tracehook_notify_jctl(why, CLD_CONTINUED); spin_unlock_irq(&sighand->siglock); - if (why) { - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current->group_leader, why); - read_unlock(&tasklist_lock); - } + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current->group_leader, why); + read_unlock(&tasklist_lock); goto relock; } @@ -2034,7 +2028,7 @@ void exit_signals(struct task_struct *tsk) if (unlikely(tsk->signal->group_stop_count) && !--tsk->signal->group_stop_count) { tsk->signal->flags = SIGNAL_STOP_STOPPED; - group_stop = tracehook_notify_jctl(CLD_STOPPED, CLD_STOPPED); + group_stop = CLD_STOPPED; } out: spin_unlock_irq(&tsk->sighand->siglock); -- cgit v1.1 From fe1bc6a0954611b806f9e158eb0817cf8ba21660 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Add @why to ptrace_stop() To prepare for cleanup of the interaction between group stop and ptrace, add @why to ptrace_stop(). Existing users are updated such that there is no behavior change. Signed-off-by: Tejun Heo Acked-by: Roland McGrath --- kernel/signal.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 03d874e..95ac42d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1617,7 +1617,7 @@ static int sigkill_pending(struct task_struct *tsk) * If we actually decide not to stop at all because the tracer * is gone, we keep current->exit_code unless clear_code. */ -static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) +static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) __releases(¤t->sighand->siglock) __acquires(¤t->sighand->siglock) { @@ -1655,7 +1655,7 @@ static void ptrace_stop(int exit_code, int clear_code, siginfo_t *info) spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); if (may_ptrace_stop()) { - do_notify_parent_cldstop(current, CLD_TRAPPED); + do_notify_parent_cldstop(current, why); /* * Don't want to allow preemption here, because * sys_ptrace() needs this task to be inactive. @@ -1714,7 +1714,7 @@ void ptrace_notify(int exit_code) /* Let the debugger run. */ spin_lock_irq(¤t->sighand->siglock); - ptrace_stop(exit_code, 1, &info); + ptrace_stop(exit_code, CLD_TRAPPED, 1, &info); spin_unlock_irq(¤t->sighand->siglock); } @@ -1795,7 +1795,7 @@ static int ptrace_signal(int signr, siginfo_t *info, ptrace_signal_deliver(regs, cookie); /* Let the debugger run. */ - ptrace_stop(signr, 0, info); + ptrace_stop(signr, CLD_TRAPPED, 0, info); /* We're back. Did the debugger cancel the sig? */ signr = current->exit_code; -- cgit v1.1 From e5c1902e9260a0075ea52cb5ef627a8d9aaede89 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Fix premature completion of group stop when interfered by ptrace task->signal->group_stop_count is used to track the progress of group stop. It's initialized to the number of tasks which need to stop for group stop to finish and each stopping or trapping task decrements. However, each task doesn't keep track of whether it decremented the counter or not and if woken up before the group stop is complete and stops again, it can decrement the counter multiple times. Please consider the following example code. static void *worker(void *arg) { while (1) ; return NULL; } int main(void) { pthread_t thread; pid_t pid; int i; pid = fork(); if (!pid) { for (i = 0; i < 5; i++) pthread_create(&thread, NULL, worker, NULL); while (1) ; return 0; } ptrace(PTRACE_ATTACH, pid, NULL, NULL); while (1) { waitid(P_PID, pid, NULL, WSTOPPED); ptrace(PTRACE_SINGLESTEP, pid, NULL, (void *)(long)SIGSTOP); } return 0; } The child creates five threads and the parent continuously traps the first thread and whenever the child gets a signal, SIGSTOP is delivered. If an external process sends SIGSTOP to the child, all other threads in the process should reliably stop. However, due to the above bug, the first thread will often end up consuming group_stop_count multiple times and SIGSTOP often ends up stopping none or part of the other four threads. This patch adds a new field task->group_stop which is protected by siglock and uses GROUP_STOP_CONSUME flag to track which task is still to consume group_stop_count to fix this bug. task_clear_group_stop_pending() and task_participate_group_stop() are added to help manipulating group stop states. As ptrace_stop() now also uses task_participate_group_stop(), it will set SIGNAL_STOP_STOPPED if it completes a group stop. There still are many issues regarding the interaction between group stop and ptrace. Patches to address them will follow. - Oleg spotted duplicate GROUP_STOP_CONSUME. Dropped. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- kernel/signal.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 8 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 95ac42d..ecb2008 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -223,6 +223,52 @@ static inline void print_dropped_signal(int sig) current->comm, current->pid, sig); } +/** + * task_clear_group_stop_pending - clear pending group stop + * @task: target task + * + * Clear group stop states for @task. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +static void task_clear_group_stop_pending(struct task_struct *task) +{ + task->group_stop &= ~GROUP_STOP_CONSUME; +} + +/** + * task_participate_group_stop - participate in a group stop + * @task: task participating in a group stop + * + * @task is participating in a group stop. Group stop states are cleared + * and the group stop count is consumed if %GROUP_STOP_CONSUME was set. If + * the consumption completes the group stop, the appropriate %SIGNAL_* + * flags are set. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +static bool task_participate_group_stop(struct task_struct *task) +{ + struct signal_struct *sig = task->signal; + bool consume = task->group_stop & GROUP_STOP_CONSUME; + + task_clear_group_stop_pending(task); + + if (!consume) + return false; + + if (!WARN_ON_ONCE(sig->group_stop_count == 0)) + sig->group_stop_count--; + + if (!sig->group_stop_count) { + sig->flags = SIGNAL_STOP_STOPPED; + return true; + } + return false; +} + /* * allocate a new signal queue record * - this may be called without locks if and only if t == current, otherwise an @@ -1645,7 +1691,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) * we must participate in the bookkeeping. */ if (current->signal->group_stop_count > 0) - --current->signal->group_stop_count; + task_participate_group_stop(current); current->last_siginfo = info; current->exit_code = exit_code; @@ -1730,6 +1776,7 @@ static int do_signal_stop(int signr) int notify = 0; if (!sig->group_stop_count) { + unsigned int gstop = GROUP_STOP_CONSUME; struct task_struct *t; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || @@ -1741,6 +1788,7 @@ static int do_signal_stop(int signr) */ sig->group_exit_code = signr; + current->group_stop = gstop; sig->group_stop_count = 1; for (t = next_thread(current); t != current; t = next_thread(t)) /* @@ -1750,19 +1798,19 @@ static int do_signal_stop(int signr) */ if (!(t->flags & PF_EXITING) && !task_is_stopped_or_traced(t)) { + t->group_stop = gstop; sig->group_stop_count++; signal_wake_up(t, 0); - } + } else + task_clear_group_stop_pending(t); } /* * If there are no other threads in the group, or if there is * a group stop in progress and we are the last to stop, report * to the parent. When ptraced, every thread reports itself. */ - if (!--sig->group_stop_count) { - sig->flags = SIGNAL_STOP_STOPPED; + if (task_participate_group_stop(current)) notify = CLD_STOPPED; - } if (task_ptrace(current)) notify = CLD_STOPPED; @@ -2026,10 +2074,8 @@ void exit_signals(struct task_struct *tsk) recalc_sigpending_and_wake(t); if (unlikely(tsk->signal->group_stop_count) && - !--tsk->signal->group_stop_count) { - tsk->signal->flags = SIGNAL_STOP_STOPPED; + task_participate_group_stop(tsk)) group_stop = CLD_STOPPED; - } out: spin_unlock_irq(&tsk->sighand->siglock); -- cgit v1.1 From 39efa3ef3a376a4e53de2f82fc91182459d34200 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: signal: Use GROUP_STOP_PENDING to stop once for a single group stop Currently task->signal->group_stop_count is used to decide whether to stop for group stop. However, if there is a task in the group which is taking a long time to stop, other tasks which are continued by ptrace would repeatedly stop for the same group stop until the group stop is complete. Conversely, if a ptraced task is in TASK_TRACED state, the debugger won't get notified of group stops which is inconsistent compared to the ptraced task in any other state. This patch introduces GROUP_STOP_PENDING which tracks whether a task is yet to stop for the group stop in progress. The flag is set when a group stop starts and cleared when the task stops the first time for the group stop, and consulted whenever whether the task should participate in a group stop needs to be determined. Note that now tasks in TASK_TRACED also participate in group stop. This results in the following behavior changes. * For a single group stop, a ptracer would see at most one stop reported. * A ptracee in TASK_TRACED now also participates in group stop and the tracer would get the notification. However, as a ptraced task could be in TASK_STOPPED state or any ptrace trap could consume group stop, the notification may still be missing. These will be addressed with further patches. * A ptracee may start a group stop while one is still in progress if the tracer let it continue with stop signal delivery. Group stop code handles this correctly. Oleg: * Spotted that a task might skip signal check even when its GROUP_STOP_PENDING is set. Fixed by updating recalc_sigpending_tsk() to check GROUP_STOP_PENDING instead of group_stop_count. * Pointed out that task->group_stop should be cleared whenever task->signal->group_stop_count is cleared. Fixed accordingly. * Pointed out the behavior inconsistency between TASK_TRACED and RUNNING and the last behavior change. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- kernel/signal.c | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index ecb2008..a2e7a65 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -124,7 +124,7 @@ static inline int has_pending_signals(sigset_t *signal, sigset_t *blocked) static int recalc_sigpending_tsk(struct task_struct *t) { - if (t->signal->group_stop_count > 0 || + if ((t->group_stop & GROUP_STOP_PENDING) || PENDING(&t->pending, &t->blocked) || PENDING(&t->signal->shared_pending, &t->blocked)) { set_tsk_thread_flag(t, TIF_SIGPENDING); @@ -232,19 +232,19 @@ static inline void print_dropped_signal(int sig) * CONTEXT: * Must be called with @task->sighand->siglock held. */ -static void task_clear_group_stop_pending(struct task_struct *task) +void task_clear_group_stop_pending(struct task_struct *task) { - task->group_stop &= ~GROUP_STOP_CONSUME; + task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME); } /** * task_participate_group_stop - participate in a group stop * @task: task participating in a group stop * - * @task is participating in a group stop. Group stop states are cleared - * and the group stop count is consumed if %GROUP_STOP_CONSUME was set. If - * the consumption completes the group stop, the appropriate %SIGNAL_* - * flags are set. + * @task has GROUP_STOP_PENDING set and is participating in a group stop. + * Group stop states are cleared and the group stop count is consumed if + * %GROUP_STOP_CONSUME was set. If the consumption completes the group + * stop, the appropriate %SIGNAL_* flags are set. * * CONTEXT: * Must be called with @task->sighand->siglock held. @@ -254,6 +254,8 @@ static bool task_participate_group_stop(struct task_struct *task) struct signal_struct *sig = task->signal; bool consume = task->group_stop & GROUP_STOP_CONSUME; + WARN_ON_ONCE(!(task->group_stop & GROUP_STOP_PENDING)); + task_clear_group_stop_pending(task); if (!consume) @@ -765,6 +767,9 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) t = p; do { unsigned int state; + + task_clear_group_stop_pending(t); + rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); /* * If there is a handler for SIGCONT, we must make @@ -906,6 +911,7 @@ static void complete_signal(int sig, struct task_struct *p, int group) signal->group_stop_count = 0; t = p; do { + task_clear_group_stop_pending(t); sigaddset(&t->pending.signal, SIGKILL); signal_wake_up(t, 1); } while_each_thread(p, t); @@ -1139,6 +1145,7 @@ int zap_other_threads(struct task_struct *p) p->signal->group_stop_count = 0; while_each_thread(p, t) { + task_clear_group_stop_pending(t); count++; /* Don't bother with already dead threads */ @@ -1690,7 +1697,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) * If there is a group stop in progress, * we must participate in the bookkeeping. */ - if (current->signal->group_stop_count > 0) + if (current->group_stop & GROUP_STOP_PENDING) task_participate_group_stop(current); current->last_siginfo = info; @@ -1775,8 +1782,8 @@ static int do_signal_stop(int signr) struct signal_struct *sig = current->signal; int notify = 0; - if (!sig->group_stop_count) { - unsigned int gstop = GROUP_STOP_CONSUME; + if (!(current->group_stop & GROUP_STOP_PENDING)) { + unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; struct task_struct *t; if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || @@ -1796,8 +1803,7 @@ static int do_signal_stop(int signr) * stop is always done with the siglock held, * so this check has no races. */ - if (!(t->flags & PF_EXITING) && - !task_is_stopped_or_traced(t)) { + if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { t->group_stop = gstop; sig->group_stop_count++; signal_wake_up(t, 0); @@ -1926,8 +1932,8 @@ relock: if (unlikely(signr != 0)) ka = return_ka; else { - if (unlikely(signal->group_stop_count > 0) && - do_signal_stop(0)) + if (unlikely(current->group_stop & + GROUP_STOP_PENDING) && do_signal_stop(0)) goto relock; signr = dequeue_signal(current, ¤t->blocked, @@ -2073,7 +2079,7 @@ void exit_signals(struct task_struct *tsk) if (!signal_pending(t) && !(t->flags & PF_EXITING)) recalc_sigpending_and_wake(t); - if (unlikely(tsk->signal->group_stop_count) && + if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) && task_participate_group_stop(tsk)) group_stop = CLD_STOPPED; out: -- cgit v1.1 From 0ae8ce1c8c5b9007ce6bfc83ec2aa0dfce5bbed3 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Participate in group stop from ptrace_stop() iff the task is trapping for group stop Currently, ptrace_stop() unconditionally participates in group stop bookkeeping. This is unnecessary and inaccurate. Make it only participate if the task is trapping for group stop - ie. if @why is CLD_STOPPED. As ptrace_stop() currently is not used when trapping for group stop, this equals to disabling group stop participation from ptrace_stop(). A visible behavior change is increased likelihood of delayed group stop completion if the thread group contains one or more ptraced tasks. This is to preapre for further cleanup of the interaction between group stop and ptrace. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath --- kernel/signal.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index a2e7a65..9f36dd2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1694,10 +1694,13 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) } /* - * If there is a group stop in progress, - * we must participate in the bookkeeping. + * If @why is CLD_STOPPED, we're trapping to participate in a group + * stop. Do the bookkeeping. Note that if SIGCONT was delievered + * while siglock was released for the arch hook, PENDING could be + * clear now. We act as if SIGCONT is received after TASK_TRACED + * is entered - ignore it. */ - if (current->group_stop & GROUP_STOP_PENDING) + if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING)) task_participate_group_stop(current); current->last_siginfo = info; -- cgit v1.1 From 5224fa3660ad3881d2f2ad726d22614117963f10 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Make do_signal_stop() use ptrace_stop() if the task is being ptraced A ptraced task would still stop at do_signal_stop() when it's stopping for stop signals and do_signal_stop() behaves the same whether the task is ptraced or not. However, in addition to stopping, ptrace_stop() also does ptrace specific stuff like calling architecture specific callbacks, so this behavior makes the code more fragile and difficult to understand. This patch makes do_signal_stop() test whether the task is ptraced and use ptrace_stop() if so. This renders tracehook_notify_jctl() rather pointless as the ptrace notification is now handled by ptrace_stop() regardless of the return value from the tracehook. It probably is a good idea to update it. This doesn't solve the whole problem as tasks already in stopped state would stay in the regular stop when ptrace attached. That part will be handled by the next patch. Oleg pointed out that this makes a userland-visible change. Before, SIGCONT would be able to wake up a task in group stop even if the task is ptraced if the tracer hasn't issued another ptrace command afterwards (as the next ptrace commands transitions the state into TASK_TRACED which ignores SIGCONT wakeups). With this and the next patch, SIGCONT may race with the transition into TASK_TRACED and is ignored if the tracee already entered TASK_TRACED. Another userland visible change of this and the next patch is that the ptracee's state would now be TASK_TRACED where it used to be TASK_STOPPED, which is visible via fs/proc. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath Cc: Jan Kratochvil --- kernel/signal.c | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 9f36dd2..418776c 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1783,7 +1783,6 @@ void ptrace_notify(int exit_code) static int do_signal_stop(int signr) { struct signal_struct *sig = current->signal; - int notify = 0; if (!(current->group_stop & GROUP_STOP_PENDING)) { unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; @@ -1813,29 +1812,37 @@ static int do_signal_stop(int signr) } else task_clear_group_stop_pending(t); } - /* - * If there are no other threads in the group, or if there is - * a group stop in progress and we are the last to stop, report - * to the parent. When ptraced, every thread reports itself. - */ - if (task_participate_group_stop(current)) - notify = CLD_STOPPED; - if (task_ptrace(current)) - notify = CLD_STOPPED; current->exit_code = sig->group_exit_code; __set_current_state(TASK_STOPPED); - spin_unlock_irq(¤t->sighand->siglock); + if (likely(!task_ptrace(current))) { + int notify = 0; - if (notify) { - read_lock(&tasklist_lock); - do_notify_parent_cldstop(current, notify); - read_unlock(&tasklist_lock); - } + /* + * If there are no other threads in the group, or if there + * is a group stop in progress and we are the last to stop, + * report to the parent. + */ + if (task_participate_group_stop(current)) + notify = CLD_STOPPED; - /* Now we don't run again until woken by SIGCONT or SIGKILL */ - schedule(); + spin_unlock_irq(¤t->sighand->siglock); + + if (notify) { + read_lock(&tasklist_lock); + do_notify_parent_cldstop(current, notify); + read_unlock(&tasklist_lock); + } + + /* Now we don't run again until woken by SIGCONT or SIGKILL */ + schedule(); + + spin_lock_irq(¤t->sighand->siglock); + } else + ptrace_stop(current->exit_code, CLD_STOPPED, 0, NULL); + + spin_unlock_irq(¤t->sighand->siglock); tracehook_finish_jctl(); current->exit_code = 0; -- cgit v1.1 From d79fdd6d96f46fabb779d86332e3677c6f5c2a4f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:00 +0100 Subject: ptrace: Clean transitions between TASK_STOPPED and TRACED Currently, if the task is STOPPED on ptrace attach, it's left alone and the state is silently changed to TRACED on the next ptrace call. The behavior breaks the assumption that arch_ptrace_stop() is called before any task is poked by ptrace and is ugly in that a task manipulates the state of another task directly. With GROUP_STOP_PENDING, the transitions between TASK_STOPPED and TRACED can be made clean. The tracer can use the flag to tell the tracee to retry stop on attach and detach. On retry, the tracee will enter the desired state in the correct way. The lower 16bits of task->group_stop is used to remember the signal number which caused the last group stop. This is used while retrying for ptrace attach as the original group_exit_code could have been consumed with wait(2) by then. As the real parent may wait(2) and consume the group_exit_code anytime, the group_exit_code needs to be saved separately so that it can be used when switching from regular sleep to ptrace_stop(). This is recorded in the lower 16bits of task->group_stop. If a task is already stopped and there's no intervening SIGCONT, a ptrace request immediately following a successful PTRACE_ATTACH should always succeed even if the tracer doesn't wait(2) for attach completion; however, with this change, the tracee might still be TASK_RUNNING trying to enter TASK_TRACED which would cause the following request to fail with -ESRCH. This intermediate state is hidden from the ptracer by setting GROUP_STOP_TRAPPING on attach and making ptrace_check_attach() wait for it to clear on its signal->wait_chldexit. Completing the transition or getting killed clears TRAPPING and wakes up the tracer. Note that the STOPPED -> RUNNING -> TRACED transition is still visible to other threads which are in the same group as the ptracer and the reverse transition is visible to all. Please read the comments for details. Oleg: * Spotted a race condition where a task may retry group stop without proper bookkeeping. Fixed by redoing bookkeeping on retry. * Spotted that the transition is visible to userland in several different ways. Most are fixed with GROUP_STOP_TRAPPING. Unhandled corner case is documented. * Pointed out not setting GROUP_STOP_SIGMASK on an already stopped task would result in more consistent behavior. * Pointed out that calling ptrace_stop() from do_signal_stop() in TASK_STOPPED can race with group stop start logic and then confuse the TRAPPING wait in ptrace_check_attach(). ptrace_stop() is now called with TASK_RUNNING. * Suggested using signal->wait_chldexit instead of bit wait. * Spotted a race condition between TRACED transition and clearing of TRAPPING. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov Cc: Roland McGrath Cc: Jan Kratochvil --- kernel/signal.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 66 insertions(+), 13 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 418776c..1e19991 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -224,6 +224,26 @@ static inline void print_dropped_signal(int sig) } /** + * task_clear_group_stop_trapping - clear group stop trapping bit + * @task: target task + * + * If GROUP_STOP_TRAPPING is set, a ptracer is waiting for us. Clear it + * and wake up the ptracer. Note that we don't need any further locking. + * @task->siglock guarantees that @task->parent points to the ptracer. + * + * CONTEXT: + * Must be called with @task->sighand->siglock held. + */ +static void task_clear_group_stop_trapping(struct task_struct *task) +{ + if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) { + task->group_stop &= ~GROUP_STOP_TRAPPING; + __wake_up_sync(&task->parent->signal->wait_chldexit, + TASK_UNINTERRUPTIBLE, 1); + } +} + +/** * task_clear_group_stop_pending - clear pending group stop * @task: target task * @@ -1706,8 +1726,20 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) current->last_siginfo = info; current->exit_code = exit_code; - /* Let the debugger run. */ - __set_current_state(TASK_TRACED); + /* + * TRACED should be visible before TRAPPING is cleared; otherwise, + * the tracer might fail do_wait(). + */ + set_current_state(TASK_TRACED); + + /* + * We're committing to trapping. Clearing GROUP_STOP_TRAPPING and + * transition to TASK_TRACED should be atomic with respect to + * siglock. This hsould be done after the arch hook as siglock is + * released and regrabbed across it. + */ + task_clear_group_stop_trapping(current); + spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); if (may_ptrace_stop()) { @@ -1788,6 +1820,9 @@ static int do_signal_stop(int signr) unsigned int gstop = GROUP_STOP_PENDING | GROUP_STOP_CONSUME; struct task_struct *t; + /* signr will be recorded in task->group_stop for retries */ + WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); + if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; @@ -1797,25 +1832,27 @@ static int do_signal_stop(int signr) */ sig->group_exit_code = signr; - current->group_stop = gstop; + current->group_stop &= ~GROUP_STOP_SIGMASK; + current->group_stop |= signr | gstop; sig->group_stop_count = 1; - for (t = next_thread(current); t != current; t = next_thread(t)) + for (t = next_thread(current); t != current; + t = next_thread(t)) { + t->group_stop &= ~GROUP_STOP_SIGMASK; /* * Setting state to TASK_STOPPED for a group * stop is always done with the siglock held, * so this check has no races. */ if (!(t->flags & PF_EXITING) && !task_is_stopped(t)) { - t->group_stop = gstop; + t->group_stop |= signr | gstop; sig->group_stop_count++; signal_wake_up(t, 0); - } else + } else { task_clear_group_stop_pending(t); + } + } } - - current->exit_code = sig->group_exit_code; - __set_current_state(TASK_STOPPED); - +retry: if (likely(!task_ptrace(current))) { int notify = 0; @@ -1827,6 +1864,7 @@ static int do_signal_stop(int signr) if (task_participate_group_stop(current)) notify = CLD_STOPPED; + __set_current_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); if (notify) { @@ -1839,13 +1877,28 @@ static int do_signal_stop(int signr) schedule(); spin_lock_irq(¤t->sighand->siglock); - } else - ptrace_stop(current->exit_code, CLD_STOPPED, 0, NULL); + } else { + ptrace_stop(current->group_stop & GROUP_STOP_SIGMASK, + CLD_STOPPED, 0, NULL); + current->exit_code = 0; + } + + /* + * GROUP_STOP_PENDING could be set if another group stop has + * started since being woken up or ptrace wants us to transit + * between TASK_STOPPED and TRACED. Retry group stop. + */ + if (current->group_stop & GROUP_STOP_PENDING) { + WARN_ON_ONCE(!(current->group_stop & GROUP_STOP_SIGMASK)); + goto retry; + } + + /* PTRACE_ATTACH might have raced with task killing, clear trapping */ + task_clear_group_stop_trapping(current); spin_unlock_irq(¤t->sighand->siglock); tracehook_finish_jctl(); - current->exit_code = 0; return 1; } -- cgit v1.1 From 408a37de6c95832a4880a88a742f89f0cc554d06 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:01 +0100 Subject: job control: Don't set group_stop exit_code if re-entering job control stop While ptraced, a task may be resumed while the containing process is still job control stopped. If the task receives another stop signal in this state, it will still initiate group stop, which generates group_exit_code, which the real parent would be able to see once the ptracer detaches. In this scenario, the real parent may see two consecutive CLD_STOPPED events from two stop signals without intervening SIGCONT, which normally is impossible. Test case follows. #include #include #include #include int main(void) { pid_t tracee; siginfo_t si; tracee = fork(); if (!tracee) while (1) pause(); kill(tracee, SIGSTOP); waitid(P_PID, tracee, &si, WSTOPPED); if (!fork()) { ptrace(PTRACE_ATTACH, tracee, NULL, NULL); waitid(P_PID, tracee, &si, WSTOPPED); ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status); waitid(P_PID, tracee, &si, WSTOPPED); ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status); waitid(P_PID, tracee, &si, WSTOPPED); ptrace(PTRACE_DETACH, tracee, NULL, NULL); return 0; } while (1) { si.si_pid = 0; waitid(P_PID, tracee, &si, WSTOPPED | WNOHANG); if (si.si_pid) printf("st=%02d c=%02d\n", si.si_status, si.si_code); } return 0; } Before the patch, the latter waitid() in polling mode reports the second stopped event generated by the implied SIGSTOP of PTRACE_ATTACH. st=19 c=05 ^C After the patch, the second event is not reported. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- kernel/signal.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 1e19991..2f2c8f6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1827,10 +1827,27 @@ static int do_signal_stop(int signr) unlikely(signal_group_exit(sig))) return 0; /* - * There is no group stop already in progress. - * We must initiate one now. + * There is no group stop already in progress. We must + * initiate one now. + * + * While ptraced, a task may be resumed while group stop is + * still in effect and then receive a stop signal and + * initiate another group stop. This deviates from the + * usual behavior as two consecutive stop signals can't + * cause two group stops when !ptraced. + * + * The condition can be distinguished by testing whether + * SIGNAL_STOP_STOPPED is already set. Don't generate + * group_exit_code in such case. + * + * This is not necessary for SIGNAL_STOP_CONTINUED because + * an intervening stop signal is required to cause two + * continued events regardless of ptrace. */ - sig->group_exit_code = signr; + if (!(sig->flags & SIGNAL_STOP_STOPPED)) + sig->group_exit_code = signr; + else + WARN_ON_ONCE(!task_ptrace(current)); current->group_stop &= ~GROUP_STOP_SIGMASK; current->group_stop |= signr | gstop; -- cgit v1.1 From 75b95953a56969a990e6ce154b260be83818fe71 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:01 +0100 Subject: job control: Add @for_ptrace to do_notify_parent_cldstop() Currently, do_notify_parent_cldstop() determines whether the notification is for the real parent or ptracer. Move the decision to the caller by adding @for_ptrace parameter to do_notify_parent_cldstop(). All the callers are updated to pass task_ptrace(target_task), so this patch doesn't cause any behavior difference. While at it, add function comment to do_notify_parent_cldstop(). Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- kernel/signal.c | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 2f2c8f6..69d6054 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1595,16 +1595,30 @@ int do_notify_parent(struct task_struct *tsk, int sig) return ret; } -static void do_notify_parent_cldstop(struct task_struct *tsk, int why) +/** + * do_notify_parent_cldstop - notify parent of stopped/continued state change + * @tsk: task reporting the state change + * @for_ptracer: the notification is for ptracer + * @why: CLD_{CONTINUED|STOPPED|TRAPPED} to report + * + * Notify @tsk's parent that the stopped/continued state has changed. If + * @for_ptracer is %false, @tsk's group leader notifies to its real parent. + * If %true, @tsk reports to @tsk->parent which should be the ptracer. + * + * CONTEXT: + * Must be called with tasklist_lock at least read locked. + */ +static void do_notify_parent_cldstop(struct task_struct *tsk, + bool for_ptracer, int why) { struct siginfo info; unsigned long flags; struct task_struct *parent; struct sighand_struct *sighand; - if (task_ptrace(tsk)) + if (for_ptracer) { parent = tsk->parent; - else { + } else { tsk = tsk->group_leader; parent = tsk->real_parent; } @@ -1743,7 +1757,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); if (may_ptrace_stop()) { - do_notify_parent_cldstop(current, why); + do_notify_parent_cldstop(current, task_ptrace(current), why); /* * Don't want to allow preemption here, because * sys_ptrace() needs this task to be inactive. @@ -1886,7 +1900,8 @@ retry: if (notify) { read_lock(&tasklist_lock); - do_notify_parent_cldstop(current, notify); + do_notify_parent_cldstop(current, task_ptrace(current), + notify); read_unlock(&tasklist_lock); } @@ -1982,6 +1997,7 @@ relock: * the CLD_ si_code into SIGNAL_CLD_MASK bits. */ if (unlikely(signal->flags & SIGNAL_CLD_MASK)) { + struct task_struct *leader; int why; if (signal->flags & SIGNAL_CLD_CONTINUED) @@ -1994,7 +2010,8 @@ relock: spin_unlock_irq(&sighand->siglock); read_lock(&tasklist_lock); - do_notify_parent_cldstop(current->group_leader, why); + leader = current->group_leader; + do_notify_parent_cldstop(leader, task_ptrace(leader), why); read_unlock(&tasklist_lock); goto relock; } @@ -2167,7 +2184,7 @@ out: if (unlikely(group_stop)) { read_lock(&tasklist_lock); - do_notify_parent_cldstop(tsk, group_stop); + do_notify_parent_cldstop(tsk, task_ptrace(tsk), group_stop); read_unlock(&tasklist_lock); } } -- cgit v1.1 From 62bcf9d992ecc19ea4f37ff57ee0b3333e3e843e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:01 +0100 Subject: job control: Job control stop notifications should always go to the real parent The stopped notifications in do_signal_stop() and exit_signals() are always for the completion of job control. The one in do_signal_stop() may be delivered to the ptracer if PTRACE_ATTACH races with notification and the one in exit_signals() if task exits while ptraced. In both cases, the notifications are meaningless and confusing to the ptracer as it never accesses the group stop state while the real parent would miss notifications for the events it is watching. Make sure these notifications always go to the real parent by calling do_notify_parent_cld_stop() with %false @for_ptrace. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- kernel/signal.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 69d6054..9f10b24 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1898,10 +1898,18 @@ retry: __set_current_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); + /* + * Notify the parent of the group stop completion. Because + * we're not holding either the siglock or tasklist_lock + * here, ptracer may attach inbetween; however, this is for + * group stop and should always be delivered to the real + * parent of the group leader. The new ptracer will get + * its notification when this task transitions into + * TASK_TRACED. + */ if (notify) { read_lock(&tasklist_lock); - do_notify_parent_cldstop(current, task_ptrace(current), - notify); + do_notify_parent_cldstop(current, false, notify); read_unlock(&tasklist_lock); } @@ -2182,9 +2190,13 @@ void exit_signals(struct task_struct *tsk) out: spin_unlock_irq(&tsk->sighand->siglock); + /* + * If group stop has completed, deliver the notification. This + * should always go to the real parent of the group leader. + */ if (unlikely(group_stop)) { read_lock(&tasklist_lock); - do_notify_parent_cldstop(tsk, task_ptrace(tsk), group_stop); + do_notify_parent_cldstop(tsk, false, group_stop); read_unlock(&tasklist_lock); } } -- cgit v1.1 From ceb6bd67f9b9db765e1c29405f26e8460391badd Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:01 +0100 Subject: job control: Notify the real parent of job control events regardless of ptrace With recent changes, job control and ptrace stopped states are properly separated and accessible to the real parent and the ptracer respectively; however, notifications of job control stopped/continued events to the real parent while ptraced are still missing. A ptracee participates in group stop in ptrace_stop() but the completion isn't notified. If participation results in completion of group stop, notify the real parent of the event. The ptrace and group stops are separate and can be handled as such. However, when the real parent and the ptracer are in the same thread group, only the ptrace stop event is visible through wait(2) and the duplicate notifications are different from the current behavior and are confusing. Suppress group stop notification in such cases. The continued state is shared between the real parent and the ptracer but is only meaningful to the real parent. Always notify the real parent and notify the ptracer too for backward compatibility. Similar to stop notification, if the real parent is the ptracer, suppress a duplicate notification. Test case follows. #include #include #include #include #include #include #include int main(void) { const struct timespec ts100ms = { .tv_nsec = 100000000 }; pid_t tracee, tracer; siginfo_t si; int i; tracee = fork(); if (tracee == 0) { while (1) { printf("tracee: SIGSTOP\n"); raise(SIGSTOP); nanosleep(&ts100ms, NULL); printf("tracee: SIGCONT\n"); raise(SIGCONT); nanosleep(&ts100ms, NULL); } } waitid(P_PID, tracee, &si, WSTOPPED | WNOHANG | WNOWAIT); tracer = fork(); if (tracer == 0) { nanosleep(&ts100ms, NULL); ptrace(PTRACE_ATTACH, tracee, NULL, NULL); for (i = 0; i < 11; i++) { si.si_pid = 0; waitid(P_PID, tracee, &si, WSTOPPED); if (si.si_pid && si.si_code == CLD_TRAPPED) ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status); } printf("tracer: EXITING\n"); return 0; } while (1) { si.si_pid = 0; waitid(P_PID, tracee, &si, WSTOPPED | WCONTINUED | WEXITED); if (si.si_pid) printf("mommy : WAIT status=%02d code=%02d\n", si.si_status, si.si_code); } return 0; } Before this patch, while ptraced, the real parent doesn't get notifications for job control events, so although it can access those events, the later waitid(2) call never wakes up. tracee: SIGSTOP mommy : WAIT status=19 code=05 tracee: SIGCONT tracee: SIGSTOP tracee: SIGCONT tracee: SIGSTOP tracee: SIGCONT tracee: SIGSTOP tracer: EXITING mommy : WAIT status=19 code=05 ^C After this patch, it works as expected. tracee: SIGSTOP mommy : WAIT status=19 code=05 tracee: SIGCONT mommy : WAIT status=18 code=06 tracee: SIGSTOP mommy : WAIT status=19 code=05 tracee: SIGCONT mommy : WAIT status=18 code=06 tracee: SIGSTOP mommy : WAIT status=19 code=05 tracee: SIGCONT mommy : WAIT status=18 code=06 tracee: SIGSTOP tracer: EXITING mommy : WAIT status=19 code=05 ^C -v2: Oleg pointed out that * Group stop notification to the real parent should also happen when ptracer detach races with ptrace_stop(). * real_parent_is_ptracer() should be testing thread group equality not the task itself as wait(2) and stop/cont notifications are normally thread-group wide. Both issues are fixed accordingly. -v3: real_parent_is_ptracer() updated to test child->real_parent instead of child->group_leader->real_parent per Oleg's suggestion. Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- kernel/signal.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 9f10b24..f65403d 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1694,6 +1694,15 @@ static int sigkill_pending(struct task_struct *tsk) } /* + * Test whether the target task of the usual cldstop notification - the + * real_parent of @child - is in the same group as the ptracer. + */ +static bool real_parent_is_ptracer(struct task_struct *child) +{ + return same_thread_group(child->parent, child->real_parent); +} + +/* * This must be called with current->sighand->siglock held. * * This should be the path for all ptrace stops. @@ -1708,6 +1717,8 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) __releases(¤t->sighand->siglock) __acquires(¤t->sighand->siglock) { + bool gstop_done = false; + if (arch_ptrace_stop_needed(exit_code, info)) { /* * The arch code has something special to do before a @@ -1735,7 +1746,7 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) * is entered - ignore it. */ if (why == CLD_STOPPED && (current->group_stop & GROUP_STOP_PENDING)) - task_participate_group_stop(current); + gstop_done = task_participate_group_stop(current); current->last_siginfo = info; current->exit_code = exit_code; @@ -1757,7 +1768,20 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) spin_unlock_irq(¤t->sighand->siglock); read_lock(&tasklist_lock); if (may_ptrace_stop()) { - do_notify_parent_cldstop(current, task_ptrace(current), why); + /* + * Notify parents of the stop. + * + * While ptraced, there are two parents - the ptracer and + * the real_parent of the group_leader. The ptracer should + * know about every stop while the real parent is only + * interested in the completion of group stop. The states + * for the two don't interact with each other. Notify + * separately unless they're gonna be duplicates. + */ + do_notify_parent_cldstop(current, true, why); + if (gstop_done && !real_parent_is_ptracer(current)) + do_notify_parent_cldstop(current, false, why); + /* * Don't want to allow preemption here, because * sys_ptrace() needs this task to be inactive. @@ -1772,7 +1796,16 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) /* * By the time we got the lock, our tracer went away. * Don't drop the lock yet, another tracer may come. + * + * If @gstop_done, the ptracer went away between group stop + * completion and here. During detach, it would have set + * GROUP_STOP_PENDING on us and we'll re-enter TASK_STOPPED + * in do_signal_stop() on return, so notifying the real + * parent of the group stop completion is enough. */ + if (gstop_done) + do_notify_parent_cldstop(current, false, why); + __set_current_state(TASK_RUNNING); if (clear_code) current->exit_code = 0; @@ -2017,10 +2050,24 @@ relock: spin_unlock_irq(&sighand->siglock); + /* + * Notify the parent that we're continuing. This event is + * always per-process and doesn't make whole lot of sense + * for ptracers, who shouldn't consume the state via + * wait(2) either, but, for backward compatibility, notify + * the ptracer of the group leader too unless it's gonna be + * a duplicate. + */ read_lock(&tasklist_lock); + + do_notify_parent_cldstop(current, false, why); + leader = current->group_leader; - do_notify_parent_cldstop(leader, task_ptrace(leader), why); + if (task_ptrace(leader) && !real_parent_is_ptracer(leader)) + do_notify_parent_cldstop(leader, true, why); + read_unlock(&tasklist_lock); + goto relock; } -- cgit v1.1 From 244056f9dbbc6dc4126a301c745fa3dd67d8af3c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 Mar 2011 10:37:01 +0100 Subject: job control: Don't send duplicate job control stop notification while ptraced Just as group_exit_code shouldn't be generated when a PTRACE_CONT'd task re-enters job control stop, notifiction for the event should be suppressed too. The logic is the same as the group_exit_code generation suppression in do_signal_stop(), if SIGNAL_STOP_STOPPED is already set, the task is re-entering job control stop without intervening SIGCONT and the notifications should be suppressed. Test case follows. #include #include #include #include #include #include static const struct timespec ts100ms = { .tv_nsec = 100000000 }; static pid_t tracee, tracer; static const char *pid_who(pid_t pid) { return pid == tracee ? "tracee" : (pid == tracer ? "tracer" : "mommy "); } static void sigchld_sigaction(int signo, siginfo_t *si, void *ucxt) { printf("%s: SIG status=%02d code=%02d (%s)\n", pid_who(getpid()), si->si_status, si->si_code, pid_who(si->si_pid)); } int main(void) { const struct sigaction chld_sa = { .sa_sigaction = sigchld_sigaction, .sa_flags = SA_SIGINFO|SA_RESTART }; siginfo_t si; sigaction(SIGCHLD, &chld_sa, NULL); tracee = fork(); if (!tracee) { tracee = getpid(); while (1) pause(); } kill(tracee, SIGSTOP); waitid(P_PID, tracee, &si, WSTOPPED); tracer = fork(); if (!tracer) { tracer = getpid(); ptrace(PTRACE_ATTACH, tracee, NULL, NULL); waitid(P_PID, tracee, &si, WSTOPPED); ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status); waitid(P_PID, tracee, &si, WSTOPPED); ptrace(PTRACE_CONT, tracee, NULL, (void *)(long)si.si_status); waitid(P_PID, tracee, &si, WSTOPPED); printf("tracer: detaching\n"); ptrace(PTRACE_DETACH, tracee, NULL, NULL); return 0; } while (1) pause(); return 0; } Before the patch, the parent gets the second notification for the tracee after the tracer detaches. si_status is zero because group_exit_code is not set by the group stop completion which triggered this notification. mommy : SIG status=19 code=05 (tracee) tracer: SIG status=00 code=05 (tracee) tracer: SIG status=19 code=04 (tracee) tracer: SIG status=00 code=05 (tracee) tracer: detaching mommy : SIG status=00 code=05 (tracee) mommy : SIG status=00 code=01 (tracer) ^C After the patch, the duplicate notification is gone. mommy : SIG status=19 code=05 (tracee) tracer: SIG status=00 code=05 (tracee) tracer: SIG status=19 code=04 (tracee) tracer: SIG status=00 code=05 (tracee) tracer: detaching mommy : SIG status=00 code=01 (tracer) ^C Signed-off-by: Tejun Heo Acked-by: Oleg Nesterov --- kernel/signal.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index f65403d..f799a05 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -268,6 +268,10 @@ void task_clear_group_stop_pending(struct task_struct *task) * * CONTEXT: * Must be called with @task->sighand->siglock held. + * + * RETURNS: + * %true if group stop completion should be notified to the parent, %false + * otherwise. */ static bool task_participate_group_stop(struct task_struct *task) { @@ -284,7 +288,11 @@ static bool task_participate_group_stop(struct task_struct *task) if (!WARN_ON_ONCE(sig->group_stop_count == 0)) sig->group_stop_count--; - if (!sig->group_stop_count) { + /* + * Tell the caller to notify completion iff we are entering into a + * fresh group stop. Read comment in do_signal_stop() for details. + */ + if (!sig->group_stop_count && !(sig->flags & SIGNAL_STOP_STOPPED)) { sig->flags = SIGNAL_STOP_STOPPED; return true; } -- cgit v1.1 From 1deac632fc3dcff33a6df3e82ef10c738ac13fe6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 1 Apr 2011 20:11:50 +0200 Subject: signal: prepare_signal(SIGCONT) shouldn't play with TIF_SIGPENDING prepare_signal(SIGCONT) should never set TIF_SIGPENDING or wake up the TASK_INTERRUPTIBLE threads. We are going to call complete_signal() which should pick the right thread correctly. All we need is to wake up the TASK_STOPPED threads. If the task was stopped, it can't return to usermode without taking ->siglock. Otherwise we don't care, and the spurious TIF_SIGPENDING can't be useful. The comment says: * If there is a handler for SIGCONT, we must make * sure that no thread returns to user mode before * we post the signal It is not clear what this means. Probably, "when there's only a single thread" and this continues to be true. Otherwise, even if this SIGCONT is not private, with or without this change only one thread can dequeue SIGCONT, other threads can happily return to user mode before before that thread handles this signal. Note also that wake_up_state(t, __TASK_STOPPED) can't race with the task which changes its state, TASK_STOPPED state is protected by ->siglock as well. In short: when it comes to signal delivery, SIGCONT is the normal signal and does not need any special support. Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- kernel/signal.c | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index f799a05..38ea9e2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -788,37 +788,14 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) } else if (sig == SIGCONT) { unsigned int why; /* - * Remove all stop signals from all queues, - * and wake all threads. + * Remove all stop signals from all queues, wake all threads. */ rm_from_queue(SIG_KERNEL_STOP_MASK, &signal->shared_pending); t = p; do { - unsigned int state; - task_clear_group_stop_pending(t); - rm_from_queue(SIG_KERNEL_STOP_MASK, &t->pending); - /* - * If there is a handler for SIGCONT, we must make - * sure that no thread returns to user mode before - * we post the signal, in case it was the only - * thread eligible to run the signal handler--then - * it must not do anything between resuming and - * running the handler. With the TIF_SIGPENDING - * flag set, the thread will pause and acquire the - * siglock that we hold now and until we've queued - * the pending signal. - * - * Wake up the stopped thread _after_ setting - * TIF_SIGPENDING - */ - state = __TASK_STOPPED; - if (sig_user_defined(t, SIGCONT) && !sigismember(&t->blocked, SIGCONT)) { - set_tsk_thread_flag(t, TIF_SIGPENDING); - state |= TASK_INTERRUPTIBLE; - } - wake_up_state(t, state); + wake_up_state(t, __TASK_STOPPED); } while_each_thread(p, t); /* -- cgit v1.1 From 780006eac2fe7f4d2582da16a096e5a44c4767ff Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 1 Apr 2011 20:12:16 +0200 Subject: signal: do_signal_stop: Remove the unneeded task_clear_group_stop_pending() PF_EXITING or TASK_STOPPED has already called task_participate_group_stop() and cleared its ->group_stop. No need to do task_clear_group_stop_pending() when we start the new group stop. Add a small comment to explain the !task_is_stopped() check. Note that this check is not exactly right and it can lead to unnecessary stop later if the thread is TASK_PTRACED. What we need is task_participated_in_group_stop(), this will be solved later. Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- kernel/signal.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 38ea9e2..e9abc69 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1866,7 +1866,8 @@ static int do_signal_stop(int signr) * still in effect and then receive a stop signal and * initiate another group stop. This deviates from the * usual behavior as two consecutive stop signals can't - * cause two group stops when !ptraced. + * cause two group stops when !ptraced. That is why we + * also check !task_is_stopped(t) below. * * The condition can be distinguished by testing whether * SIGNAL_STOP_STOPPED is already set. Don't generate @@ -1896,8 +1897,6 @@ static int do_signal_stop(int signr) t->group_stop |= signr | gstop; sig->group_stop_count++; signal_wake_up(t, 0); - } else { - task_clear_group_stop_pending(t); } } } -- cgit v1.1 From ee77f075921730b2b465880f9fd4367003bdab39 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 1 Apr 2011 20:12:38 +0200 Subject: signal: Turn SIGNAL_STOP_DEQUEUED into GROUP_STOP_DEQUEUED This patch moves SIGNAL_STOP_DEQUEUED from signal_struct->flags to task_struct->group_stop, and thus makes it per-thread. Like SIGNAL_STOP_DEQUEUED, GROUP_STOP_DEQUEUED can be false-positive after return from get_signal_to_deliver(), this is fine. The only purpose of this bit is: we can drop ->siglock after __dequeue_signal() returns the sig_kernel_stop() signal and before we call do_signal_stop(), in this case we must not miss SIGCONT if it comes in between. But, unlike SIGNAL_STOP_DEQUEUED, GROUP_STOP_DEQUEUED can not be false-positive in do_signal_stop() if multiple threads dequeue the sig_kernel_stop() signal at the same time. Consider two threads T1 and T2, SIGTTIN has a hanlder. - T1 dequeues SIGTSTP and sets SIGNAL_STOP_DEQUEUED, then it drops ->siglock - SIGCONT comes and clears SIGNAL_STOP_DEQUEUED, SIGTSTP should be cancelled. - T2 dequeues SIGTTIN and sets SIGNAL_STOP_DEQUEUED again. Since we have a handler we should not stop, T2 returns to usermode to run the handler. - T1 continues, calls do_signal_stop() and wrongly starts the group stop because SIGNAL_STOP_DEQUEUED was restored in between. With or without this change: - we need to do something with ptrace_signal() which can return SIGSTOP, but this needs another discussion - SIGSTOP can be lost if it races with the mt exec, will be fixed later. Signed-off-by: Oleg Nesterov Signed-off-by: Tejun Heo --- kernel/signal.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index e9abc69..4f7312b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -254,7 +254,8 @@ static void task_clear_group_stop_trapping(struct task_struct *task) */ void task_clear_group_stop_pending(struct task_struct *task) { - task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME); + task->group_stop &= ~(GROUP_STOP_PENDING | GROUP_STOP_CONSUME | + GROUP_STOP_DEQUEUED); } /** @@ -602,7 +603,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) * is to alert stop-signal processing code when another * processor has come along and cleared the flag. */ - tsk->signal->flags |= SIGNAL_STOP_DEQUEUED; + current->group_stop |= GROUP_STOP_DEQUEUED; } if ((info->si_code & __SI_MASK) == __SI_TIMER && info->si_sys_private) { /* @@ -821,13 +822,6 @@ static int prepare_signal(int sig, struct task_struct *p, int from_ancestor_ns) signal->flags = why | SIGNAL_STOP_CONTINUED; signal->group_stop_count = 0; signal->group_exit_code = 0; - } else { - /* - * We are not stopped, but there could be a stop - * signal in the middle of being processed after - * being removed from the queue. Clear that too. - */ - signal->flags &= ~SIGNAL_STOP_DEQUEUED; } } @@ -1855,7 +1849,7 @@ static int do_signal_stop(int signr) /* signr will be recorded in task->group_stop for retries */ WARN_ON_ONCE(signr & ~GROUP_STOP_SIGMASK); - if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED) || + if (!likely(current->group_stop & GROUP_STOP_DEQUEUED) || unlikely(signal_group_exit(sig))) return 0; /* -- cgit v1.1 From 0edceb7bcd82802f721f3c94eed9b3e2869d3740 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 19:17:37 +0200 Subject: signal: introduce retarget_shared_pending() No functional changes. Move the notify-other-threads code from exit_signals() to the new helper, retarget_shared_pending(). Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- kernel/signal.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index c15e979..5341e21 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2198,10 +2198,25 @@ relock: return signr; } +/* + * It could be that complete_signal() picked us to notify about the + * group-wide signal. Another thread should be notified now to take + * the signal since we will not. + */ +static void retarget_shared_pending(struct task_struct *tsk) +{ + struct task_struct *t; + + t = tsk; + while_each_thread(tsk, t) { + if (!signal_pending(t) && !(t->flags & PF_EXITING)) + recalc_sigpending_and_wake(t); + } +} + void exit_signals(struct task_struct *tsk) { int group_stop = 0; - struct task_struct *t; if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { tsk->flags |= PF_EXITING; @@ -2217,14 +2232,7 @@ void exit_signals(struct task_struct *tsk) if (!signal_pending(tsk)) goto out; - /* - * It could be that __group_complete_signal() choose us to - * notify about group-wide signal. Another thread should be - * woken now to take the signal since we will not. - */ - for (t = tsk; (t = next_thread(t)) != tsk; ) - if (!signal_pending(t) && !(t->flags & PF_EXITING)) - recalc_sigpending_and_wake(t); + retarget_shared_pending(tsk); if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) && task_participate_group_stop(tsk)) -- cgit v1.1 From f646e227b88a164a841d6b6dd969d8a45272dd83 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 19:18:39 +0200 Subject: signal: retarget_shared_pending: consider shared/unblocked signals only exit_signals() checks signal_pending() before retarget_shared_pending() but this is suboptimal. We can avoid the while_each_thread() loop in case when there are no shared signals visible to us. Add the "shared_pending.signal & ~blocked" check. We don't use tsk->blocked directly but pass ~blocked as an argument, this is needed for the next patch. Note: we can optimize this more. while_each_thread(t) can check t->blocked into account and stop after every pending signal has the new target, see the next patch. Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- kernel/signal.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 5341e21..06214b5 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2203,10 +2203,15 @@ relock: * group-wide signal. Another thread should be notified now to take * the signal since we will not. */ -static void retarget_shared_pending(struct task_struct *tsk) +static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) { + sigset_t retarget; struct task_struct *t; + sigandsets(&retarget, &tsk->signal->shared_pending.signal, which); + if (sigisemptyset(&retarget)) + return; + t = tsk; while_each_thread(tsk, t) { if (!signal_pending(t) && !(t->flags & PF_EXITING)) @@ -2217,6 +2222,7 @@ static void retarget_shared_pending(struct task_struct *tsk) void exit_signals(struct task_struct *tsk) { int group_stop = 0; + sigset_t unblocked; if (thread_group_empty(tsk) || signal_group_exit(tsk->signal)) { tsk->flags |= PF_EXITING; @@ -2232,7 +2238,9 @@ void exit_signals(struct task_struct *tsk) if (!signal_pending(tsk)) goto out; - retarget_shared_pending(tsk); + unblocked = tsk->blocked; + signotset(&unblocked); + retarget_shared_pending(tsk, &unblocked); if (unlikely(tsk->group_stop & GROUP_STOP_PENDING) && task_participate_group_stop(tsk)) -- cgit v1.1 From fec9993db093acfc3999a364e31f8adae41fcb28 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 19:50:21 +0200 Subject: signal: retarget_shared_pending: optimize while_each_thread() loop retarget_shared_pending() blindly does recalc_sigpending_and_wake() for every sub-thread, this is suboptimal. We can check t->blocked and stop looping once every bit in shared_pending has the new target. Note: we do not take task_is_stopped_or_traced(t) into account, we are not trying to speed up the signal delivery or to avoid the unnecessary (but harmless) signal_wake_up(0) in this unlikely case. Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- kernel/signal.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 06214b5..707736e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2200,8 +2200,8 @@ relock: /* * It could be that complete_signal() picked us to notify about the - * group-wide signal. Another thread should be notified now to take - * the signal since we will not. + * group-wide signal. Other threads should be notified now to take + * the shared signals in @which since we will not. */ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) { @@ -2214,8 +2214,19 @@ static void retarget_shared_pending(struct task_struct *tsk, sigset_t *which) t = tsk; while_each_thread(tsk, t) { - if (!signal_pending(t) && !(t->flags & PF_EXITING)) - recalc_sigpending_and_wake(t); + if (t->flags & PF_EXITING) + continue; + + if (!has_pending_signals(&retarget, &t->blocked)) + continue; + /* Remove the signals this thread can handle. */ + sigandsets(&retarget, &retarget, &t->blocked); + + if (!signal_pending(t)) + signal_wake_up(t, 0); + + if (sigisemptyset(&retarget)) + break; } } -- cgit v1.1 From 73ef4aeb61b53fce464a7e24ef03a26f98b2f617 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 19:54:20 +0200 Subject: signal: sigprocmask: narrow the scope of ->siglock No functional changes, preparation to simplify the review of the next change. 1. We can read current->block lockless, nobody else can ever change this mask. 2. Calculate the resulting sigset_t outside of ->siglock into the temporary variable, then take ->siglock and change ->blocked. Also, kill the stale comment about BKL. Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- kernel/signal.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 707736e..e8308e3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2300,12 +2300,6 @@ long do_no_restart_syscall(struct restart_block *param) } /* - * We don't need to get the kernel lock - this is all local to this - * particular thread.. (and that's good, because this is _heavily_ - * used by various programs) - */ - -/* * This is also useful for kernel threads that want to temporarily * (or permanently) block certain signals. * @@ -2315,30 +2309,33 @@ long do_no_restart_syscall(struct restart_block *param) */ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) { - int error; + struct task_struct *tsk = current; + sigset_t newset; - spin_lock_irq(¤t->sighand->siglock); + /* Lockless, only current can change ->blocked, never from irq */ if (oldset) - *oldset = current->blocked; + *oldset = tsk->blocked; - error = 0; switch (how) { case SIG_BLOCK: - sigorsets(¤t->blocked, ¤t->blocked, set); + sigorsets(&newset, &tsk->blocked, set); break; case SIG_UNBLOCK: - signandsets(¤t->blocked, ¤t->blocked, set); + signandsets(&newset, &tsk->blocked, set); break; case SIG_SETMASK: - current->blocked = *set; + newset = *set; break; default: - error = -EINVAL; + return -EINVAL; } + + spin_lock_irq(&tsk->sighand->siglock); + tsk->blocked = newset; recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); + spin_unlock_irq(&tsk->sighand->siglock); - return error; + return 0; } /** -- cgit v1.1 From e6fa16ab9c1e9b344428e6fea4d29e3cc4b28fb0 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 20:59:41 +0200 Subject: signal: sigprocmask() should do retarget_shared_pending() In short, almost every changing of current->blocked is wrong, or at least can lead to the unexpected results. For example. Two threads T1 and T2, T1 sleeps in sigtimedwait/pause/etc. kill(tgid, SIG) can pick T2 for TIF_SIGPENDING. If T2 calls sigprocmask() and blocks SIG before it notices the pending signal, nobody else can handle this pending shared signal. I am not sure this is bug, but at least this looks strange imho. T1 should not sleep forever, there is a signal which should wake it up. This patch moves the code which actually changes ->blocked into the new helper, set_current_blocked() and changes this code to call retarget_shared_pending() as exit_signals() does. We should only care about the signals we just blocked, we use "newset & ~current->blocked" as a mask. We do not check !sigisemptyset(newblocked), retarget_shared_pending() is cheap unless mask & shared_pending. Note: for this particular case we could simply change sigprocmask() to return -EINTR if signal_pending(), but then we should change other callers and, more importantly, if we need this fix then set_current_blocked() will have more callers and some of them can't restart. See the next patch as a random example. Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- kernel/signal.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index e8308e3..8aa3a2e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2299,6 +2299,29 @@ long do_no_restart_syscall(struct restart_block *param) return -EINTR; } +/** + * set_current_blocked - change current->blocked mask + * @newset: new mask + * + * It is wrong to change ->blocked directly, this helper should be used + * to ensure the process can't miss a shared signal we are going to block. + */ +void set_current_blocked(const sigset_t *newset) +{ + struct task_struct *tsk = current; + + spin_lock_irq(&tsk->sighand->siglock); + if (signal_pending(tsk) && !thread_group_empty(tsk)) { + sigset_t newblocked; + /* A set of now blocked but previously unblocked signals. */ + signandsets(&newblocked, newset, ¤t->blocked); + retarget_shared_pending(tsk, &newblocked); + } + tsk->blocked = *newset; + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); +} + /* * This is also useful for kernel threads that want to temporarily * (or permanently) block certain signals. @@ -2330,11 +2353,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) return -EINVAL; } - spin_lock_irq(&tsk->sighand->siglock); - tsk->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(&tsk->sighand->siglock); - + set_current_blocked(&newset); return 0; } -- cgit v1.1 From bb7efee2ca63b08795ffb3cda96fc89d2e641b79 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 21:18:10 +0200 Subject: signal: cleanup sys_rt_sigprocmask() sys_rt_sigprocmask() looks unnecessarily complicated, simplify it. We can just read current->blocked lockless unconditionally before anything else and then copy-to-user it if needed. At worst we copy 4 words on mips. We could copy-to-user the old mask first and simplify the code even more, but the patch tries to keep the current behaviour: we change current->block even if copy_to_user(oset) fails. Signed-off-by: Oleg Nesterov Reviewed-by: Matt Fleming Acked-by: Tejun Heo --- kernel/signal.c | 38 ++++++++++++++++---------------------- 1 file changed, 16 insertions(+), 22 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 8aa3a2e..bb92000 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2364,40 +2364,34 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) * @oset: previous value of signal mask if non-null * @sigsetsize: size of sigset_t type */ -SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set, +SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, nset, sigset_t __user *, oset, size_t, sigsetsize) { - int error = -EINVAL; sigset_t old_set, new_set; + int error; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) - goto out; + return -EINVAL; - if (set) { - error = -EFAULT; - if (copy_from_user(&new_set, set, sizeof(*set))) - goto out; + old_set = current->blocked; + + if (nset) { + if (copy_from_user(&new_set, nset, sizeof(sigset_t))) + return -EFAULT; sigdelsetmask(&new_set, sigmask(SIGKILL)|sigmask(SIGSTOP)); - error = sigprocmask(how, &new_set, &old_set); + error = sigprocmask(how, &new_set, NULL); if (error) - goto out; - if (oset) - goto set_old; - } else if (oset) { - spin_lock_irq(¤t->sighand->siglock); - old_set = current->blocked; - spin_unlock_irq(¤t->sighand->siglock); + return error; + } - set_old: - error = -EFAULT; - if (copy_to_user(oset, &old_set, sizeof(*oset))) - goto out; + if (oset) { + if (copy_to_user(oset, &old_set, sizeof(sigset_t))) + return -EFAULT; } - error = 0; -out: - return error; + + return 0; } long do_sigpending(void __user *set, unsigned long sigsetsize) -- cgit v1.1 From fe0faa005d43bc44c357631d51c273806086caa4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 21:24:19 +0200 Subject: signal: sys_rt_sigtimedwait: simplify the timeout logic No functional changes, cleanup compat_sys_rt_sigtimedwait() and sys_rt_sigtimedwait(). Calculate the timeout before we take ->siglock, this simplifies and lessens the code. Use timespec_valid() to check the timespec. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Reviewed-by: Matt Fleming --- kernel/signal.c | 48 +++++++++++++++++++++--------------------------- 1 file changed, 21 insertions(+), 27 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index bb92000..c734619 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2519,7 +2519,7 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, sigset_t these; struct timespec ts; siginfo_t info; - long timeout = 0; + long timeout; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) @@ -2535,41 +2535,35 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP)); signotset(&these); + timeout = MAX_SCHEDULE_TIMEOUT; if (uts) { if (copy_from_user(&ts, uts, sizeof(ts))) return -EFAULT; - if (ts.tv_nsec >= 1000000000L || ts.tv_nsec < 0 - || ts.tv_sec < 0) + if (!timespec_valid(&ts)) return -EINVAL; + timeout = timespec_to_jiffies(&ts) + (ts.tv_sec || ts.tv_nsec); } spin_lock_irq(¤t->sighand->siglock); sig = dequeue_signal(current, &these, &info); - if (!sig) { - timeout = MAX_SCHEDULE_TIMEOUT; - if (uts) - timeout = (timespec_to_jiffies(&ts) - + (ts.tv_sec || ts.tv_nsec)); + if (!sig && timeout) { + /* + * None ready -- temporarily unblock those we're + * interested while we are sleeping in so that we'll + * be awakened when they arrive. + */ + current->real_blocked = current->blocked; + sigandsets(¤t->blocked, ¤t->blocked, &these); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); - if (timeout) { - /* - * None ready -- temporarily unblock those we're - * interested while we are sleeping in so that we'll - * be awakened when they arrive. - */ - current->real_blocked = current->blocked; - sigandsets(¤t->blocked, ¤t->blocked, &these); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - timeout = schedule_timeout_interruptible(timeout); - - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - current->blocked = current->real_blocked; - siginitset(¤t->real_blocked, 0); - recalc_sigpending(); - } + timeout = schedule_timeout_interruptible(timeout); + + spin_lock_irq(¤t->sighand->siglock); + sig = dequeue_signal(current, &these, &info); + current->blocked = current->real_blocked; + siginitset(¤t->real_blocked, 0); + recalc_sigpending(); } spin_unlock_irq(¤t->sighand->siglock); -- cgit v1.1 From 943df1485a8ff0e600729e082e568ece04d4de9e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 21:44:14 +0200 Subject: signal: introduce do_sigtimedwait() to factor out compat/native code Factor out the common code in sys_rt_sigtimedwait/compat_sys_rt_sigtimedwait to the new helper, do_sigtimedwait(). Add the comment to document the extra tick we add to timespec_to_jiffies(ts), thanks to Linus who explained this to me. Perhaps it would be better to move compat_sys_rt_sigtimedwait() into signal.c under CONFIG_COMPAT, then we can make do_sigtimedwait() static. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Reviewed-by: Matt Fleming --- kernel/signal.c | 110 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 45 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index c734619..1ab89f6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2504,6 +2504,66 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from) #endif /** + * do_sigtimedwait - wait for queued signals specified in @which + * @which: queued signals to wait for + * @info: if non-null, the signal's siginfo is returned here + * @ts: upper bound on process time suspension + */ +int do_sigtimedwait(const sigset_t *which, siginfo_t *info, + const struct timespec *ts) +{ + struct task_struct *tsk = current; + long timeout = MAX_SCHEDULE_TIMEOUT; + sigset_t mask = *which; + int sig; + + if (ts) { + if (!timespec_valid(ts)) + return -EINVAL; + timeout = timespec_to_jiffies(ts); + /* + * We can be close to the next tick, add another one + * to ensure we will wait at least the time asked for. + */ + if (ts->tv_sec || ts->tv_nsec) + timeout++; + } + + /* + * Invert the set of allowed signals to get those we want to block. + */ + sigdelsetmask(&mask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + signotset(&mask); + + spin_lock_irq(&tsk->sighand->siglock); + sig = dequeue_signal(tsk, &mask, info); + if (!sig && timeout) { + /* + * None ready, temporarily unblock those we're interested + * while we are sleeping in so that we'll be awakened when + * they arrive. + */ + tsk->real_blocked = tsk->blocked; + sigandsets(&tsk->blocked, &tsk->blocked, &mask); + recalc_sigpending(); + spin_unlock_irq(&tsk->sighand->siglock); + + timeout = schedule_timeout_interruptible(timeout); + + spin_lock_irq(&tsk->sighand->siglock); + sig = dequeue_signal(tsk, &mask, info); + tsk->blocked = tsk->real_blocked; + siginitset(&tsk->real_blocked, 0); + recalc_sigpending(); + } + spin_unlock_irq(&tsk->sighand->siglock); + + if (sig) + return sig; + return timeout ? -EINTR : -EAGAIN; +} + +/** * sys_rt_sigtimedwait - synchronously wait for queued signals specified * in @uthese * @uthese: queued signals to wait for @@ -2515,11 +2575,10 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, siginfo_t __user *, uinfo, const struct timespec __user *, uts, size_t, sigsetsize) { - int ret, sig; sigset_t these; struct timespec ts; siginfo_t info; - long timeout; + int ret; /* XXX: Don't preclude handling different sized sigset_t's. */ if (sigsetsize != sizeof(sigset_t)) @@ -2528,55 +2587,16 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese, if (copy_from_user(&these, uthese, sizeof(these))) return -EFAULT; - /* - * Invert the set of allowed signals to get those we - * want to block. - */ - sigdelsetmask(&these, sigmask(SIGKILL)|sigmask(SIGSTOP)); - signotset(&these); - - timeout = MAX_SCHEDULE_TIMEOUT; if (uts) { if (copy_from_user(&ts, uts, sizeof(ts))) return -EFAULT; - if (!timespec_valid(&ts)) - return -EINVAL; - timeout = timespec_to_jiffies(&ts) + (ts.tv_sec || ts.tv_nsec); } - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - if (!sig && timeout) { - /* - * None ready -- temporarily unblock those we're - * interested while we are sleeping in so that we'll - * be awakened when they arrive. - */ - current->real_blocked = current->blocked; - sigandsets(¤t->blocked, ¤t->blocked, &these); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - timeout = schedule_timeout_interruptible(timeout); - - spin_lock_irq(¤t->sighand->siglock); - sig = dequeue_signal(current, &these, &info); - current->blocked = current->real_blocked; - siginitset(¤t->real_blocked, 0); - recalc_sigpending(); - } - spin_unlock_irq(¤t->sighand->siglock); + ret = do_sigtimedwait(&these, &info, uts ? &ts : NULL); - if (sig) { - ret = sig; - if (uinfo) { - if (copy_siginfo_to_user(uinfo, &info)) - ret = -EFAULT; - } - } else { - ret = -EAGAIN; - if (timeout) - ret = -EINTR; + if (ret > 0 && uinfo) { + if (copy_siginfo_to_user(uinfo, &info)) + ret = -EFAULT; } return ret; -- cgit v1.1 From b182801ab35f7a0afb3cdf8ba5df464d04206b46 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 21:56:14 +0200 Subject: signal: do_sigtimedwait() needs retarget_shared_pending() do_sigtimedwait() changes current->blocked and thus it needs set_current_blocked()->retarget_shared_pending(). We could use set_current_blocked() directly. It is fine to change ->real_blocked from all-zeroes to ->blocked and vice versa lockless, but this is not immediately clear, looks racy, and needs a huge comment to explain why this is correct. To keep the things simple this patch adds the new static helper, __set_task_blocked() which should be called with ->siglock held. This way we can change both ->real_blocked and ->blocked atomically under ->siglock as the current code does. This is more understandable. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Reviewed-by: Matt Fleming --- kernel/signal.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 1ab89f6..4d97e11 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2299,6 +2299,18 @@ long do_no_restart_syscall(struct restart_block *param) return -EINTR; } +static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) +{ + if (signal_pending(tsk) && !thread_group_empty(tsk)) { + sigset_t newblocked; + /* A set of now blocked but previously unblocked signals. */ + signandsets(&newblocked, newset, ¤t->blocked); + retarget_shared_pending(tsk, &newblocked); + } + tsk->blocked = *newset; + recalc_sigpending(); +} + /** * set_current_blocked - change current->blocked mask * @newset: new mask @@ -2311,14 +2323,7 @@ void set_current_blocked(const sigset_t *newset) struct task_struct *tsk = current; spin_lock_irq(&tsk->sighand->siglock); - if (signal_pending(tsk) && !thread_group_empty(tsk)) { - sigset_t newblocked; - /* A set of now blocked but previously unblocked signals. */ - signandsets(&newblocked, newset, ¤t->blocked); - retarget_shared_pending(tsk, &newblocked); - } - tsk->blocked = *newset; - recalc_sigpending(); + __set_task_blocked(tsk, newset); spin_unlock_irq(&tsk->sighand->siglock); } @@ -2541,7 +2546,8 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info, /* * None ready, temporarily unblock those we're interested * while we are sleeping in so that we'll be awakened when - * they arrive. + * they arrive. Unblocking is always fine, we can avoid + * set_current_blocked(). */ tsk->real_blocked = tsk->blocked; sigandsets(&tsk->blocked, &tsk->blocked, &mask); @@ -2551,10 +2557,9 @@ int do_sigtimedwait(const sigset_t *which, siginfo_t *info, timeout = schedule_timeout_interruptible(timeout); spin_lock_irq(&tsk->sighand->siglock); - sig = dequeue_signal(tsk, &mask, info); - tsk->blocked = tsk->real_blocked; + __set_task_blocked(tsk, &tsk->real_blocked); siginitset(&tsk->real_blocked, 0); - recalc_sigpending(); + sig = dequeue_signal(tsk, &mask, info); } spin_unlock_irq(&tsk->sighand->siglock); -- cgit v1.1 From 702a5073fdb71eb29cd4912575289fb5044c1894 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 27 Apr 2011 22:01:27 +0200 Subject: signal: rename signandsets() to sigandnsets() As Tejun and Linus pointed out, "nand" is the wrong name for "x & ~y", it should be "andn". Rename signandsets() as suggested. Suggested-by: Tejun Heo Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo --- kernel/signal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index 4d97e11..e7ee4e6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -669,7 +669,7 @@ static int rm_from_queue_full(sigset_t *mask, struct sigpending *s) if (sigisemptyset(&m)) return 0; - signandsets(&s->signal, &s->signal, mask); + sigandnsets(&s->signal, &s->signal, mask); list_for_each_entry_safe(q, n, &s->list, list) { if (sigismember(mask, q->info.si_signo)) { list_del_init(&q->list); @@ -2304,7 +2304,7 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset) if (signal_pending(tsk) && !thread_group_empty(tsk)) { sigset_t newblocked; /* A set of now blocked but previously unblocked signals. */ - signandsets(&newblocked, newset, ¤t->blocked); + sigandnsets(&newblocked, newset, ¤t->blocked); retarget_shared_pending(tsk, &newblocked); } tsk->blocked = *newset; @@ -2349,7 +2349,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset) sigorsets(&newset, &tsk->blocked, set); break; case SIG_UNBLOCK: - signandsets(&newset, &tsk->blocked, set); + sigandnsets(&newset, &tsk->blocked, set); break; case SIG_SETMASK: newset = *set; -- cgit v1.1 From b013c399245a88a73aaa031279f0c4d7cea7fe68 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 28 Apr 2011 11:36:20 +0200 Subject: signal: cleanup sys_sigprocmask() Cleanup. Remove the unneeded goto's, we can simply read blocked.sig[0] unconditionally and then copy-to-user it if oset != NULL. Signed-off-by: Oleg Nesterov Acked-by: Tejun Heo Reviewed-by: Matt Fleming --- kernel/signal.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index e7ee4e6..c0af959 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2889,29 +2889,28 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set) /** * sys_sigprocmask - examine and change blocked signals * @how: whether to add, remove, or set signals - * @set: signals to add or remove (if non-null) + * @nset: signals to add or remove (if non-null) * @oset: previous value of signal mask if non-null * * Some platforms have their own version with special arguments; * others support only sys_rt_sigprocmask. */ -SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, +SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, old_sigset_t __user *, oset) { - int error; old_sigset_t old_set, new_set; + int error; - if (set) { - error = -EFAULT; - if (copy_from_user(&new_set, set, sizeof(*set))) - goto out; - new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); + old_set = current->blocked.sig[0]; - spin_lock_irq(¤t->sighand->siglock); - old_set = current->blocked.sig[0]; + if (nset) { + if (copy_from_user(&new_set, nset, sizeof(*nset))) + return -EFAULT; + new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); error = 0; + spin_lock_irq(¤t->sighand->siglock); switch (how) { default: error = -EINVAL; @@ -2930,19 +2929,15 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set, recalc_sigpending(); spin_unlock_irq(¤t->sighand->siglock); if (error) - goto out; - if (oset) - goto set_old; - } else if (oset) { - old_set = current->blocked.sig[0]; - set_old: - error = -EFAULT; + return error; + } + + if (oset) { if (copy_to_user(oset, &old_set, sizeof(*oset))) - goto out; + return -EFAULT; } - error = 0; -out: - return error; + + return 0; } #endif /* __ARCH_WANT_SYS_SIGPROCMASK */ -- cgit v1.1 From 2e4f7c7769a0b8b6b3e88b0436c6c634f146a4ef Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 9 May 2011 13:48:56 +0200 Subject: signal: sys_sigprocmask() needs retarget_shared_pending() sys_sigprocmask() changes current->blocked by hand. Convert this code to use set_current_blocked(). Signed-off-by: Oleg Nesterov --- kernel/signal.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index c0af959..b87780e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2900,7 +2900,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, old_sigset_t __user *, oset) { old_sigset_t old_set, new_set; - int error; + sigset_t new_blocked; old_set = current->blocked.sig[0]; @@ -2909,27 +2909,23 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset, return -EFAULT; new_set &= ~(sigmask(SIGKILL) | sigmask(SIGSTOP)); - error = 0; - spin_lock_irq(¤t->sighand->siglock); + new_blocked = current->blocked; + switch (how) { - default: - error = -EINVAL; - break; case SIG_BLOCK: - sigaddsetmask(¤t->blocked, new_set); + sigaddsetmask(&new_blocked, new_set); break; case SIG_UNBLOCK: - sigdelsetmask(¤t->blocked, new_set); + sigdelsetmask(&new_blocked, new_set); break; case SIG_SETMASK: - current->blocked.sig[0] = new_set; + new_blocked.sig[0] = new_set; break; + default: + return -EINVAL; } - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - if (error) - return error; + set_current_blocked(&new_blocked); } if (oset) { -- cgit v1.1 From 40ae717d1e78d982bd469b2013a4cbf4ec1ca434 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 6 May 2011 11:52:22 +0200 Subject: ptrace: fix signal->wait_chldexit usage in task_clear_group_stop_trapping() GROUP_STOP_TRAPPING waiting mechanism piggybacks on signal->wait_chldexit which is primarily used to implement waiting for wait(2) and friends. When do_wait() waits on signal->wait_chldexit, it uses a custom wake up callback, child_wait_callback(), which expects the child task which is waking up the parent to be passed in as @key to filter out spurious wakeups. task_clear_group_stop_trapping() used __wake_up_sync() which uses NULL @key causing the following oops if the parent was doing do_wait(). BUG: unable to handle kernel NULL pointer dereference at 00000000000002d8 IP: [] child_wait_callback+0x29/0x80 PGD 1d899067 PUD 1e418067 PMD 0 Oops: 0000 [#1] PREEMPT SMP last sysfs file: /sys/devices/pci0000:00/0000:00:03.0/local_cpus CPU 2 Modules linked in: Pid: 4498, comm: test-continued Not tainted 2.6.39-rc6-work+ #32 Bochs Bochs RIP: 0010:[] [] child_wait_callback+0x29/0x80 RSP: 0000:ffff88001b889bf8 EFLAGS: 00010046 RAX: 0000000000000000 RBX: ffff88001fab3af8 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000002 RDI: ffff88001d91df20 RBP: ffff88001b889c08 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000 R13: ffff88001fb70550 R14: 0000000000000000 R15: 0000000000000001 FS: 00007f26ccae4700(0000) GS:ffff88001fd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 00000000000002d8 CR3: 000000001b8ac000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 Process test-continued (pid: 4498, threadinfo ffff88001b888000, task ffff88001fb88000) Stack: ffff88001b889c18 ffff88001fb70538 ffff88001b889c58 ffffffff810312f9 0000000000000001 0000000200000001 ffff88001b889c58 ffff88001fb70518 0000000000000002 0000000000000082 0000000000000001 0000000000000000 Call Trace: [] __wake_up_common+0x59/0x90 [] __wake_up_sync_key+0x53/0x80 [] __wake_up_sync+0x10/0x20 [] task_clear_jobctl_trapping+0x44/0x50 [] ptrace_stop+0x7c/0x290 [] do_signal_stop+0x28a/0x2d0 [] get_signal_to_deliver+0x14f/0x5a0 [] do_signal+0x75/0x7b0 [] do_notify_resume+0x5d/0x70 [] retint_signal+0x46/0x8c Code: 00 00 55 48 89 e5 53 48 83 ec 08 0f 1f 44 00 00 8b 47 d8 83 f8 03 74 3a 85 c0 49 89 c8 75 23 89 c0 48 8b 5f e0 4c 8d 0c 40 31 c0 <4b> 39 9c c8 d8 02 00 00 74 1d 48 83 c4 08 5b c9 c3 66 0f 1f 44 Fix it by using __wake_up_sync_key() and passing in the child as @key. I still think it's a mistake to piggyback on wait_chldexit for this. Given the relative low frequency of ptrace use, we would be much better off leaving already complex wait_chldexit alone and using bit waitqueue. Signed-off-by: Tejun Heo Reviewed-by: Oleg Nesterov --- kernel/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/signal.c') diff --git a/kernel/signal.c b/kernel/signal.c index b87780e..35c5f4b 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -238,8 +238,8 @@ static void task_clear_group_stop_trapping(struct task_struct *task) { if (unlikely(task->group_stop & GROUP_STOP_TRAPPING)) { task->group_stop &= ~GROUP_STOP_TRAPPING; - __wake_up_sync(&task->parent->signal->wait_chldexit, - TASK_UNINTERRUPTIBLE, 1); + __wake_up_sync_key(&task->parent->signal->wait_chldexit, + TASK_UNINTERRUPTIBLE, 1, task); } } -- cgit v1.1