aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/Makefile1
-rw-r--r--kernel/bounds.c2
-rw-r--r--kernel/capability.c96
-rw-r--r--kernel/cpuset.c80
-rw-r--r--kernel/crash_dump.c34
-rw-r--r--kernel/cred.c6
-rw-r--r--kernel/debug/gdbstub.c30
-rw-r--r--kernel/debug/kdb/kdb_main.c4
-rw-r--r--kernel/exit.c1
-rw-r--r--kernel/fork.c11
-rw-r--r--kernel/futex.c15
-rw-r--r--kernel/futex_compat.c11
-rw-r--r--kernel/groups.c2
-rw-r--r--kernel/irq/Kconfig15
-rw-r--r--kernel/irq/autoprobe.c4
-rw-r--r--kernel/irq/chip.c283
-rw-r--r--kernel/irq/compat.h72
-rw-r--r--kernel/irq/debug.h12
-rw-r--r--kernel/irq/dummychip.c9
-rw-r--r--kernel/irq/handle.c19
-rw-r--r--kernel/irq/internals.h16
-rw-r--r--kernel/irq/irqdesc.c17
-rw-r--r--kernel/irq/manage.c102
-rw-r--r--kernel/irq/migration.c15
-rw-r--r--kernel/irq/proc.c23
-rw-r--r--kernel/irq/resend.c1
-rw-r--r--kernel/irq/settings.h55
-rw-r--r--kernel/irq/spurious.c11
-rw-r--r--kernel/kallsyms.c48
-rw-r--r--kernel/kexec.c5
-rw-r--r--kernel/lockdep_proc.c9
-rw-r--r--kernel/nsproxy.c4
-rw-r--r--kernel/perf_event.c20
-rw-r--r--kernel/pid_namespace.c11
-rw-r--r--kernel/power/block_io.c2
-rw-r--r--kernel/ptrace.c27
-rw-r--r--kernel/res_counter.c14
-rw-r--r--kernel/sched.c37
-rw-r--r--kernel/sched_fair.c5
-rw-r--r--kernel/sched_idletask.c2
-rw-r--r--kernel/sched_stoptask.c2
-rw-r--r--kernel/signal.c187
-rw-r--r--kernel/sys.c77
-rw-r--r--kernel/sysctl.c35
-rw-r--r--kernel/sysctl_check.c10
-rw-r--r--kernel/taskstats.c2
-rw-r--r--kernel/time/ntp.c2
-rw-r--r--kernel/time/timekeeping.c27
-rw-r--r--kernel/trace/blktrace.c15
-rw-r--r--kernel/trace/ftrace.c3
-rw-r--r--kernel/uid16.c2
-rw-r--r--kernel/user.c8
-rw-r--r--kernel/utsname.c12
53 files changed, 861 insertions, 652 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 353d3fe..85cbfb3 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -107,6 +107,7 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o
obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o
obj-$(CONFIG_PADATA) += padata.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
ifneq ($(CONFIG_SCHED_OMIT_FRAME_POINTER),y)
# According to Alan Modra <alan@linuxcare.com.au>, the -fno-omit-frame-pointer is
diff --git a/kernel/bounds.c b/kernel/bounds.c
index 98a51f2..0c9b862 100644
--- a/kernel/bounds.c
+++ b/kernel/bounds.c
@@ -9,11 +9,13 @@
#include <linux/page-flags.h>
#include <linux/mmzone.h>
#include <linux/kbuild.h>
+#include <linux/page_cgroup.h>
void foo(void)
{
/* The enum constants to put into include/generated/bounds.h */
DEFINE(NR_PAGEFLAGS, __NR_PAGEFLAGS);
DEFINE(MAX_NR_ZONES, __MAX_NR_ZONES);
+ DEFINE(NR_PCG_FLAGS, __NR_PCG_FLAGS);
/* End of constants */
}
diff --git a/kernel/capability.c b/kernel/capability.c
index 9e9385f..bf0c734 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -14,6 +14,7 @@
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
#include <asm/uaccess.h>
/*
@@ -290,6 +291,60 @@ error:
}
/**
+ * has_capability - Does a task have a capability in init_user_ns
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to the initial user namespace, false if not.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_capability(struct task_struct *t, int cap)
+{
+ int ret = security_real_capable(t, &init_user_ns, cap);
+
+ return (ret == 0);
+}
+
+/**
+ * has_capability - Does a task have a capability in a specific user ns
+ * @t: The task in question
+ * @ns: target user namespace
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to the specified user namespace, false if not.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_ns_capability(struct task_struct *t,
+ struct user_namespace *ns, int cap)
+{
+ int ret = security_real_capable(t, ns, cap);
+
+ return (ret == 0);
+}
+
+/**
+ * has_capability_noaudit - Does a task have a capability (unaudited)
+ * @t: The task in question
+ * @cap: The capability to be tested for
+ *
+ * Return true if the specified task has the given superior capability
+ * currently in effect to init_user_ns, false if not. Don't write an
+ * audit message for the check.
+ *
+ * Note that this does not set PF_SUPERPRIV on the task.
+ */
+bool has_capability_noaudit(struct task_struct *t, int cap)
+{
+ int ret = security_real_capable_noaudit(t, &init_user_ns, cap);
+
+ return (ret == 0);
+}
+
+/**
* capable - Determine if the current task has a superior capability in effect
* @cap: The capability to be tested for
*
@@ -299,17 +354,48 @@ error:
* This sets PF_SUPERPRIV on the task if the capability is available on the
* assumption that it's about to be used.
*/
-int capable(int cap)
+bool capable(int cap)
+{
+ return ns_capable(&init_user_ns, cap);
+}
+EXPORT_SYMBOL(capable);
+
+/**
+ * ns_capable - Determine if the current task has a superior capability in effect
+ * @ns: The usernamespace we want the capability in
+ * @cap: The capability to be tested for
+ *
+ * Return true if the current task has the given superior capability currently
+ * available for use, false if not.
+ *
+ * This sets PF_SUPERPRIV on the task if the capability is available on the
+ * assumption that it's about to be used.
+ */
+bool ns_capable(struct user_namespace *ns, int cap)
{
if (unlikely(!cap_valid(cap))) {
printk(KERN_CRIT "capable() called with invalid cap=%u\n", cap);
BUG();
}
- if (security_capable(current_cred(), cap) == 0) {
+ if (security_capable(ns, current_cred(), cap) == 0) {
current->flags |= PF_SUPERPRIV;
- return 1;
+ return true;
}
- return 0;
+ return false;
}
-EXPORT_SYMBOL(capable);
+EXPORT_SYMBOL(ns_capable);
+
+/**
+ * task_ns_capable - Determine whether current task has a superior
+ * capability targeted at a specific task's user namespace.
+ * @t: The task whose user namespace is targeted.
+ * @cap: The capability in question.
+ *
+ * Return true if it does, false otherwise.
+ */
+bool task_ns_capable(struct task_struct *t, int cap)
+{
+ return ns_capable(task_cred_xxx(t, user)->user_ns, cap);
+}
+EXPORT_SYMBOL(task_ns_capable);
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e92e981..33eee16 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1015,17 +1015,12 @@ static void cpuset_change_nodemask(struct task_struct *p,
struct cpuset *cs;
int migrate;
const nodemask_t *oldmem = scan->data;
- NODEMASK_ALLOC(nodemask_t, newmems, GFP_KERNEL);
-
- if (!newmems)
- return;
+ static nodemask_t newmems; /* protected by cgroup_mutex */
cs = cgroup_cs(scan->cg);
- guarantee_online_mems(cs, newmems);
-
- cpuset_change_task_nodemask(p, newmems);
+ guarantee_online_mems(cs, &newmems);
- NODEMASK_FREE(newmems);
+ cpuset_change_task_nodemask(p, &newmems);
mm = get_task_mm(p);
if (!mm)
@@ -1438,44 +1433,35 @@ static void cpuset_attach(struct cgroup_subsys *ss, struct cgroup *cont,
struct mm_struct *mm;
struct cpuset *cs = cgroup_cs(cont);
struct cpuset *oldcs = cgroup_cs(oldcont);
- NODEMASK_ALLOC(nodemask_t, from, GFP_KERNEL);
- NODEMASK_ALLOC(nodemask_t, to, GFP_KERNEL);
-
- if (from == NULL || to == NULL)
- goto alloc_fail;
+ static nodemask_t to; /* protected by cgroup_mutex */
if (cs == &top_cpuset) {
cpumask_copy(cpus_attach, cpu_possible_mask);
} else {
guarantee_online_cpus(cs, cpus_attach);
}
- guarantee_online_mems(cs, to);
+ guarantee_online_mems(cs, &to);
/* do per-task migration stuff possibly for each in the threadgroup */
- cpuset_attach_task(tsk, to, cs);
+ cpuset_attach_task(tsk, &to, cs);
if (threadgroup) {
struct task_struct *c;
rcu_read_lock();
list_for_each_entry_rcu(c, &tsk->thread_group, thread_group) {
- cpuset_attach_task(c, to, cs);
+ cpuset_attach_task(c, &to, cs);
}
rcu_read_unlock();
}
/* change mm; only needs to be done once even if threadgroup */
- *from = oldcs->mems_allowed;
- *to = cs->mems_allowed;
+ to = cs->mems_allowed;
mm = get_task_mm(tsk);
if (mm) {
- mpol_rebind_mm(mm, to);
+ mpol_rebind_mm(mm, &to);
if (is_memory_migrate(cs))
- cpuset_migrate_mm(mm, from, to);
+ cpuset_migrate_mm(mm, &oldcs->mems_allowed, &to);
mmput(mm);
}
-
-alloc_fail:
- NODEMASK_FREE(from);
- NODEMASK_FREE(to);
}
/* The various types of files and directories in a cpuset file system */
@@ -1610,34 +1596,26 @@ out:
* across a page fault.
*/
-static int cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
{
- int ret;
+ size_t count;
mutex_lock(&callback_mutex);
- ret = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
+ count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
mutex_unlock(&callback_mutex);
- return ret;
+ return count;
}
-static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
+static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs)
{
- NODEMASK_ALLOC(nodemask_t, mask, GFP_KERNEL);
- int retval;
-
- if (mask == NULL)
- return -ENOMEM;
+ size_t count;
mutex_lock(&callback_mutex);
- *mask = cs->mems_allowed;
+ count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed);
mutex_unlock(&callback_mutex);
- retval = nodelist_scnprintf(page, PAGE_SIZE, *mask);
-
- NODEMASK_FREE(mask);
-
- return retval;
+ return count;
}
static ssize_t cpuset_common_file_read(struct cgroup *cont,
@@ -1862,8 +1840,10 @@ static void cpuset_post_clone(struct cgroup_subsys *ss,
cs = cgroup_cs(cgroup);
parent_cs = cgroup_cs(parent);
+ mutex_lock(&callback_mutex);
cs->mems_allowed = parent_cs->mems_allowed;
cpumask_copy(cs->cpus_allowed, parent_cs->cpus_allowed);
+ mutex_unlock(&callback_mutex);
return;
}
@@ -2066,10 +2046,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
struct cpuset *cp; /* scans cpusets being updated */
struct cpuset *child; /* scans child cpusets of cp */
struct cgroup *cont;
- NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
- if (oldmems == NULL)
- return;
+ static nodemask_t oldmems; /* protected by cgroup_mutex */
list_add_tail((struct list_head *)&root->stack_list, &queue);
@@ -2086,7 +2063,7 @@ static void scan_for_empty_cpusets(struct cpuset *root)
nodes_subset(cp->mems_allowed, node_states[N_HIGH_MEMORY]))
continue;
- *oldmems = cp->mems_allowed;
+ oldmems = cp->mems_allowed;
/* Remove offline cpus and mems from this cpuset. */
mutex_lock(&callback_mutex);
@@ -2102,10 +2079,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
remove_tasks_in_empty_cpuset(cp);
else {
update_tasks_cpumask(cp, NULL);
- update_tasks_nodemask(cp, oldmems, NULL);
+ update_tasks_nodemask(cp, &oldmems, NULL);
}
}
- NODEMASK_FREE(oldmems);
}
/*
@@ -2147,19 +2123,16 @@ void cpuset_update_active_cpus(void)
static int cpuset_track_online_nodes(struct notifier_block *self,
unsigned long action, void *arg)
{
- NODEMASK_ALLOC(nodemask_t, oldmems, GFP_KERNEL);
-
- if (oldmems == NULL)
- return NOTIFY_DONE;
+ static nodemask_t oldmems; /* protected by cgroup_mutex */
cgroup_lock();
switch (action) {
case MEM_ONLINE:
- *oldmems = top_cpuset.mems_allowed;
+ oldmems = top_cpuset.mems_allowed;
mutex_lock(&callback_mutex);
top_cpuset.mems_allowed = node_states[N_HIGH_MEMORY];
mutex_unlock(&callback_mutex);
- update_tasks_nodemask(&top_cpuset, oldmems, NULL);
+ update_tasks_nodemask(&top_cpuset, &oldmems, NULL);
break;
case MEM_OFFLINE:
/*
@@ -2173,7 +2146,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
}
cgroup_unlock();
- NODEMASK_FREE(oldmems);
return NOTIFY_OK;
}
#endif
diff --git a/kernel/crash_dump.c b/kernel/crash_dump.c
new file mode 100644
index 0000000..5f85690
--- /dev/null
+++ b/kernel/crash_dump.c
@@ -0,0 +1,34 @@
+#include <linux/kernel.h>
+#include <linux/crash_dump.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+
+/*
+ * If we have booted due to a crash, max_pfn will be a very low value. We need
+ * to know the amount of memory that the previous kernel used.
+ */
+unsigned long saved_max_pfn;
+
+/*
+ * stores the physical address of elf header of crash image
+ *
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence put
+ * it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
+ */
+unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX;
+
+/*
+ * elfcorehdr= specifies the location of elf core header stored by the crashed
+ * kernel. This option will be passed by kexec loader to the capture kernel.
+ */
+static int __init setup_elfcorehdr(char *arg)
+{
+ char *end;
+ if (!arg)
+ return -EINVAL;
+ elfcorehdr_addr = memparse(arg, &end);
+ return end > arg ? 0 : -EINVAL;
+}
+early_param("elfcorehdr", setup_elfcorehdr);
diff --git a/kernel/cred.c b/kernel/cred.c
index 2343c13..5557b55 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -741,6 +741,12 @@ int set_create_files_as(struct cred *new, struct inode *inode)
}
EXPORT_SYMBOL(set_create_files_as);
+struct user_namespace *current_user_ns(void)
+{
+ return _current_user_ns();
+}
+EXPORT_SYMBOL(current_user_ns);
+
#ifdef CONFIG_DEBUG_CREDENTIALS
bool creds_are_invalid(const struct cred *cred)
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 481a7bd..a11db95 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -1093,3 +1093,33 @@ int gdbstub_state(struct kgdb_state *ks, char *cmd)
put_packet(remcom_out_buffer);
return 0;
}
+
+/**
+ * gdbstub_exit - Send an exit message to GDB
+ * @status: The exit code to report.
+ */
+void gdbstub_exit(int status)
+{
+ unsigned char checksum, ch, buffer[3];
+ int loop;
+
+ buffer[0] = 'W';
+ buffer[1] = hex_asc_hi(status);
+ buffer[2] = hex_asc_lo(status);
+
+ dbg_io_ops->write_char('$');
+ checksum = 0;
+
+ for (loop = 0; loop < 3; loop++) {
+ ch = buffer[loop];
+ checksum += ch;
+ dbg_io_ops->write_char(ch);
+ }
+
+ dbg_io_ops->write_char('#');
+ dbg_io_ops->write_char(hex_asc_hi(checksum));
+ dbg_io_ops->write_char(hex_asc_lo(checksum));
+
+ /* make sure the output is flushed, lest the bootloader clobber it */
+ dbg_io_ops->flush();
+}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index bd3e8e2..6bc6e3b 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -78,7 +78,7 @@ static unsigned int kdb_continue_catastrophic;
static kdbtab_t *kdb_commands;
#define KDB_BASE_CMD_MAX 50
static int kdb_max_commands = KDB_BASE_CMD_MAX;
-static kdbtab_t kdb_base_commands[50];
+static kdbtab_t kdb_base_commands[KDB_BASE_CMD_MAX];
#define for_each_kdbcmd(cmd, num) \
for ((cmd) = kdb_base_commands, (num) = 0; \
num < kdb_max_commands; \
@@ -2892,7 +2892,7 @@ static void __init kdb_inittab(void)
"Send a signal to a process", 0, KDB_REPEAT_NONE);
kdb_register_repeat("summary", kdb_summary, "",
"Summarize the system", 4, KDB_REPEAT_NONE);
- kdb_register_repeat("per_cpu", kdb_per_cpu, "",
+ kdb_register_repeat("per_cpu", kdb_per_cpu, "<sym> [<bytes>] [<cpu>]",
"Display per_cpu variables", 3, KDB_REPEAT_NONE);
kdb_register_repeat("grephelp", kdb_grep_help, "",
"Display help on | grep", 0, KDB_REPEAT_NONE);
diff --git a/kernel/exit.c b/kernel/exit.c
index 1a0f10f..5cbc83e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -908,6 +908,7 @@ NORET_TYPE void do_exit(long code)
profile_task_exit(tsk);
WARN_ON(atomic_read(&tsk->fs_excl));
+ WARN_ON(blk_needs_flush_plug(tsk));
if (unlikely(in_interrupt()))
panic("Aiee, killing interrupt handler!");
diff --git a/kernel/fork.c b/kernel/fork.c
index f2b494d..e7548de 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1187,12 +1187,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
pid = alloc_pid(p->nsproxy->pid_ns);
if (!pid)
goto bad_fork_cleanup_io;
-
- if (clone_flags & CLONE_NEWPID) {
- retval = pid_ns_prepare_proc(p->nsproxy->pid_ns);
- if (retval < 0)
- goto bad_fork_free_pid;
- }
}
p->pid = pid_nr(pid);
@@ -1211,6 +1205,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
* Clear TID on mm_release()?
*/
p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
+#ifdef CONFIG_BLOCK
+ p->plug = NULL;
+#endif
#ifdef CONFIG_FUTEX
p->robust_list = NULL;
#ifdef CONFIG_COMPAT
@@ -1296,7 +1293,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
tracehook_finish_clone(p, clone_flags, trace);
if (thread_group_leader(p)) {
- if (clone_flags & CLONE_NEWPID)
+ if (is_child_reaper(pid))
p->nsproxy->pid_ns->child_reaper = p;
p->signal->leader_pid = pid;
diff --git a/kernel/futex.c b/kernel/futex.c
index bda4157..dfb924f 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -782,8 +782,8 @@ static void __unqueue_futex(struct futex_q *q)
{
struct futex_hash_bucket *hb;
- if (WARN_ON(!q->lock_ptr || !spin_is_locked(q->lock_ptr)
- || plist_node_empty(&q->list)))
+ if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
+ || WARN_ON(plist_node_empty(&q->list)))
return;
hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
@@ -2418,10 +2418,19 @@ SYSCALL_DEFINE3(get_robust_list, int, pid,
goto err_unlock;
ret = -EPERM;
pcred = __task_cred(p);
+ /* If victim is in different user_ns, then uids are not
+ comparable, so we must have CAP_SYS_PTRACE */
+ if (cred->user->user_ns != pcred->user->user_ns) {
+ if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
+ goto err_unlock;
+ goto ok;
+ }
+ /* If victim is in same user_ns, then uids are comparable */
if (cred->euid != pcred->euid &&
cred->euid != pcred->uid &&
- !capable(CAP_SYS_PTRACE))
+ !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
goto err_unlock;
+ok:
head = p->robust_list;
rcu_read_unlock();
}
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index a7934ac..5f9e689 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -153,10 +153,19 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr,
goto err_unlock;
ret = -EPERM;
pcred = __task_cred(p);
+ /* If victim is in different user_ns, then uids are not
+ comparable, so we must have CAP_SYS_PTRACE */
+ if (cred->user->user_ns != pcred->user->user_ns) {
+ if (!ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
+ goto err_unlock;
+ goto ok;
+ }
+ /* If victim is in same user_ns, then uids are comparable */
if (cred->euid != pcred->euid &&
cred->euid != pcred->uid &&
- !capable(CAP_SYS_PTRACE))
+ !ns_capable(pcred->user->user_ns, CAP_SYS_PTRACE))
goto err_unlock;
+ok:
head = p->compat_robust_list;
rcu_read_unlock();
}
diff --git a/kernel/groups.c b/kernel/groups.c
index 253dc0f..1cc476d 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -233,7 +233,7 @@ SYSCALL_DEFINE2(setgroups, int, gidsetsize, gid_t __user *, grouplist)
struct group_info *group_info;
int retval;
- if (!capable(CAP_SETGID))
+ if (!nsown_capable(CAP_SETGID))
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig
index 09bef82..c574f9a 100644
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -10,13 +10,6 @@ menu "IRQ subsystem"
config GENERIC_HARDIRQS
def_bool y
-# Select this to disable the deprecated stuff
-config GENERIC_HARDIRQS_NO_DEPRECATED
- bool
-
-config GENERIC_HARDIRQS_NO_COMPAT
- bool
-
# Options selectable by the architecture code
# Make sparse irq Kconfig switch below available
@@ -31,6 +24,10 @@ config GENERIC_IRQ_PROBE
config GENERIC_IRQ_SHOW
bool
+# Print level/edge extra information
+config GENERIC_IRQ_SHOW_LEVEL
+ bool
+
# Support for delayed migration from interrupt context
config GENERIC_PENDING_IRQ
bool
@@ -47,6 +44,10 @@ config HARDIRQS_SW_RESEND
config IRQ_PREFLOW_FASTEOI
bool
+# Edge style eoi based handler (cell)
+config IRQ_EDGE_EOI_HANDLER
+ bool
+
# Support forced irq threading
config IRQ_FORCED_THREADING
bool
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 394784c..342d8f4 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -70,10 +70,8 @@ unsigned long probe_irq_on(void)
raw_spin_lock_irq(&desc->lock);
if (!desc->action && irq_settings_can_probe(desc)) {
desc->istate |= IRQS_AUTODETECT | IRQS_WAITING;
- if (irq_startup(desc)) {
- irq_compat_set_pending(desc);
+ if (irq_startup(desc))
desc->istate |= IRQS_PENDING;
- }
}
raw_spin_unlock_irq(&desc->lock);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index c9c0601..1dafc86 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -34,9 +34,14 @@ int irq_set_chip(unsigned int irq, struct irq_chip *chip)
if (!chip)
chip = &no_irq_chip;
- irq_chip_set_defaults(chip);
desc->irq_data.chip = chip;
irq_put_desc_unlock(desc, flags);
+ /*
+ * For !CONFIG_SPARSE_IRQ make the irq show up in
+ * allocated_irqs. For the CONFIG_SPARSE_IRQ case, it is
+ * already marked, and this call is harmless.
+ */
+ irq_reserve_irq(irq);
return 0;
}
EXPORT_SYMBOL(irq_set_chip);
@@ -134,26 +139,22 @@ EXPORT_SYMBOL_GPL(irq_get_irq_data);
static void irq_state_clr_disabled(struct irq_desc *desc)
{
- desc->istate &= ~IRQS_DISABLED;
- irq_compat_clr_disabled(desc);
+ irqd_clear(&desc->irq_data, IRQD_IRQ_DISABLED);
}
static void irq_state_set_disabled(struct irq_desc *desc)
{
- desc->istate |= IRQS_DISABLED;
- irq_compat_set_disabled(desc);
+ irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
}
static void irq_state_clr_masked(struct irq_desc *desc)
{
- desc->istate &= ~IRQS_MASKED;
- irq_compat_clr_masked(desc);
+ irqd_clear(&desc->irq_data, IRQD_IRQ_MASKED);
}
static void irq_state_set_masked(struct irq_desc *desc)
{
- desc->istate |= IRQS_MASKED;
- irq_compat_set_masked(desc);
+ irqd_set(&desc->irq_data, IRQD_IRQ_MASKED);
}
int irq_startup(struct irq_desc *desc)
@@ -203,126 +204,6 @@ void irq_disable(struct irq_desc *desc)
}
}
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
-/* Temporary migration helpers */
-static void compat_irq_mask(struct irq_data *data)
-{
- data->chip->mask(data->irq);
-}
-
-static void compat_irq_unmask(struct irq_data *data)
-{
- data->chip->unmask(data->irq);
-}
-
-static void compat_irq_ack(struct irq_data *data)
-{
- data->chip->ack(data->irq);
-}
-
-static void compat_irq_mask_ack(struct irq_data *data)
-{
- data->chip->mask_ack(data->irq);
-}
-
-static void compat_irq_eoi(struct irq_data *data)
-{
- data->chip->eoi(data->irq);
-}
-
-static void compat_irq_enable(struct irq_data *data)
-{
- data->chip->enable(data->irq);
-}
-
-static void compat_irq_disable(struct irq_data *data)
-{
- data->chip->disable(data->irq);
-}
-
-static void compat_irq_shutdown(struct irq_data *data)
-{
- data->chip->shutdown(data->irq);
-}
-
-static unsigned int compat_irq_startup(struct irq_data *data)
-{
- return data->chip->startup(data->irq);
-}
-
-static int compat_irq_set_affinity(struct irq_data *data,
- const struct cpumask *dest, bool force)
-{
- return data->chip->set_affinity(data->irq, dest);
-}
-
-static int compat_irq_set_type(struct irq_data *data, unsigned int type)
-{
- return data->chip->set_type(data->irq, type);
-}
-
-static int compat_irq_set_wake(struct irq_data *data, unsigned int on)
-{
- return data->chip->set_wake(data->irq, on);
-}
-
-static int compat_irq_retrigger(struct irq_data *data)
-{
- return data->chip->retrigger(data->irq);
-}
-
-static void compat_bus_lock(struct irq_data *data)
-{
- data->chip->bus_lock(data->irq);
-}
-
-static void compat_bus_sync_unlock(struct irq_data *data)
-{
- data->chip->bus_sync_unlock(data->irq);
-}
-#endif
-
-/*
- * Fixup enable/disable function pointers
- */
-void irq_chip_set_defaults(struct irq_chip *chip)
-{
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
- if (chip->enable)
- chip->irq_enable = compat_irq_enable;
- if (chip->disable)
- chip->irq_disable = compat_irq_disable;
- if (chip->shutdown)
- chip->irq_shutdown = compat_irq_shutdown;
- if (chip->startup)
- chip->irq_startup = compat_irq_startup;
- if (!chip->end)
- chip->end = dummy_irq_chip.end;
- if (chip->bus_lock)
- chip->irq_bus_lock = compat_bus_lock;
- if (chip->bus_sync_unlock)
- chip->irq_bus_sync_unlock = compat_bus_sync_unlock;
- if (chip->mask)
- chip->irq_mask = compat_irq_mask;
- if (chip->unmask)
- chip->irq_unmask = compat_irq_unmask;
- if (chip->ack)
- chip->irq_ack = compat_irq_ack;
- if (chip->mask_ack)
- chip->irq_mask_ack = compat_irq_mask_ack;
- if (chip->eoi)
- chip->irq_eoi = compat_irq_eoi;
- if (chip->set_affinity)
- chip->irq_set_affinity = compat_irq_set_affinity;
- if (chip->set_type)
- chip->irq_set_type = compat_irq_set_type;
- if (chip->set_wake)
- chip->irq_set_wake = compat_irq_set_wake;
- if (chip->retrigger)
- chip->irq_retrigger = compat_irq_retrigger;
-#endif
-}
-
static inline void mask_ack_irq(struct irq_desc *desc)
{
if (desc->irq_data.chip->irq_mask_ack)
@@ -372,11 +253,10 @@ void handle_nested_irq(unsigned int irq)
kstat_incr_irqs_this_cpu(irq, desc);
action = desc->action;
- if (unlikely(!action || (desc->istate & IRQS_DISABLED)))
+ if (unlikely(!action || irqd_irq_disabled(&desc->irq_data)))
goto out_unlock;
- irq_compat_set_progress(desc);
- desc->istate |= IRQS_INPROGRESS;
+ irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock_irq(&desc->lock);
action_ret = action->thread_fn(action->irq, action->dev_id);
@@ -384,8 +264,7 @@ void handle_nested_irq(unsigned int irq)
note_interrupt(irq, desc, action_ret);
raw_spin_lock_irq(&desc->lock);
- desc->istate &= ~IRQS_INPROGRESS;
- irq_compat_clr_progress(desc);
+ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
out_unlock:
raw_spin_unlock_irq(&desc->lock);
@@ -416,14 +295,14 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
{
raw_spin_lock(&desc->lock);
- if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (unlikely(irqd_irq_inprogress(&desc->irq_data)))
if (!irq_check_poll(desc))
goto out_unlock;
desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
kstat_incr_irqs_this_cpu(irq, desc);
- if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
+ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data)))
goto out_unlock;
handle_irq_event(desc);
@@ -448,7 +327,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
raw_spin_lock(&desc->lock);
mask_ack_irq(desc);
- if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (unlikely(irqd_irq_inprogress(&desc->irq_data)))
if (!irq_check_poll(desc))
goto out_unlock;
@@ -459,12 +338,12 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
* If its disabled or no action available
* keep it masked and get out of here
*/
- if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED)))
+ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data)))
goto out_unlock;
handle_irq_event(desc);
- if (!(desc->istate & (IRQS_DISABLED | IRQS_ONESHOT)))
+ if (!irqd_irq_disabled(&desc->irq_data) && !(desc->istate & IRQS_ONESHOT))
unmask_irq(desc);
out_unlock:
raw_spin_unlock(&desc->lock);
@@ -496,7 +375,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
{
raw_spin_lock(&desc->lock);
- if (unlikely(desc->istate & IRQS_INPROGRESS))
+ if (unlikely(irqd_irq_inprogress(&desc->irq_data)))
if (!irq_check_poll(desc))
goto out;
@@ -507,8 +386,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
* If its disabled or no action available
* then mask it and get out of here:
*/
- if (unlikely(!desc->action || (desc->istate & IRQS_DISABLED))) {
- irq_compat_set_pending(desc);
+ if (unlikely(!desc->action || irqd_irq_disabled(&desc->irq_data))) {
desc->istate |= IRQS_PENDING;
mask_irq(desc);
goto out;
@@ -558,10 +436,9 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
* we shouldn't process the IRQ. Mark it pending, handle
* the necessary masking and go out
*/
- if (unlikely((desc->istate & (IRQS_DISABLED | IRQS_INPROGRESS) ||
- !desc->action))) {
+ if (unlikely(irqd_irq_disabled(&desc->irq_data) ||
+ irqd_irq_inprogress(&desc->irq_data) || !desc->action)) {
if (!irq_check_poll(desc)) {
- irq_compat_set_pending(desc);
desc->istate |= IRQS_PENDING;
mask_ack_irq(desc);
goto out_unlock;
@@ -584,20 +461,65 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
* Renable it, if it was not disabled in meantime.
*/
if (unlikely(desc->istate & IRQS_PENDING)) {
- if (!(desc->istate & IRQS_DISABLED) &&
- (desc->istate & IRQS_MASKED))
+ if (!irqd_irq_disabled(&desc->irq_data) &&
+ irqd_irq_masked(&desc->irq_data))
unmask_irq(desc);
}
handle_irq_event(desc);
} while ((desc->istate & IRQS_PENDING) &&
- !(desc->istate & IRQS_DISABLED));
+ !irqd_irq_disabled(&desc->irq_data));
out_unlock:
raw_spin_unlock(&desc->lock);
}
+#ifdef CONFIG_IRQ_EDGE_EOI_HANDLER
+/**
+ * handle_edge_eoi_irq - edge eoi type IRQ handler
+ * @irq: the interrupt number
+ * @desc: the interrupt description structure for this irq
+ *
+ * Similar as the above handle_edge_irq, but using eoi and w/o the
+ * mask/unmask logic.
+ */
+void handle_edge_eoi_irq(unsigned int irq, struct irq_desc *desc)
+{
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+
+ raw_spin_lock(&desc->lock);
+
+ desc->istate &= ~(IRQS_REPLAY | IRQS_WAITING);
+ /*
+ * If we're currently running this IRQ, or its disabled,
+ * we shouldn't process the IRQ. Mark it pending, handle
+ * the necessary masking and go out
+ */
+ if (unlikely(irqd_irq_disabled(&desc->irq_data) ||
+ irqd_irq_inprogress(&desc->irq_data) || !desc->action)) {
+ if (!irq_check_poll(desc)) {
+ desc->istate |= IRQS_PENDING;
+ goto out_eoi;
+ }
+ }
+ kstat_incr_irqs_this_cpu(irq, desc);
+
+ do {
+ if (unlikely(!desc->action))
+ goto out_eoi;
+
+ handle_irq_event(desc);
+
+ } while ((desc->istate & IRQS_PENDING) &&
+ !irqd_irq_disabled(&desc->irq_data));
+
+out_eoi:
+ chip->irq_eoi(&desc->irq_data);
+ raw_spin_unlock(&desc->lock);
+}
+#endif
+
/**
* handle_percpu_irq - Per CPU local irq handler
* @irq: the interrupt number
@@ -642,8 +564,7 @@ __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
if (handle == handle_bad_irq) {
if (desc->irq_data.chip != &no_irq_chip)
mask_ack_irq(desc);
- irq_compat_set_disabled(desc);
- desc->istate |= IRQS_DISABLED;
+ irq_state_set_disabled(desc);
desc->depth = 1;
}
desc->handle_irq = handle;
@@ -684,8 +605,70 @@ void irq_modify_status(unsigned int irq, unsigned long clr, unsigned long set)
irqd_set(&desc->irq_data, IRQD_PER_CPU);
if (irq_settings_can_move_pcntxt(desc))
irqd_set(&desc->irq_data, IRQD_MOVE_PCNTXT);
+ if (irq_settings_is_level(desc))
+ irqd_set(&desc->irq_data, IRQD_LEVEL);
irqd_set(&desc->irq_data, irq_settings_get_trigger_mask(desc));
irq_put_desc_unlock(desc, flags);
}
+
+/**
+ * irq_cpu_online - Invoke all irq_cpu_online functions.
+ *
+ * Iterate through all irqs and invoke the chip.irq_cpu_online()
+ * for each.
+ */
+void irq_cpu_online(void)
+{
+ struct irq_desc *desc;
+ struct irq_chip *chip;
+ unsigned long flags;
+ unsigned int irq;
+
+ for_each_active_irq(irq) {
+ desc = irq_to_desc(irq);
+ if (!desc)
+ continue;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+
+ chip = irq_data_get_irq_chip(&desc->irq_data);
+ if (chip && chip->irq_cpu_online &&
+ (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) ||
+ !irqd_irq_disabled(&desc->irq_data)))
+ chip->irq_cpu_online(&desc->irq_data);
+
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ }
+}
+
+/**
+ * irq_cpu_offline - Invoke all irq_cpu_offline functions.
+ *
+ * Iterate through all irqs and invoke the chip.irq_cpu_offline()
+ * for each.
+ */
+void irq_cpu_offline(void)
+{
+ struct irq_desc *desc;
+ struct irq_chip *chip;
+ unsigned long flags;
+ unsigned int irq;
+
+ for_each_active_irq(irq) {
+ desc = irq_to_desc(irq);
+ if (!desc)
+ continue;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+
+ chip = irq_data_get_irq_chip(&desc->irq_data);
+ if (chip && chip->irq_cpu_offline &&
+ (!(chip->flags & IRQCHIP_ONOFFLINE_ENABLED) ||
+ !irqd_irq_disabled(&desc->irq_data)))
+ chip->irq_cpu_offline(&desc->irq_data);
+
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+ }
+}
diff --git a/kernel/irq/compat.h b/kernel/irq/compat.h
deleted file mode 100644
index 6bbaf66..0000000
--- a/kernel/irq/compat.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Compat layer for transition period
- */
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
-static inline void irq_compat_set_progress(struct irq_desc *desc)
-{
- desc->status |= IRQ_INPROGRESS;
-}
-
-static inline void irq_compat_clr_progress(struct irq_desc *desc)
-{
- desc->status &= ~IRQ_INPROGRESS;
-}
-static inline void irq_compat_set_disabled(struct irq_desc *desc)
-{
- desc->status |= IRQ_DISABLED;
-}
-static inline void irq_compat_clr_disabled(struct irq_desc *desc)
-{
- desc->status &= ~IRQ_DISABLED;
-}
-static inline void irq_compat_set_pending(struct irq_desc *desc)
-{
- desc->status |= IRQ_PENDING;
-}
-
-static inline void irq_compat_clr_pending(struct irq_desc *desc)
-{
- desc->status &= ~IRQ_PENDING;
-}
-static inline void irq_compat_set_masked(struct irq_desc *desc)
-{
- desc->status |= IRQ_MASKED;
-}
-
-static inline void irq_compat_clr_masked(struct irq_desc *desc)
-{
- desc->status &= ~IRQ_MASKED;
-}
-static inline void irq_compat_set_move_pending(struct irq_desc *desc)
-{
- desc->status |= IRQ_MOVE_PENDING;
-}
-
-static inline void irq_compat_clr_move_pending(struct irq_desc *desc)
-{
- desc->status &= ~IRQ_MOVE_PENDING;
-}
-static inline void irq_compat_set_affinity(struct irq_desc *desc)
-{
- desc->status |= IRQ_AFFINITY_SET;
-}
-
-static inline void irq_compat_clr_affinity(struct irq_desc *desc)
-{
- desc->status &= ~IRQ_AFFINITY_SET;
-}
-#else
-static inline void irq_compat_set_progress(struct irq_desc *desc) { }
-static inline void irq_compat_clr_progress(struct irq_desc *desc) { }
-static inline void irq_compat_set_disabled(struct irq_desc *desc) { }
-static inline void irq_compat_clr_disabled(struct irq_desc *desc) { }
-static inline void irq_compat_set_pending(struct irq_desc *desc) { }
-static inline void irq_compat_clr_pending(struct irq_desc *desc) { }
-static inline void irq_compat_set_masked(struct irq_desc *desc) { }
-static inline void irq_compat_clr_masked(struct irq_desc *desc) { }
-static inline void irq_compat_set_move_pending(struct irq_desc *desc) { }
-static inline void irq_compat_clr_move_pending(struct irq_desc *desc) { }
-static inline void irq_compat_set_affinity(struct irq_desc *desc) { }
-static inline void irq_compat_clr_affinity(struct irq_desc *desc) { }
-#endif
-
diff --git a/kernel/irq/debug.h b/kernel/irq/debug.h
index d1a33b7..306cba3 100644
--- a/kernel/irq/debug.h
+++ b/kernel/irq/debug.h
@@ -4,8 +4,10 @@
#include <linux/kallsyms.h>
-#define P(f) if (desc->status & f) printk("%14s set\n", #f)
+#define P(f) if (desc->status_use_accessors & f) printk("%14s set\n", #f)
#define PS(f) if (desc->istate & f) printk("%14s set\n", #f)
+/* FIXME */
+#define PD(f) do { } while (0)
static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
{
@@ -28,13 +30,15 @@ static inline void print_irq_desc(unsigned int irq, struct irq_desc *desc)
P(IRQ_NOAUTOEN);
PS(IRQS_AUTODETECT);
- PS(IRQS_INPROGRESS);
PS(IRQS_REPLAY);
PS(IRQS_WAITING);
- PS(IRQS_DISABLED);
PS(IRQS_PENDING);
- PS(IRQS_MASKED);
+
+ PD(IRQS_INPROGRESS);
+ PD(IRQS_DISABLED);
+ PD(IRQS_MASKED);
}
#undef P
#undef PS
+#undef PD
diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
index 20dc547..b5fcd96 100644
--- a/kernel/irq/dummychip.c
+++ b/kernel/irq/dummychip.c
@@ -31,13 +31,6 @@ static unsigned int noop_ret(struct irq_data *data)
return 0;
}
-#ifndef CONFIG_GENERIC_HARDIRQS_NO_DEPRECATED
-static void compat_noop(unsigned int irq) { }
-#define END_INIT .end = compat_noop
-#else
-#define END_INIT
-#endif
-
/*
* Generic no controller implementation
*/
@@ -48,7 +41,6 @@ struct irq_chip no_irq_chip = {
.irq_enable = noop,
.irq_disable = noop,
.irq_ack = ack_bad,
- END_INIT
};
/*
@@ -64,5 +56,4 @@ struct irq_chip dummy_irq_chip = {
.irq_ack = noop,
.irq_mask = noop,
.irq_unmask = noop,
- END_INIT
};
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 517561f..90cb55f 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -175,28 +175,13 @@ irqreturn_t handle_irq_event(struct irq_desc *desc)
struct irqaction *action = desc->action;
irqreturn_t ret;
- irq_compat_clr_pending(desc);
desc->istate &= ~IRQS_PENDING;
- irq_compat_set_progress(desc);
- desc->istate |= IRQS_INPROGRESS;
+ irqd_set(&desc->irq_data, IRQD_IRQ_INPROGRESS);
raw_spin_unlock(&desc->lock);
ret = handle_irq_event_percpu(desc, action);
raw_spin_lock(&desc->lock);
- desc->istate &= ~IRQS_INPROGRESS;
- irq_compat_clr_progress(desc);
+ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
return ret;
}
-
-/**
- * handle_IRQ_event - irq action chain handler
- * @irq: the interrupt number
- * @action: the interrupt action chain for this irq
- *
- * Handles the action chain of an irq event
- */
-irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
-{
- return handle_irq_event_percpu(irq_to_desc(irq), action);
-}
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 6c6ec9a..6546431 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -15,10 +15,6 @@
#define istate core_internal_state__do_not_mess_with_it
-#ifdef CONFIG_GENERIC_HARDIRQS_NO_COMPAT
-# define status status_use_accessors
-#endif
-
extern int noirqdebug;
/*
@@ -44,38 +40,28 @@ enum {
* IRQS_SPURIOUS_DISABLED - was disabled due to spurious interrupt
* detection
* IRQS_POLL_INPROGRESS - polling in progress
- * IRQS_INPROGRESS - Interrupt in progress
* IRQS_ONESHOT - irq is not unmasked in primary handler
* IRQS_REPLAY - irq is replayed
* IRQS_WAITING - irq is waiting
- * IRQS_DISABLED - irq is disabled
* IRQS_PENDING - irq is pending and replayed later
- * IRQS_MASKED - irq is masked
* IRQS_SUSPENDED - irq is suspended
*/
enum {
IRQS_AUTODETECT = 0x00000001,
IRQS_SPURIOUS_DISABLED = 0x00000002,
IRQS_POLL_INPROGRESS = 0x00000008,
- IRQS_INPROGRESS = 0x00000010,
IRQS_ONESHOT = 0x00000020,
IRQS_REPLAY = 0x00000040,
IRQS_WAITING = 0x00000080,
- IRQS_DISABLED = 0x00000100,
IRQS_PENDING = 0x00000200,
- IRQS_MASKED = 0x00000400,
IRQS_SUSPENDED = 0x00000800,
};
-#include "compat.h"
#include "debug.h"
#include "settings.h"
#define irq_data_to_desc(data) container_of(data, struct irq_desc, irq_data)
-/* Set default functions for irq_chip structures: */
-extern void irq_chip_set_defaults(struct irq_chip *chip);
-
extern int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
unsigned long flags);
extern void __disable_irq(struct irq_desc *desc, unsigned int irq, bool susp);
@@ -162,13 +148,11 @@ irq_put_desc_unlock(struct irq_desc *desc, unsigned long flags)
static inline void irqd_set_move_pending(struct irq_data *d)
{
d->state_use_accessors |= IRQD_SETAFFINITY_PENDING;
- irq_compat_set_move_pending(irq_data_to_desc(d));
}
static inline void irqd_clr_move_pending(struct irq_data *d)
{
d->state_use_accessors &= ~IRQD_SETAFFINITY_PENDING;
- irq_compat_clr_move_pending(irq_data_to_desc(d));
}
static inline void irqd_clear(struct irq_data *d, unsigned int mask)
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index dbccc79..2c039c9 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -80,7 +80,7 @@ static void desc_set_defaults(unsigned int irq, struct irq_desc *desc, int node)
desc->irq_data.handler_data = NULL;
desc->irq_data.msi_desc = NULL;
irq_settings_clr_and_set(desc, ~0, _IRQ_DEFAULT_INIT_FLAGS);
- desc->istate = IRQS_DISABLED;
+ irqd_set(&desc->irq_data, IRQD_IRQ_DISABLED);
desc->handle_irq = handle_bad_irq;
desc->depth = 1;
desc->irq_count = 0;
@@ -198,15 +198,6 @@ err:
return -ENOMEM;
}
-struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node)
-{
- int res = irq_alloc_descs(irq, irq, 1, node);
-
- if (res == -EEXIST || res == irq)
- return irq_to_desc(irq);
- return NULL;
-}
-
static int irq_expand_nr_irqs(unsigned int nr)
{
if (nr > IRQ_BITMAP_BITS)
@@ -247,7 +238,6 @@ int __init early_irq_init(void)
struct irq_desc irq_desc[NR_IRQS] __cacheline_aligned_in_smp = {
[0 ... NR_IRQS-1] = {
- .istate = IRQS_DISABLED,
.handle_irq = handle_bad_irq,
.depth = 1,
.lock = __RAW_SPIN_LOCK_UNLOCKED(irq_desc->lock),
@@ -283,11 +273,6 @@ struct irq_desc *irq_to_desc(unsigned int irq)
return (irq < NR_IRQS) ? irq_desc + irq : NULL;
}
-struct irq_desc *irq_to_desc_alloc_node(unsigned int irq, int node)
-{
- return irq_to_desc(irq);
-}
-
static void free_desc(unsigned int irq)
{
dynamic_irq_cleanup(irq);
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0a2aa73..12a80fd 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -41,7 +41,7 @@ early_param("threadirqs", setup_forced_irqthreads);
void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
- unsigned int state;
+ bool inprogress;
if (!desc)
return;
@@ -53,16 +53,16 @@ void synchronize_irq(unsigned int irq)
* Wait until we're out of the critical section. This might
* give the wrong answer due to the lack of memory barriers.
*/
- while (desc->istate & IRQS_INPROGRESS)
+ while (irqd_irq_inprogress(&desc->irq_data))
cpu_relax();
/* Ok, that indicated we're done: double-check carefully. */
raw_spin_lock_irqsave(&desc->lock, flags);
- state = desc->istate;
+ inprogress = irqd_irq_inprogress(&desc->irq_data);
raw_spin_unlock_irqrestore(&desc->lock, flags);
/* Oops, that failed? */
- } while (state & IRQS_INPROGRESS);
+ } while (inprogress);
/*
* We made sure that no hardirq handler is running. Now verify
@@ -112,13 +112,13 @@ void irq_set_thread_affinity(struct irq_desc *desc)
}
#ifdef CONFIG_GENERIC_PENDING_IRQ
-static inline bool irq_can_move_pcntxt(struct irq_desc *desc)
+static inline bool irq_can_move_pcntxt(struct irq_data *data)
{
- return irq_settings_can_move_pcntxt(desc);
+ return irqd_can_move_in_process_context(data);
}
-static inline bool irq_move_pending(struct irq_desc *desc)
+static inline bool irq_move_pending(struct irq_data *data)
{
- return irqd_is_setaffinity_pending(&desc->irq_data);
+ return irqd_is_setaffinity_pending(data);
}
static inline void
irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask)
@@ -131,43 +131,34 @@ irq_get_pending(struct cpumask *mask, struct irq_desc *desc)
cpumask_copy(mask, desc->pending_mask);
}
#else
-static inline bool irq_can_move_pcntxt(struct irq_desc *desc) { return true; }
-static inline bool irq_move_pending(struct irq_desc *desc) { return false; }
+static inline bool irq_can_move_pcntxt(struct irq_data *data) { return true; }
+static inline bool irq_move_pending(struct irq_data *data) { return false; }
static inline void
irq_copy_pending(struct irq_desc *desc, const struct cpumask *mask) { }
static inline void
irq_get_pending(struct cpumask *mask, struct irq_desc *desc) { }
#endif
-/**
- * irq_set_affinity - Set the irq affinity of a given irq
- * @irq: Interrupt to set affinity
- * @cpumask: cpumask
- *
- */
-int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
+int __irq_set_affinity_locked(struct irq_data *data, const struct cpumask *mask)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_chip *chip = desc->irq_data.chip;
- unsigned long flags;
+ struct irq_chip *chip = irq_data_get_irq_chip(data);
+ struct irq_desc *desc = irq_data_to_desc(data);
int ret = 0;
- if (!chip->irq_set_affinity)
+ if (!chip || !chip->irq_set_affinity)
return -EINVAL;
- raw_spin_lock_irqsave(&desc->lock, flags);
-
- if (irq_can_move_pcntxt(desc)) {
- ret = chip->irq_set_affinity(&desc->irq_data, mask, false);
+ if (irq_can_move_pcntxt(data)) {
+ ret = chip->irq_set_affinity(data, mask, false);
switch (ret) {
case IRQ_SET_MASK_OK:
- cpumask_copy(desc->irq_data.affinity, mask);
+ cpumask_copy(data->affinity, mask);
case IRQ_SET_MASK_OK_NOCOPY:
irq_set_thread_affinity(desc);
ret = 0;
}
} else {
- irqd_set_move_pending(&desc->irq_data);
+ irqd_set_move_pending(data);
irq_copy_pending(desc, mask);
}
@@ -175,8 +166,28 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
kref_get(&desc->affinity_notify->kref);
schedule_work(&desc->affinity_notify->work);
}
- irq_compat_set_affinity(desc);
- irqd_set(&desc->irq_data, IRQD_AFFINITY_SET);
+ irqd_set(data, IRQD_AFFINITY_SET);
+
+ return ret;
+}
+
+/**
+ * irq_set_affinity - Set the irq affinity of a given irq
+ * @irq: Interrupt to set affinity
+ * @mask: cpumask
+ *
+ */
+int irq_set_affinity(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ unsigned long flags;
+ int ret;
+
+ if (!desc)
+ return -EINVAL;
+
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ ret = __irq_set_affinity_locked(irq_desc_get_irq_data(desc), mask);
raw_spin_unlock_irqrestore(&desc->lock, flags);
return ret;
}
@@ -206,7 +217,7 @@ static void irq_affinity_notify(struct work_struct *work)
goto out;
raw_spin_lock_irqsave(&desc->lock, flags);
- if (irq_move_pending(desc))
+ if (irq_move_pending(&desc->irq_data))
irq_get_pending(cpumask, desc);
else
cpumask_copy(cpumask, desc->irq_data.affinity);
@@ -285,10 +296,8 @@ setup_affinity(unsigned int irq, struct irq_desc *desc, struct cpumask *mask)
if (cpumask_intersects(desc->irq_data.affinity,
cpu_online_mask))
set = desc->irq_data.affinity;
- else {
- irq_compat_clr_affinity(desc);
+ else
irqd_clear(&desc->irq_data, IRQD_AFFINITY_SET);
- }
}
cpumask_and(mask, cpu_online_mask, set);
@@ -551,9 +560,9 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
flags &= IRQ_TYPE_SENSE_MASK;
if (chip->flags & IRQCHIP_SET_TYPE_MASKED) {
- if (!(desc->istate & IRQS_MASKED))
+ if (!irqd_irq_masked(&desc->irq_data))
mask_irq(desc);
- if (!(desc->istate & IRQS_DISABLED))
+ if (!irqd_irq_disabled(&desc->irq_data))
unmask = 1;
}
@@ -575,8 +584,6 @@ int __irq_set_trigger(struct irq_desc *desc, unsigned int irq,
irqd_set(&desc->irq_data, IRQD_LEVEL);
}
- if (chip != desc->irq_data.chip)
- irq_chip_set_defaults(desc->irq_data.chip);
ret = 0;
break;
default:
@@ -651,7 +658,7 @@ again:
* irq_wake_thread(). See the comment there which explains the
* serialization.
*/
- if (unlikely(desc->istate & IRQS_INPROGRESS)) {
+ if (unlikely(irqd_irq_inprogress(&desc->irq_data))) {
raw_spin_unlock_irq(&desc->lock);
chip_bus_sync_unlock(desc);
cpu_relax();
@@ -668,12 +675,10 @@ again:
desc->threads_oneshot &= ~action->thread_mask;
- if (!desc->threads_oneshot && !(desc->istate & IRQS_DISABLED) &&
- (desc->istate & IRQS_MASKED)) {
- irq_compat_clr_masked(desc);
- desc->istate &= ~IRQS_MASKED;
- desc->irq_data.chip->irq_unmask(&desc->irq_data);
- }
+ if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) &&
+ irqd_irq_masked(&desc->irq_data))
+ unmask_irq(desc);
+
out_unlock:
raw_spin_unlock_irq(&desc->lock);
chip_bus_sync_unlock(desc);
@@ -767,7 +772,7 @@ static int irq_thread(void *data)
atomic_inc(&desc->threads_active);
raw_spin_lock_irq(&desc->lock);
- if (unlikely(desc->istate & IRQS_DISABLED)) {
+ if (unlikely(irqd_irq_disabled(&desc->irq_data))) {
/*
* CHECKME: We might need a dedicated
* IRQ_THREAD_PENDING flag here, which
@@ -775,7 +780,6 @@ static int irq_thread(void *data)
* but AFAICT IRQS_PENDING should be fine as it
* retriggers the interrupt itself --- tglx
*/
- irq_compat_set_pending(desc);
desc->istate |= IRQS_PENDING;
raw_spin_unlock_irq(&desc->lock);
} else {
@@ -971,8 +975,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
new->thread_mask = 1 << ffz(thread_mask);
if (!shared) {
- irq_chip_set_defaults(desc->irq_data.chip);
-
init_waitqueue_head(&desc->wait_for_threads);
/* Setup the type (level, edge polarity) if configured: */
@@ -985,8 +987,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
}
desc->istate &= ~(IRQS_AUTODETECT | IRQS_SPURIOUS_DISABLED | \
- IRQS_INPROGRESS | IRQS_ONESHOT | \
- IRQS_WAITING);
+ IRQS_ONESHOT | IRQS_WAITING);
+ irqd_clear(&desc->irq_data, IRQD_IRQ_INPROGRESS);
if (new->flags & IRQF_PERCPU) {
irqd_set(&desc->irq_data, IRQD_PER_CPU);
diff --git a/kernel/irq/migration.c b/kernel/irq/migration.c
index ec4806d..bc61946 100644
--- a/kernel/irq/migration.c
+++ b/kernel/irq/migration.c
@@ -53,20 +53,14 @@ void irq_move_masked_irq(struct irq_data *idata)
cpumask_clear(desc->pending_mask);
}
-void move_masked_irq(int irq)
-{
- irq_move_masked_irq(irq_get_irq_data(irq));
-}
-
void irq_move_irq(struct irq_data *idata)
{
- struct irq_desc *desc = irq_data_to_desc(idata);
bool masked;
if (likely(!irqd_is_setaffinity_pending(idata)))
return;
- if (unlikely(desc->istate & IRQS_DISABLED))
+ if (unlikely(irqd_irq_disabled(idata)))
return;
/*
@@ -74,15 +68,10 @@ void irq_move_irq(struct irq_data *idata)
* threaded interrupt with ONESHOT set, we can end up with an
* interrupt storm.
*/
- masked = desc->istate & IRQS_MASKED;
+ masked = irqd_irq_masked(idata);
if (!masked)
idata->chip->irq_mask(idata);
irq_move_masked_irq(idata);
if (!masked)
idata->chip->irq_unmask(idata);
}
-
-void move_native_irq(int irq)
-{
- irq_move_irq(irq_get_irq_data(irq));
-}
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 760248d..dd201bd 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -364,6 +364,10 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec)
return 0;
}
+#ifndef ACTUAL_NR_IRQS
+# define ACTUAL_NR_IRQS nr_irqs
+#endif
+
int show_interrupts(struct seq_file *p, void *v)
{
static int prec;
@@ -373,10 +377,10 @@ int show_interrupts(struct seq_file *p, void *v)
struct irqaction *action;
struct irq_desc *desc;
- if (i > nr_irqs)
+ if (i > ACTUAL_NR_IRQS)
return 0;
- if (i == nr_irqs)
+ if (i == ACTUAL_NR_IRQS)
return arch_show_interrupts(p, prec);
/* print header and calculate the width of the first column */
@@ -404,7 +408,20 @@ int show_interrupts(struct seq_file *p, void *v)
seq_printf(p, "%*d: ", prec, i);
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
- seq_printf(p, " %8s", desc->irq_data.chip->name);
+
+ if (desc->irq_data.chip) {
+ if (desc->irq_data.chip->irq_print_chip)
+ desc->irq_data.chip->irq_print_chip(&desc->irq_data, p);
+ else if (desc->irq_data.chip->name)
+ seq_printf(p, " %8s", desc->irq_data.chip->name);
+ else
+ seq_printf(p, " %8s", "-");
+ } else {
+ seq_printf(p, " %8s", "None");
+ }
+#ifdef CONFIG_GENIRC_IRQ_SHOW_LEVEL
+ seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
+#endif
if (desc->name)
seq_printf(p, "-%-8s", desc->name);
diff --git a/kernel/irq/resend.c b/kernel/irq/resend.c
index ad683a9..14dd576 100644
--- a/kernel/irq/resend.c
+++ b/kernel/irq/resend.c
@@ -65,7 +65,6 @@ void check_irq_resend(struct irq_desc *desc, unsigned int irq)
if (desc->istate & IRQS_REPLAY)
return;
if (desc->istate & IRQS_PENDING) {
- irq_compat_clr_pending(desc);
desc->istate &= ~IRQS_PENDING;
desc->istate |= IRQS_REPLAY;
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index 0227ad3..0d91730 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -15,17 +15,8 @@ enum {
_IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
};
-#define IRQ_INPROGRESS GOT_YOU_MORON
-#define IRQ_REPLAY GOT_YOU_MORON
-#define IRQ_WAITING GOT_YOU_MORON
-#define IRQ_DISABLED GOT_YOU_MORON
-#define IRQ_PENDING GOT_YOU_MORON
-#define IRQ_MASKED GOT_YOU_MORON
-#define IRQ_WAKEUP GOT_YOU_MORON
-#define IRQ_MOVE_PENDING GOT_YOU_MORON
#define IRQ_PER_CPU GOT_YOU_MORON
#define IRQ_NO_BALANCING GOT_YOU_MORON
-#define IRQ_AFFINITY_SET GOT_YOU_MORON
#define IRQ_LEVEL GOT_YOU_MORON
#define IRQ_NOPROBE GOT_YOU_MORON
#define IRQ_NOREQUEST GOT_YOU_MORON
@@ -37,102 +28,98 @@ enum {
static inline void
irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
{
- desc->status &= ~(clr & _IRQF_MODIFY_MASK);
- desc->status |= (set & _IRQF_MODIFY_MASK);
+ desc->status_use_accessors &= ~(clr & _IRQF_MODIFY_MASK);
+ desc->status_use_accessors |= (set & _IRQF_MODIFY_MASK);
}
static inline bool irq_settings_is_per_cpu(struct irq_desc *desc)
{
- return desc->status & _IRQ_PER_CPU;
+ return desc->status_use_accessors & _IRQ_PER_CPU;
}
static inline void irq_settings_set_per_cpu(struct irq_desc *desc)
{
- desc->status |= _IRQ_PER_CPU;
+ desc->status_use_accessors |= _IRQ_PER_CPU;
}
static inline void irq_settings_set_no_balancing(struct irq_desc *desc)
{
- desc->status |= _IRQ_NO_BALANCING;
+ desc->status_use_accessors |= _IRQ_NO_BALANCING;
}
static inline bool irq_settings_has_no_balance_set(struct irq_desc *desc)
{
- return desc->status & _IRQ_NO_BALANCING;
+ return desc->status_use_accessors & _IRQ_NO_BALANCING;
}
static inline u32 irq_settings_get_trigger_mask(struct irq_desc *desc)
{
- return desc->status & IRQ_TYPE_SENSE_MASK;
+ return desc->status_use_accessors & IRQ_TYPE_SENSE_MASK;
}
static inline void
irq_settings_set_trigger_mask(struct irq_desc *desc, u32 mask)
{
- desc->status &= ~IRQ_TYPE_SENSE_MASK;
- desc->status |= mask & IRQ_TYPE_SENSE_MASK;
+ desc->status_use_accessors &= ~IRQ_TYPE_SENSE_MASK;
+ desc->status_use_accessors |= mask & IRQ_TYPE_SENSE_MASK;
}
static inline bool irq_settings_is_level(struct irq_desc *desc)
{
- return desc->status & _IRQ_LEVEL;
+ return desc->status_use_accessors & _IRQ_LEVEL;
}
static inline void irq_settings_clr_level(struct irq_desc *desc)
{
- desc->status &= ~_IRQ_LEVEL;
+ desc->status_use_accessors &= ~_IRQ_LEVEL;
}
static inline void irq_settings_set_level(struct irq_desc *desc)
{
- desc->status |= _IRQ_LEVEL;
+ desc->status_use_accessors |= _IRQ_LEVEL;
}
static inline bool irq_settings_can_request(struct irq_desc *desc)
{
- return !(desc->status & _IRQ_NOREQUEST);
+ return !(desc->status_use_accessors & _IRQ_NOREQUEST);
}
static inline void irq_settings_clr_norequest(struct irq_desc *desc)
{
- desc->status &= ~_IRQ_NOREQUEST;
+ desc->status_use_accessors &= ~_IRQ_NOREQUEST;
}
static inline void irq_settings_set_norequest(struct irq_desc *desc)
{
- desc->status |= _IRQ_NOREQUEST;
+ desc->status_use_accessors |= _IRQ_NOREQUEST;
}
static inline bool irq_settings_can_probe(struct irq_desc *desc)
{
- return !(desc->status & _IRQ_NOPROBE);
+ return !(desc->status_use_accessors & _IRQ_NOPROBE);
}
static inline void irq_settings_clr_noprobe(struct irq_desc *desc)
{
- desc->status &= ~_IRQ_NOPROBE;
+ desc->status_use_accessors &= ~_IRQ_NOPROBE;
}
static inline void irq_settings_set_noprobe(struct irq_desc *desc)
{
- desc->status |= _IRQ_NOPROBE;
+ desc->status_use_accessors |= _IRQ_NOPROBE;
}
static inline bool irq_settings_can_move_pcntxt(struct irq_desc *desc)
{
- return desc->status & _IRQ_MOVE_PCNTXT;
+ return desc->status_use_accessors & _IRQ_MOVE_PCNTXT;
}
static inline bool irq_settings_can_autoenable(struct irq_desc *desc)
{
- return !(desc->status & _IRQ_NOAUTOEN);
+ return !(desc->status_use_accessors & _IRQ_NOAUTOEN);
}
static inline bool irq_settings_is_nested_thread(struct irq_desc *desc)
{
- return desc->status & _IRQ_NESTED_THREAD;
+ return desc->status_use_accessors & _IRQ_NESTED_THREAD;
}
-
-/* Nothing should touch desc->status from now on */
-#undef status
-#define status USE_THE_PROPER_WRAPPERS_YOU_MORON
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index dd586eb..dfbd550 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -45,12 +45,12 @@ bool irq_wait_for_poll(struct irq_desc *desc)
#ifdef CONFIG_SMP
do {
raw_spin_unlock(&desc->lock);
- while (desc->istate & IRQS_INPROGRESS)
+ while (irqd_irq_inprogress(&desc->irq_data))
cpu_relax();
raw_spin_lock(&desc->lock);
- } while (desc->istate & IRQS_INPROGRESS);
+ } while (irqd_irq_inprogress(&desc->irq_data));
/* Might have been disabled in meantime */
- return !(desc->istate & IRQS_DISABLED) && desc->action;
+ return !irqd_irq_disabled(&desc->irq_data) && desc->action;
#else
return false;
#endif
@@ -75,7 +75,7 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
* Do not poll disabled interrupts unless the spurious
* disabled poller asks explicitely.
*/
- if ((desc->istate & IRQS_DISABLED) && !force)
+ if (irqd_irq_disabled(&desc->irq_data) && !force)
goto out;
/*
@@ -88,12 +88,11 @@ static int try_one_irq(int irq, struct irq_desc *desc, bool force)
goto out;
/* Already running on another processor */
- if (desc->istate & IRQS_INPROGRESS) {
+ if (irqd_irq_inprogress(&desc->irq_data)) {
/*
* Already running: If it is shared get the other
* CPU to go looking for our mystery interrupt too
*/
- irq_compat_set_pending(desc);
desc->istate |= IRQS_PENDING;
goto out;
}
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 75dcca3..079f1d3 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -64,14 +64,14 @@ static inline int is_kernel_text(unsigned long addr)
if ((addr >= (unsigned long)_stext && addr <= (unsigned long)_etext) ||
arch_is_kernel_text(addr))
return 1;
- return in_gate_area_no_task(addr);
+ return in_gate_area_no_mm(addr);
}
static inline int is_kernel(unsigned long addr)
{
if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end)
return 1;
- return in_gate_area_no_task(addr);
+ return in_gate_area_no_mm(addr);
}
static int is_ksym_addr(unsigned long addr)
@@ -342,13 +342,15 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
}
/* Look up a kernel symbol and return it in a text buffer. */
-int sprint_symbol(char *buffer, unsigned long address)
+static int __sprint_symbol(char *buffer, unsigned long address,
+ int symbol_offset)
{
char *modname;
const char *name;
unsigned long offset, size;
int len;
+ address += symbol_offset;
name = kallsyms_lookup(address, &size, &offset, &modname, buffer);
if (!name)
return sprintf(buffer, "0x%lx", address);
@@ -357,17 +359,53 @@ int sprint_symbol(char *buffer, unsigned long address)
strcpy(buffer, name);
len = strlen(buffer);
buffer += len;
+ offset -= symbol_offset;
if (modname)
- len += sprintf(buffer, "+%#lx/%#lx [%s]",
- offset, size, modname);
+ len += sprintf(buffer, "+%#lx/%#lx [%s]", offset, size, modname);
else
len += sprintf(buffer, "+%#lx/%#lx", offset, size);
return len;
}
+
+/**
+ * sprint_symbol - Look up a kernel symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function looks up a kernel symbol with @address and stores its name,
+ * offset, size and module name to @buffer if possible. If no symbol was found,
+ * just saves its @address as is.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_symbol(char *buffer, unsigned long address)
+{
+ return __sprint_symbol(buffer, address, 0);
+}
+
EXPORT_SYMBOL_GPL(sprint_symbol);
+/**
+ * sprint_backtrace - Look up a backtrace symbol and return it in a text buffer
+ * @buffer: buffer to be stored
+ * @address: address to lookup
+ *
+ * This function is for stack backtrace and does the same thing as
+ * sprint_symbol() but with modified/decreased @address. If there is a
+ * tail-call to the function marked "noreturn", gcc optimized out code after
+ * the call so that the stack-saved return address could point outside of the
+ * caller. This function ensures that kallsyms will find the original caller
+ * by decreasing @address.
+ *
+ * This function returns the number of bytes stored in @buffer.
+ */
+int sprint_backtrace(char *buffer, unsigned long address)
+{
+ return __sprint_symbol(buffer, address, -1);
+}
+
/* Look up a kernel symbol and print it to the kernel messages. */
void __print_symbol(const char *fmt, unsigned long address)
{
diff --git a/kernel/kexec.c b/kernel/kexec.c
index ec19b92..4e240a3 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1099,7 +1099,8 @@ size_t crash_get_memory_size(void)
return size;
}
-static void free_reserved_phys_range(unsigned long begin, unsigned long end)
+void __weak crash_free_reserved_phys_range(unsigned long begin,
+ unsigned long end)
{
unsigned long addr;
@@ -1135,7 +1136,7 @@ int crash_shrink_memory(unsigned long new_size)
start = roundup(start, PAGE_SIZE);
end = roundup(start + new_size, PAGE_SIZE);
- free_reserved_phys_range(end, crashk_res.end);
+ crash_free_reserved_phys_range(end, crashk_res.end);
if ((start == end) && (crashk_res.parent != NULL))
release_resource(&crashk_res);
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index 1969d2f..71edd2f 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -225,7 +225,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
nr_irq_read_safe = 0, nr_irq_read_unsafe = 0,
nr_softirq_read_safe = 0, nr_softirq_read_unsafe = 0,
nr_hardirq_read_safe = 0, nr_hardirq_read_unsafe = 0,
- sum_forward_deps = 0, factor = 0;
+ sum_forward_deps = 0;
list_for_each_entry(class, &all_lock_classes, lock_entry) {
@@ -283,13 +283,6 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
nr_hardirq_unsafe * nr_hardirq_safe +
nr_list_entries);
- /*
- * Estimated factor between direct and indirect
- * dependencies:
- */
- if (nr_list_entries)
- factor = sum_forward_deps / nr_list_entries;
-
#ifdef CONFIG_PROVE_LOCKING
seq_printf(m, " dependency chains: %11lu [max: %lu]\n",
nr_lock_chains, MAX_LOCKDEP_CHAINS);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f74e6c0..a05d191 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -69,13 +69,13 @@ static struct nsproxy *create_new_namespaces(unsigned long flags,
goto out_ns;
}
- new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns);
+ new_nsp->uts_ns = copy_utsname(flags, tsk);
if (IS_ERR(new_nsp->uts_ns)) {
err = PTR_ERR(new_nsp->uts_ns);
goto out_uts;
}
- new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns);
+ new_nsp->ipc_ns = copy_ipcs(flags, tsk);
if (IS_ERR(new_nsp->ipc_ns)) {
err = PTR_ERR(new_nsp->ipc_ns);
goto out_ipc;
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3472bb1..27960f1 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -145,7 +145,8 @@ static struct srcu_struct pmus_srcu;
*/
int sysctl_perf_event_paranoid __read_mostly = 1;
-int sysctl_perf_event_mlock __read_mostly = 512; /* 'free' kb per user */
+/* Minimum for 512 kiB + 1 user control page */
+int sysctl_perf_event_mlock __read_mostly = 512 + (PAGE_SIZE / 1024); /* 'free' kiB per user */
/*
* max perf event sample rate
@@ -941,6 +942,7 @@ static void perf_group_attach(struct perf_event *event)
static void
list_del_event(struct perf_event *event, struct perf_event_context *ctx)
{
+ struct perf_cpu_context *cpuctx;
/*
* We can have double detach due to exit/hot-unplug + close.
*/
@@ -949,8 +951,17 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
event->attach_state &= ~PERF_ATTACH_CONTEXT;
- if (is_cgroup_event(event))
+ if (is_cgroup_event(event)) {
ctx->nr_cgroups--;
+ cpuctx = __get_cpu_context(ctx);
+ /*
+ * if there are no more cgroup events
+ * then cler cgrp to avoid stale pointer
+ * in update_cgrp_time_from_cpuctx()
+ */
+ if (!ctx->nr_cgroups)
+ cpuctx->cgrp = NULL;
+ }
ctx->nr_events--;
if (event->attr.inherit_stat)
@@ -6520,6 +6531,11 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_alloc;
}
+ if (task) {
+ put_task_struct(task);
+ task = NULL;
+ }
+
/*
* Look up the group leader (we will attach this event to it):
*/
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a5aff94..e9c9adc 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -14,6 +14,7 @@
#include <linux/err.h>
#include <linux/acct.h>
#include <linux/slab.h>
+#include <linux/proc_fs.h>
#define BITS_PER_PAGE (PAGE_SIZE*8)
@@ -72,7 +73,7 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
{
struct pid_namespace *ns;
unsigned int level = parent_pid_ns->level + 1;
- int i;
+ int i, err = -ENOMEM;
ns = kmem_cache_zalloc(pid_ns_cachep, GFP_KERNEL);
if (ns == NULL)
@@ -96,14 +97,20 @@ static struct pid_namespace *create_pid_namespace(struct pid_namespace *parent_p
for (i = 1; i < PIDMAP_ENTRIES; i++)
atomic_set(&ns->pidmap[i].nr_free, BITS_PER_PAGE);
+ err = pid_ns_prepare_proc(ns);
+ if (err)
+ goto out_put_parent_pid_ns;
+
return ns;
+out_put_parent_pid_ns:
+ put_pid_ns(parent_pid_ns);
out_free_map:
kfree(ns->pidmap[0].page);
out_free:
kmem_cache_free(pid_ns_cachep, ns);
out:
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR(err);
}
static void destroy_pid_namespace(struct pid_namespace *ns)
diff --git a/kernel/power/block_io.c b/kernel/power/block_io.c
index 83bbc7c..d09dd10 100644
--- a/kernel/power/block_io.c
+++ b/kernel/power/block_io.c
@@ -28,7 +28,7 @@
static int submit(int rw, struct block_device *bdev, sector_t sector,
struct page *page, struct bio **bio_chain)
{
- const int bio_rw = rw | REQ_SYNC | REQ_UNPLUG;
+ const int bio_rw = rw | REQ_SYNC;
struct bio *bio;
bio = bio_alloc(__GFP_WAIT | __GFP_HIGH, 1);
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 20d5efd..512bd01 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -149,21 +149,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode)
return 0;
rcu_read_lock();
tcred = __task_cred(task);
- if ((cred->uid != tcred->euid ||
- cred->uid != tcred->suid ||
- cred->uid != tcred->uid ||
- cred->gid != tcred->egid ||
- cred->gid != tcred->sgid ||
- cred->gid != tcred->gid) &&
- !capable(CAP_SYS_PTRACE)) {
- rcu_read_unlock();
- return -EPERM;
- }
+ if (cred->user->user_ns == tcred->user->user_ns &&
+ (cred->uid == tcred->euid &&
+ cred->uid == tcred->suid &&
+ cred->uid == tcred->uid &&
+ cred->gid == tcred->egid &&
+ cred->gid == tcred->sgid &&
+ cred->gid == tcred->gid))
+ goto ok;
+ if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE))
+ goto ok;
+ rcu_read_unlock();
+ return -EPERM;
+ok:
rcu_read_unlock();
smp_rmb();
if (task->mm)
dumpable = get_dumpable(task->mm);
- if (!dumpable && !capable(CAP_SYS_PTRACE))
+ if (!dumpable && !task_ns_capable(task, CAP_SYS_PTRACE))
return -EPERM;
return security_ptrace_access_check(task, mode);
@@ -214,7 +217,7 @@ static int ptrace_attach(struct task_struct *task)
goto unlock_tasklist;
task->ptrace = PT_PTRACED;
- if (capable(CAP_SYS_PTRACE))
+ if (task_ns_capable(task, CAP_SYS_PTRACE))
task->ptrace |= PT_PTRACE_CAP;
__ptrace_link(task, current);
diff --git a/kernel/res_counter.c b/kernel/res_counter.c
index c7eaa37..34683ef 100644
--- a/kernel/res_counter.c
+++ b/kernel/res_counter.c
@@ -126,10 +126,24 @@ ssize_t res_counter_read(struct res_counter *counter, int member,
pos, buf, s - buf);
}
+#if BITS_PER_LONG == 32
+u64 res_counter_read_u64(struct res_counter *counter, int member)
+{
+ unsigned long flags;
+ u64 ret;
+
+ spin_lock_irqsave(&counter->lock, flags);
+ ret = *res_counter_member(counter, member);
+ spin_unlock_irqrestore(&counter->lock, flags);
+
+ return ret;
+}
+#else
u64 res_counter_read_u64(struct res_counter *counter, int member)
{
return *res_counter_member(counter, member);
}
+#endif
int res_counter_memparse_write_strategy(const char *buf,
unsigned long long *res)
diff --git a/kernel/sched.c b/kernel/sched.c
index a172494..a884551 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4115,6 +4115,16 @@ need_resched:
switch_count = &prev->nvcsw;
}
+ /*
+ * If we are going to sleep and we have plugged IO queued, make
+ * sure to submit it to avoid deadlocks.
+ */
+ if (prev->state != TASK_RUNNING && blk_needs_flush_plug(prev)) {
+ raw_spin_unlock(&rq->lock);
+ blk_flush_plug(prev);
+ raw_spin_lock(&rq->lock);
+ }
+
pre_schedule(rq, prev);
if (unlikely(!rq->nr_running))
@@ -4892,8 +4902,11 @@ static bool check_same_owner(struct task_struct *p)
rcu_read_lock();
pcred = __task_cred(p);
- match = (cred->euid == pcred->euid ||
- cred->euid == pcred->uid);
+ if (cred->user->user_ns == pcred->user->user_ns)
+ match = (cred->euid == pcred->euid ||
+ cred->euid == pcred->uid);
+ else
+ match = false;
rcu_read_unlock();
return match;
}
@@ -4998,6 +5011,17 @@ recheck:
return -EINVAL;
}
+ /*
+ * If not changing anything there's no need to proceed further:
+ */
+ if (unlikely(policy == p->policy && (!rt_policy(policy) ||
+ param->sched_priority == p->rt_priority))) {
+
+ __task_rq_unlock(rq);
+ raw_spin_unlock_irqrestore(&p->pi_lock, flags);
+ return 0;
+ }
+
#ifdef CONFIG_RT_GROUP_SCHED
if (user) {
/*
@@ -5221,7 +5245,7 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
goto out_free_cpus_allowed;
}
retval = -EPERM;
- if (!check_same_owner(p) && !capable(CAP_SYS_NICE))
+ if (!check_same_owner(p) && !task_ns_capable(p, CAP_SYS_NICE))
goto out_unlock;
retval = security_task_setscheduler(p);
@@ -5460,6 +5484,8 @@ EXPORT_SYMBOL(yield);
* yield_to - yield the current processor to another thread in
* your thread group, or accelerate that thread toward the
* processor it's on.
+ * @p: target task
+ * @preempt: whether task preemption is allowed or not
*
* It's the caller's job to ensure that the target task struct
* can't go away on us before we can do any checks.
@@ -5525,6 +5551,7 @@ void __sched io_schedule(void)
delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
+ blk_flush_plug(current);
current->in_iowait = 1;
schedule();
current->in_iowait = 0;
@@ -5540,6 +5567,7 @@ long __sched io_schedule_timeout(long timeout)
delayacct_blkio_start();
atomic_inc(&rq->nr_iowait);
+ blk_flush_plug(current);
current->in_iowait = 1;
ret = schedule_timeout(timeout);
current->in_iowait = 0;
@@ -8434,7 +8462,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
{
struct cfs_rq *cfs_rq;
struct sched_entity *se;
- struct rq *rq;
int i;
tg->cfs_rq = kzalloc(sizeof(cfs_rq) * nr_cpu_ids, GFP_KERNEL);
@@ -8447,8 +8474,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
tg->shares = NICE_0_LOAD;
for_each_possible_cpu(i) {
- rq = cpu_rq(i);
-
cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
GFP_KERNEL, cpu_to_node(i));
if (!cfs_rq)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3f7ec9e..c7ec5c8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -22,6 +22,7 @@
#include <linux/latencytop.h>
#include <linux/sched.h>
+#include <linux/cpumask.h>
/*
* Targeted preemption latency for CPU-bound tasks:
@@ -3850,8 +3851,8 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
interval = msecs_to_jiffies(interval);
if (unlikely(!interval))
interval = 1;
- if (interval > HZ*NR_CPUS/10)
- interval = HZ*NR_CPUS/10;
+ if (interval > HZ*num_online_cpus()/10)
+ interval = HZ*num_online_cpus()/10;
need_serialize = sd->flags & SD_SERIALIZE;
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index c82f26c..a776a63 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -94,6 +94,4 @@ static const struct sched_class idle_sched_class = {
.prio_changed = prio_changed_idle,
.switched_to = switched_to_idle,
-
- /* no .task_new for idle tasks */
};
diff --git a/kernel/sched_stoptask.c b/kernel/sched_stoptask.c
index 84ec9bc..1ba2bd4 100644
--- a/kernel/sched_stoptask.c
+++ b/kernel/sched_stoptask.c
@@ -102,6 +102,4 @@ static const struct sched_class stop_sched_class = {
.prio_changed = prio_changed_stop,
.switched_to = switched_to_stop,
-
- /* no .task_new for stop tasks */
};
diff --git a/kernel/signal.c b/kernel/signal.c
index 4f7312b..c15e979 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -303,7 +303,7 @@ static bool task_participate_group_stop(struct task_struct *task)
/*
* allocate a new signal queue record
* - this may be called without locks if and only if t == current, otherwise an
- * appopriate lock must be held to stop the target task from exiting
+ * appropriate lock must be held to stop the target task from exiting
*/
static struct sigqueue *
__sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit)
@@ -452,15 +452,15 @@ int unhandled_signal(struct task_struct *tsk, int sig)
return !tracehook_consider_fatal_signal(tsk, sig);
}
-
-/* Notify the system that a driver wants to block all signals for this
+/*
+ * Notify the system that a driver wants to block all signals for this
* process, and wants to be notified if any signals at all were to be
* sent/acted upon. If the notifier routine returns non-zero, then the
* signal will be acted upon after all. If the notifier routine returns 0,
* then then signal will be blocked. Only one block per process is
* allowed. priv is a pointer to private data that the notifier routine
- * can use to determine if the signal should be blocked or not. */
-
+ * can use to determine if the signal should be blocked or not.
+ */
void
block_all_signals(int (*notifier)(void *priv), void *priv, sigset_t *mask)
{
@@ -511,9 +511,10 @@ still_pending:
copy_siginfo(info, &first->info);
__sigqueue_free(first);
} else {
- /* Ok, it wasn't in the queue. This must be
- a fast-pathed signal or we must have been
- out of queue space. So zero out the info.
+ /*
+ * Ok, it wasn't in the queue. This must be
+ * a fast-pathed signal or we must have been
+ * out of queue space. So zero out the info.
*/
info->si_signo = sig;
info->si_errno = 0;
@@ -545,7 +546,7 @@ static int __dequeue_signal(struct sigpending *pending, sigset_t *mask,
}
/*
- * Dequeue a signal and return the element to the caller, which is
+ * Dequeue a signal and return the element to the caller, which is
* expected to free it.
*
* All callers have to hold the siglock.
@@ -567,7 +568,7 @@ int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info)
* itimers are process shared and we restart periodic
* itimers in the signal delivery path to prevent DoS
* attacks in the high resolution timer case. This is
- * compliant with the old way of self restarting
+ * compliant with the old way of self-restarting
* itimers, as the SIGALRM is a legacy signal and only
* queued once. Changing the restart behaviour to
* restart the timer in the signal dequeue path is
@@ -713,13 +714,33 @@ static inline bool si_fromuser(const struct siginfo *info)
}
/*
+ * called with RCU read lock from check_kill_permission()
+ */
+static int kill_ok_by_cred(struct task_struct *t)
+{
+ const struct cred *cred = current_cred();
+ const struct cred *tcred = __task_cred(t);
+
+ if (cred->user->user_ns == tcred->user->user_ns &&
+ (cred->euid == tcred->suid ||
+ cred->euid == tcred->uid ||
+ cred->uid == tcred->suid ||
+ cred->uid == tcred->uid))
+ return 1;
+
+ if (ns_capable(tcred->user->user_ns, CAP_KILL))
+ return 1;
+
+ return 0;
+}
+
+/*
* Bad permissions for sending the signal
* - the caller must hold the RCU read lock
*/
static int check_kill_permission(int sig, struct siginfo *info,
struct task_struct *t)
{
- const struct cred *cred, *tcred;
struct pid *sid;
int error;
@@ -733,14 +754,8 @@ static int check_kill_permission(int sig, struct siginfo *info,
if (error)
return error;
- cred = current_cred();
- tcred = __task_cred(t);
if (!same_thread_group(current, t) &&
- (cred->euid ^ tcred->suid) &&
- (cred->euid ^ tcred->uid) &&
- (cred->uid ^ tcred->suid) &&
- (cred->uid ^ tcred->uid) &&
- !capable(CAP_KILL)) {
+ !kill_ok_by_cred(t)) {
switch (sig) {
case SIGCONT:
sid = task_session(t);
@@ -960,14 +975,15 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
if (info == SEND_SIG_FORCED)
goto out_set;
- /* Real-time signals must be queued if sent by sigqueue, or
- some other real-time mechanism. It is implementation
- defined whether kill() does so. We attempt to do so, on
- the principle of least surprise, but since kill is not
- allowed to fail with EAGAIN when low on memory we just
- make sure at least one signal gets delivered and don't
- pass on the info struct. */
-
+ /*
+ * Real-time signals must be queued if sent by sigqueue, or
+ * some other real-time mechanism. It is implementation
+ * defined whether kill() does so. We attempt to do so, on
+ * the principle of least surprise, but since kill is not
+ * allowed to fail with EAGAIN when low on memory we just
+ * make sure at least one signal gets delivered and don't
+ * pass on the info struct.
+ */
if (sig < SIGRTMIN)
override_rlimit = (is_si_special(info) || info->si_code >= 0);
else
@@ -1239,8 +1255,7 @@ retry:
return error;
}
-int
-kill_proc_info(int sig, struct siginfo *info, pid_t pid)
+int kill_proc_info(int sig, struct siginfo *info, pid_t pid)
{
int error;
rcu_read_lock();
@@ -1337,8 +1352,7 @@ static int kill_something_info(int sig, struct siginfo *info, pid_t pid)
* These are for backward compatibility with the rest of the kernel source.
*/
-int
-send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
+int send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
/*
* Make sure legacy kernel users don't send in bad values
@@ -1406,7 +1420,7 @@ EXPORT_SYMBOL(kill_pid);
* These functions support sending signals using preallocated sigqueue
* structures. This is needed "because realtime applications cannot
* afford to lose notifications of asynchronous events, like timer
- * expirations or I/O completions". In the case of Posix Timers
+ * expirations or I/O completions". In the case of POSIX Timers
* we allocate the sigqueue structure from the timer_create. If this
* allocation fails we are able to report the failure to the application
* with an EAGAIN error.
@@ -1605,7 +1619,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk,
info.si_signo = SIGCHLD;
info.si_errno = 0;
/*
- * see comment in do_notify_parent() abot the following 3 lines
+ * see comment in do_notify_parent() about the following 4 lines
*/
rcu_read_lock();
info.si_pid = task_pid_nr_ns(tsk, parent->nsproxy->pid_ns);
@@ -1663,7 +1677,7 @@ static inline int may_ptrace_stop(void)
}
/*
- * Return nonzero if there is a SIGKILL that should be waking us up.
+ * Return non-zero if there is a SIGKILL that should be waking us up.
* Called with the siglock held.
*/
static int sigkill_pending(struct task_struct *tsk)
@@ -1835,7 +1849,7 @@ void ptrace_notify(int exit_code)
/*
* This performs the stopping for SIGSTOP and other stop signals.
* We have to stop all threads in the thread group.
- * Returns nonzero if we've actually stopped and released the siglock.
+ * Returns non-zero if we've actually stopped and released the siglock.
* Returns zero if we didn't stop and still hold the siglock.
*/
static int do_signal_stop(int signr)
@@ -1972,10 +1986,12 @@ static int ptrace_signal(int signr, siginfo_t *info,
current->exit_code = 0;
- /* Update the siginfo structure if the signal has
- changed. If the debugger wanted something
- specific in the siginfo structure then it should
- have updated *info via PTRACE_SETSIGINFO. */
+ /*
+ * Update the siginfo structure if the signal has
+ * changed. If the debugger wanted something
+ * specific in the siginfo structure then it should
+ * have updated *info via PTRACE_SETSIGINFO.
+ */
if (signr != info->si_signo) {
info->si_signo = signr;
info->si_errno = 0;
@@ -2201,7 +2217,8 @@ void exit_signals(struct task_struct *tsk)
if (!signal_pending(tsk))
goto out;
- /* It could be that __group_complete_signal() choose us to
+ /*
+ * It could be that __group_complete_signal() choose us to
* notify about group-wide signal. Another thread should be
* woken now to take the signal since we will not.
*/
@@ -2241,6 +2258,9 @@ EXPORT_SYMBOL(unblock_all_signals);
* System call entry points.
*/
+/**
+ * sys_restart_syscall - restart a system call
+ */
SYSCALL_DEFINE0(restart_syscall)
{
struct restart_block *restart = &current_thread_info()->restart_block;
@@ -2294,6 +2314,13 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
return error;
}
+/**
+ * sys_rt_sigprocmask - change the list of currently blocked signals
+ * @how: whether to add, remove, or set signals
+ * @set: stores pending signals
+ * @oset: previous value of signal mask if non-null
+ * @sigsetsize: size of sigset_t type
+ */
SYSCALL_DEFINE4(rt_sigprocmask, int, how, sigset_t __user *, set,
sigset_t __user *, oset, size_t, sigsetsize)
{
@@ -2352,8 +2379,14 @@ long do_sigpending(void __user *set, unsigned long sigsetsize)
out:
return error;
-}
+}
+/**
+ * sys_rt_sigpending - examine a pending signal that has been raised
+ * while blocked
+ * @set: stores pending signals
+ * @sigsetsize: size of sigset_t type or larger
+ */
SYSCALL_DEFINE2(rt_sigpending, sigset_t __user *, set, size_t, sigsetsize)
{
return do_sigpending(set, sigsetsize);
@@ -2402,9 +2435,9 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
err |= __put_user(from->si_trapno, &to->si_trapno);
#endif
#ifdef BUS_MCEERR_AO
- /*
+ /*
* Other callers might not initialize the si_lsb field,
- * so check explicitely for the right codes here.
+ * so check explicitly for the right codes here.
*/
if (from->si_code == BUS_MCEERR_AR || from->si_code == BUS_MCEERR_AO)
err |= __put_user(from->si_addr_lsb, &to->si_addr_lsb);
@@ -2433,6 +2466,14 @@ int copy_siginfo_to_user(siginfo_t __user *to, siginfo_t *from)
#endif
+/**
+ * sys_rt_sigtimedwait - synchronously wait for queued signals specified
+ * in @uthese
+ * @uthese: queued signals to wait for
+ * @uinfo: if non-null, the signal's siginfo is returned here
+ * @uts: upper bound on process time suspension
+ * @sigsetsize: size of sigset_t type
+ */
SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
siginfo_t __user *, uinfo, const struct timespec __user *, uts,
size_t, sigsetsize)
@@ -2449,7 +2490,7 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
if (copy_from_user(&these, uthese, sizeof(these)))
return -EFAULT;
-
+
/*
* Invert the set of allowed signals to get those we
* want to block.
@@ -2474,9 +2515,11 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
+ (ts.tv_sec || ts.tv_nsec));
if (timeout) {
- /* None ready -- temporarily unblock those we're
+ /*
+ * None ready -- temporarily unblock those we're
* interested while we are sleeping in so that we'll
- * be awakened when they arrive. */
+ * be awakened when they arrive.
+ */
current->real_blocked = current->blocked;
sigandsets(&current->blocked, &current->blocked, &these);
recalc_sigpending();
@@ -2508,6 +2551,11 @@ SYSCALL_DEFINE4(rt_sigtimedwait, const sigset_t __user *, uthese,
return ret;
}
+/**
+ * sys_kill - send a signal to a process
+ * @pid: the PID of the process
+ * @sig: signal to be sent
+ */
SYSCALL_DEFINE2(kill, pid_t, pid, int, sig)
{
struct siginfo info;
@@ -2583,7 +2631,11 @@ SYSCALL_DEFINE3(tgkill, pid_t, tgid, pid_t, pid, int, sig)
return do_tkill(tgid, pid, sig);
}
-/*
+/**
+ * sys_tkill - send signal to one specific task
+ * @pid: the PID of the task
+ * @sig: signal to be sent
+ *
* Send a signal to only one task, even if it's a CLONE_THREAD task.
*/
SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
@@ -2595,6 +2647,12 @@ SYSCALL_DEFINE2(tkill, pid_t, pid, int, sig)
return do_tkill(0, pid, sig);
}
+/**
+ * sys_rt_sigqueueinfo - send signal information to a signal
+ * @pid: the PID of the thread
+ * @sig: signal to be sent
+ * @uinfo: signal info to be sent
+ */
SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
siginfo_t __user *, uinfo)
{
@@ -2606,7 +2664,7 @@ SYSCALL_DEFINE3(rt_sigqueueinfo, pid_t, pid, int, sig,
/* Not even root can pretend to send signals from the kernel.
* Nor can they impersonate a kill()/tgkill(), which adds source info.
*/
- if (info.si_code != SI_QUEUE) {
+ if (info.si_code >= 0 || info.si_code == SI_TKILL) {
/* We used to allow any < 0 si_code */
WARN_ON_ONCE(info.si_code < 0);
return -EPERM;
@@ -2626,7 +2684,7 @@ long do_rt_tgsigqueueinfo(pid_t tgid, pid_t pid, int sig, siginfo_t *info)
/* Not even root can pretend to send signals from the kernel.
* Nor can they impersonate a kill()/tgkill(), which adds source info.
*/
- if (info->si_code != SI_QUEUE) {
+ if (info->si_code >= 0 || info->si_code == SI_TKILL) {
/* We used to allow any < 0 si_code */
WARN_ON_ONCE(info->si_code < 0);
return -EPERM;
@@ -2722,12 +2780,11 @@ do_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, unsigned long s
error = -EINVAL;
/*
- *
- * Note - this code used to test ss_flags incorrectly
+ * Note - this code used to test ss_flags incorrectly:
* old code may have been written using ss_flags==0
* to mean ss_flags==SS_ONSTACK (as this was the only
* way that worked) - this fix preserves that older
- * mechanism
+ * mechanism.
*/
if (ss_flags != SS_DISABLE && ss_flags != SS_ONSTACK && ss_flags != 0)
goto out;
@@ -2761,6 +2818,10 @@ out:
#ifdef __ARCH_WANT_SYS_SIGPENDING
+/**
+ * sys_sigpending - examine pending signals
+ * @set: where mask of pending signal is returned
+ */
SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
{
return do_sigpending(set, sizeof(*set));
@@ -2769,8 +2830,15 @@ SYSCALL_DEFINE1(sigpending, old_sigset_t __user *, set)
#endif
#ifdef __ARCH_WANT_SYS_SIGPROCMASK
-/* Some platforms have their own version with special arguments others
- support only sys_rt_sigprocmask. */
+/**
+ * sys_sigprocmask - examine and change blocked signals
+ * @how: whether to add, remove, or set signals
+ * @set: signals to add or remove (if non-null)
+ * @oset: previous value of signal mask if non-null
+ *
+ * Some platforms have their own version with special arguments;
+ * others support only sys_rt_sigprocmask.
+ */
SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, set,
old_sigset_t __user *, oset)
@@ -2823,6 +2891,13 @@ out:
#endif /* __ARCH_WANT_SYS_SIGPROCMASK */
#ifdef __ARCH_WANT_SYS_RT_SIGACTION
+/**
+ * sys_rt_sigaction - alter an action taken by a process
+ * @sig: signal to be sent
+ * @act: the thread group ID of the thread
+ * @oact: the PID of the thread
+ * @sigsetsize: size of sigset_t type
+ */
SYSCALL_DEFINE4(rt_sigaction, int, sig,
const struct sigaction __user *, act,
struct sigaction __user *, oact,
@@ -2909,6 +2984,12 @@ SYSCALL_DEFINE0(pause)
#endif
#ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
+/**
+ * sys_rt_sigsuspend - replace the signal mask for a value with the
+ * @unewset value until a signal is received
+ * @unewset: new signal mask value
+ * @sigsetsize: size of sigset_t type
+ */
SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize)
{
sigset_t newset;
diff --git a/kernel/sys.c b/kernel/sys.c
index 1ad48b3..af468ed 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -120,16 +120,33 @@ EXPORT_SYMBOL(cad_pid);
void (*pm_power_off_prepare)(void);
/*
+ * Returns true if current's euid is same as p's uid or euid,
+ * or has CAP_SYS_NICE to p's user_ns.
+ *
+ * Called with rcu_read_lock, creds are safe
+ */
+static bool set_one_prio_perm(struct task_struct *p)
+{
+ const struct cred *cred = current_cred(), *pcred = __task_cred(p);
+
+ if (pcred->user->user_ns == cred->user->user_ns &&
+ (pcred->uid == cred->euid ||
+ pcred->euid == cred->euid))
+ return true;
+ if (ns_capable(pcred->user->user_ns, CAP_SYS_NICE))
+ return true;
+ return false;
+}
+
+/*
* set the priority of a task
* - the caller must hold the RCU read lock
*/
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
- const struct cred *cred = current_cred(), *pcred = __task_cred(p);
int no_nice;
- if (pcred->uid != cred->euid &&
- pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) {
+ if (!set_one_prio_perm(p)) {
error = -EPERM;
goto out;
}
@@ -506,7 +523,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
if (rgid != (gid_t) -1) {
if (old->gid == rgid ||
old->egid == rgid ||
- capable(CAP_SETGID))
+ nsown_capable(CAP_SETGID))
new->gid = rgid;
else
goto error;
@@ -515,7 +532,7 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
if (old->gid == egid ||
old->egid == egid ||
old->sgid == egid ||
- capable(CAP_SETGID))
+ nsown_capable(CAP_SETGID))
new->egid = egid;
else
goto error;
@@ -550,7 +567,7 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
old = current_cred();
retval = -EPERM;
- if (capable(CAP_SETGID))
+ if (nsown_capable(CAP_SETGID))
new->gid = new->egid = new->sgid = new->fsgid = gid;
else if (gid == old->gid || gid == old->sgid)
new->egid = new->fsgid = gid;
@@ -617,7 +634,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
new->uid = ruid;
if (old->uid != ruid &&
old->euid != ruid &&
- !capable(CAP_SETUID))
+ !nsown_capable(CAP_SETUID))
goto error;
}
@@ -626,7 +643,7 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
if (old->uid != euid &&
old->euid != euid &&
old->suid != euid &&
- !capable(CAP_SETUID))
+ !nsown_capable(CAP_SETUID))
goto error;
}
@@ -674,7 +691,7 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
old = current_cred();
retval = -EPERM;
- if (capable(CAP_SETUID)) {
+ if (nsown_capable(CAP_SETUID)) {
new->suid = new->uid = uid;
if (uid != old->uid) {
retval = set_user(new);
@@ -716,7 +733,7 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
old = current_cred();
retval = -EPERM;
- if (!capable(CAP_SETUID)) {
+ if (!nsown_capable(CAP_SETUID)) {
if (ruid != (uid_t) -1 && ruid != old->uid &&
ruid != old->euid && ruid != old->suid)
goto error;
@@ -780,7 +797,7 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
old = current_cred();
retval = -EPERM;
- if (!capable(CAP_SETGID)) {
+ if (!nsown_capable(CAP_SETGID)) {
if (rgid != (gid_t) -1 && rgid != old->gid &&
rgid != old->egid && rgid != old->sgid)
goto error;
@@ -840,7 +857,7 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
if (uid == old->uid || uid == old->euid ||
uid == old->suid || uid == old->fsuid ||
- capable(CAP_SETUID)) {
+ nsown_capable(CAP_SETUID)) {
if (uid != old_fsuid) {
new->fsuid = uid;
if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
@@ -873,7 +890,7 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
if (gid == old->gid || gid == old->egid ||
gid == old->sgid || gid == old->fsgid ||
- capable(CAP_SETGID)) {
+ nsown_capable(CAP_SETGID)) {
if (gid != old_fsgid) {
new->fsgid = gid;
goto change_okay;
@@ -1181,8 +1198,9 @@ SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
int errno;
char tmp[__NEW_UTS_LEN];
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
+
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
down_write(&uts_sem);
@@ -1230,7 +1248,7 @@ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
int errno;
char tmp[__NEW_UTS_LEN];
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
@@ -1345,6 +1363,8 @@ int do_prlimit(struct task_struct *tsk, unsigned int resource,
rlim = tsk->signal->rlim + resource;
task_lock(tsk->group_leader);
if (new_rlim) {
+ /* Keep the capable check against init_user_ns until
+ cgroups can contain all limits */
if (new_rlim->rlim_max > rlim->rlim_max &&
!capable(CAP_SYS_RESOURCE))
retval = -EPERM;
@@ -1388,19 +1408,22 @@ static int check_prlimit_permission(struct task_struct *task)
{
const struct cred *cred = current_cred(), *tcred;
- tcred = __task_cred(task);
- if (current != task &&
- (cred->uid != tcred->euid ||
- cred->uid != tcred->suid ||
- cred->uid != tcred->uid ||
- cred->gid != tcred->egid ||
- cred->gid != tcred->sgid ||
- cred->gid != tcred->gid) &&
- !capable(CAP_SYS_RESOURCE)) {
- return -EPERM;
- }
+ if (current == task)
+ return 0;
- return 0;
+ tcred = __task_cred(task);
+ if (cred->user->user_ns == tcred->user->user_ns &&
+ (cred->uid == tcred->euid &&
+ cred->uid == tcred->suid &&
+ cred->uid == tcred->uid &&
+ cred->gid == tcred->egid &&
+ cred->gid == tcred->sgid &&
+ cred->gid == tcred->gid))
+ return 0;
+ if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE))
+ return 0;
+
+ return -EPERM;
}
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 40245d6..c0bb324 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -117,6 +117,7 @@ static int neg_one = -1;
static int zero;
static int __maybe_unused one = 1;
static int __maybe_unused two = 2;
+static int __maybe_unused three = 3;
static unsigned long one_ul = 1;
static int one_hundred = 100;
#ifdef CONFIG_PRINTK
@@ -169,6 +170,11 @@ static int proc_taint(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
#endif
+#ifdef CONFIG_PRINTK
+static int proc_dmesg_restrict(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+#endif
+
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses it's own private copy */
static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE;
@@ -706,7 +712,7 @@ static struct ctl_table kern_table[] = {
.data = &kptr_restrict,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_dmesg_restrict,
.extra1 = &zero,
.extra2 = &two,
},
@@ -971,14 +977,18 @@ static struct ctl_table vm_table[] = {
.data = &sysctl_overcommit_memory,
.maxlen = sizeof(sysctl_overcommit_memory),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
},
{
.procname = "panic_on_oom",
.data = &sysctl_panic_on_oom,
.maxlen = sizeof(sysctl_panic_on_oom),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
},
{
.procname = "oom_kill_allocating_task",
@@ -1006,7 +1016,8 @@ static struct ctl_table vm_table[] = {
.data = &page_cluster,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
.procname = "dirty_background_ratio",
@@ -1054,7 +1065,8 @@ static struct ctl_table vm_table[] = {
.data = &dirty_expire_interval,
.maxlen = sizeof(dirty_expire_interval),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
},
{
.procname = "nr_pdflush_threads",
@@ -1130,6 +1142,8 @@ static struct ctl_table vm_table[] = {
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = drop_caches_sysctl_handler,
+ .extra1 = &one,
+ .extra2 = &three,
},
#ifdef CONFIG_COMPACTION
{
@@ -2385,6 +2399,17 @@ static int proc_taint(struct ctl_table *table, int write,
return err;
}
+#ifdef CONFIG_PRINTK
+static int proc_dmesg_restrict(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ if (write && !capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+}
+#endif
+
struct do_proc_dointvec_minmax_conv_param {
int *min;
int *max;
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 10b90d8..4e4932a 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -111,11 +111,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
const char *fail = NULL;
if (table->parent) {
- if (table->procname && !table->parent->procname)
+ if (!table->parent->procname)
set_fail(&fail, table, "Parent without procname");
}
- if (!table->procname)
- set_fail(&fail, table, "No procname");
if (table->child) {
if (table->data)
set_fail(&fail, table, "Directory with data?");
@@ -144,13 +142,9 @@ int sysctl_check_table(struct nsproxy *namespaces, struct ctl_table *table)
set_fail(&fail, table, "No maxlen");
}
#ifdef CONFIG_PROC_SYSCTL
- if (table->procname && !table->proc_handler)
+ if (!table->proc_handler)
set_fail(&fail, table, "No proc_handler");
#endif
-#if 0
- if (!table->procname && table->proc_handler)
- set_fail(&fail, table, "proc_handler without procname");
-#endif
sysctl_check_leaf(namespaces, table, &fail);
}
if (table->mode > 0777)
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index 3971c6b..9ffea36 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -685,7 +685,7 @@ static int __init taskstats_init(void)
goto err_cgroup_ops;
family_registered = 1;
- printk("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
+ pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION);
return 0;
err_cgroup_ops:
genl_unregister_ops(&family, &taskstats_ops);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 5f1bb8e..f6117a4 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -652,6 +652,8 @@ int do_adjtimex(struct timex *txc)
struct timespec delta;
delta.tv_sec = txc->time.tv_sec;
delta.tv_nsec = txc->time.tv_usec;
+ if (!capable(CAP_SYS_TIME))
+ return -EPERM;
if (!(txc->modes & ADJ_NANO))
delta.tv_nsec *= 1000;
result = timekeeping_inject_offset(&delta);
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 3bd7e3d..8ad5d57 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -14,7 +14,7 @@
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/sched.h>
-#include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/time.h>
@@ -597,13 +597,12 @@ static struct timespec timekeeping_suspend_time;
/**
* timekeeping_resume - Resumes the generic timekeeping subsystem.
- * @dev: unused
*
* This is for the generic clocksource timekeeping.
* xtime/wall_to_monotonic/jiffies/etc are
* still managed by arch specific suspend/resume code.
*/
-static int timekeeping_resume(struct sys_device *dev)
+static void timekeeping_resume(void)
{
unsigned long flags;
struct timespec ts;
@@ -632,11 +631,9 @@ static int timekeeping_resume(struct sys_device *dev)
/* Resume hrtimers */
hres_timers_resume();
-
- return 0;
}
-static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
+static int timekeeping_suspend(void)
{
unsigned long flags;
@@ -654,26 +651,18 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
}
/* sysfs resume/suspend bits for timekeeping */
-static struct sysdev_class timekeeping_sysclass = {
- .name = "timekeeping",
+static struct syscore_ops timekeeping_syscore_ops = {
.resume = timekeeping_resume,
.suspend = timekeeping_suspend,
};
-static struct sys_device device_timer = {
- .id = 0,
- .cls = &timekeeping_sysclass,
-};
-
-static int __init timekeeping_init_device(void)
+static int __init timekeeping_init_ops(void)
{
- int error = sysdev_class_register(&timekeeping_sysclass);
- if (!error)
- error = sysdev_register(&device_timer);
- return error;
+ register_syscore_ops(&timekeeping_syscore_ops);
+ return 0;
}
-device_initcall(timekeeping_init_device);
+device_initcall(timekeeping_init_ops);
/*
* If the error is already larger, we look ahead even further
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index cbafed7..7aa40f8 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -703,28 +703,21 @@ void blk_trace_shutdown(struct request_queue *q)
*
**/
static void blk_add_trace_rq(struct request_queue *q, struct request *rq,
- u32 what)
+ u32 what)
{
struct blk_trace *bt = q->blk_trace;
- int rw = rq->cmd_flags & 0x03;
if (likely(!bt))
return;
- if (rq->cmd_flags & REQ_DISCARD)
- rw |= REQ_DISCARD;
-
- if (rq->cmd_flags & REQ_SECURE)
- rw |= REQ_SECURE;
-
if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
what |= BLK_TC_ACT(BLK_TC_PC);
- __blk_add_trace(bt, 0, blk_rq_bytes(rq), rw,
+ __blk_add_trace(bt, 0, blk_rq_bytes(rq), rq->cmd_flags,
what, rq->errors, rq->cmd_len, rq->cmd);
} else {
what |= BLK_TC_ACT(BLK_TC_FS);
- __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rw,
- what, rq->errors, 0, NULL);
+ __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
+ rq->cmd_flags, what, rq->errors, 0, NULL);
}
}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 888b611..c075f4e 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1467,7 +1467,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
return t_hash_next(m, pos);
(*pos)++;
- iter->pos = *pos;
+ iter->pos = iter->func_pos = *pos;
if (iter->flags & FTRACE_ITER_PRINTALL)
return t_hash_start(m, pos);
@@ -1502,7 +1502,6 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
if (!rec)
return t_hash_start(m, pos);
- iter->func_pos = *pos;
iter->func = rec;
return iter;
diff --git a/kernel/uid16.c b/kernel/uid16.c
index 4192098..51c6e89 100644
--- a/kernel/uid16.c
+++ b/kernel/uid16.c
@@ -189,7 +189,7 @@ SYSCALL_DEFINE2(setgroups16, int, gidsetsize, old_gid_t __user *, grouplist)
struct group_info *group_info;
int retval;
- if (!capable(CAP_SETGID))
+ if (!nsown_capable(CAP_SETGID))
return -EPERM;
if ((unsigned)gidsetsize > NGROUPS_MAX)
return -EINVAL;
diff --git a/kernel/user.c b/kernel/user.c
index 5c598ca..9e03e9c 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -17,9 +17,13 @@
#include <linux/module.h>
#include <linux/user_namespace.h>
+/*
+ * userns count is 1 for root user, 1 for init_uts_ns,
+ * and 1 for... ?
+ */
struct user_namespace init_user_ns = {
.kref = {
- .refcount = ATOMIC_INIT(2),
+ .refcount = ATOMIC_INIT(3),
},
.creator = &root_user,
};
@@ -47,7 +51,7 @@ static struct kmem_cache *uid_cachep;
*/
static DEFINE_SPINLOCK(uidhash_lock);
-/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->creator */
+/* root_user.__count is 2, 1 for init task cred, 1 for init_user_ns->user_ns */
struct user_struct root_user = {
.__count = ATOMIC_INIT(2),
.processes = ATOMIC_INIT(1),
diff --git a/kernel/utsname.c b/kernel/utsname.c
index 8a82b4b..4464617 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -14,6 +14,7 @@
#include <linux/utsname.h>
#include <linux/err.h>
#include <linux/slab.h>
+#include <linux/user_namespace.h>
static struct uts_namespace *create_uts_ns(void)
{
@@ -30,7 +31,8 @@ static struct uts_namespace *create_uts_ns(void)
* @old_ns: namespace to clone
* Return NULL on error (failure to kmalloc), new ns otherwise
*/
-static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
+static struct uts_namespace *clone_uts_ns(struct task_struct *tsk,
+ struct uts_namespace *old_ns)
{
struct uts_namespace *ns;
@@ -40,6 +42,7 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
down_read(&uts_sem);
memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
+ ns->user_ns = get_user_ns(task_cred_xxx(tsk, user)->user_ns);
up_read(&uts_sem);
return ns;
}
@@ -50,8 +53,10 @@ static struct uts_namespace *clone_uts_ns(struct uts_namespace *old_ns)
* utsname of this process won't be seen by parent, and vice
* versa.
*/
-struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *old_ns)
+struct uts_namespace *copy_utsname(unsigned long flags,
+ struct task_struct *tsk)
{
+ struct uts_namespace *old_ns = tsk->nsproxy->uts_ns;
struct uts_namespace *new_ns;
BUG_ON(!old_ns);
@@ -60,7 +65,7 @@ struct uts_namespace *copy_utsname(unsigned long flags, struct uts_namespace *ol
if (!(flags & CLONE_NEWUTS))
return old_ns;
- new_ns = clone_uts_ns(old_ns);
+ new_ns = clone_uts_ns(tsk, old_ns);
put_uts_ns(old_ns);
return new_ns;
@@ -71,5 +76,6 @@ void free_uts_ns(struct kref *kref)
struct uts_namespace *ns;
ns = container_of(kref, struct uts_namespace, kref);
+ put_user_ns(ns->user_ns);
kfree(ns);
}