aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/i386/kernel/syscall_table.S1
-rw-r--r--arch/ia64/kernel/entry.S1
-rw-r--r--arch/x86_64/ia32/ia32entry.S1
-rw-r--r--include/asm-i386/unistd.h3
-rw-r--r--include/asm-ia64/unistd.h3
-rw-r--r--include/asm-x86_64/ia32_unistd.h3
-rw-r--r--include/asm-x86_64/unistd.h4
-rw-r--r--include/linux/mempolicy.h3
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--kernel/sys_ni.c1
-rw-r--r--mm/mempolicy.c94
11 files changed, 111 insertions, 5 deletions
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index f7ba4ac..6ff3e52 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -293,3 +293,4 @@ ENTRY(sys_call_table)
.long sys_inotify_init
.long sys_inotify_add_watch
.long sys_inotify_rm_watch
+ .long sys_migrate_pages
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
index 0741b066..7a6ffd6 100644
--- a/arch/ia64/kernel/entry.S
+++ b/arch/ia64/kernel/entry.S
@@ -1600,5 +1600,6 @@ sys_call_table:
data8 sys_inotify_init
data8 sys_inotify_add_watch
data8 sys_inotify_rm_watch
+ data8 sys_migrate_pages // 1280
.org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S
index df0773c..1f0ff5a 100644
--- a/arch/x86_64/ia32/ia32entry.S
+++ b/arch/x86_64/ia32/ia32entry.S
@@ -643,6 +643,7 @@ ia32_sys_call_table:
.quad sys_inotify_init
.quad sys_inotify_add_watch
.quad sys_inotify_rm_watch
+ .quad sys_migrate_pages
ia32_syscall_end:
.rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8
.quad ni_syscall
diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index fe38b9a..481c3c0 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -299,8 +299,9 @@
#define __NR_inotify_init 291
#define __NR_inotify_add_watch 292
#define __NR_inotify_rm_watch 293
+#define __NR_migrate_pages 294
-#define NR_syscalls 294
+#define NR_syscalls 295
/*
* user-visible error numbers are in the range -1 - -128: see
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index 2bf5434..962f9bd 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -269,12 +269,13 @@
#define __NR_inotify_init 1277
#define __NR_inotify_add_watch 1278
#define __NR_inotify_rm_watch 1279
+#define __NR_migrate_pages 1280
#ifdef __KERNEL__
#include <linux/config.h>
-#define NR_syscalls 256 /* length of syscall table */
+#define NR_syscalls 270 /* length of syscall table */
#define __ARCH_WANT_SYS_RT_SIGACTION
diff --git a/include/asm-x86_64/ia32_unistd.h b/include/asm-x86_64/ia32_unistd.h
index d5166ec..e884336 100644
--- a/include/asm-x86_64/ia32_unistd.h
+++ b/include/asm-x86_64/ia32_unistd.h
@@ -299,7 +299,8 @@
#define __NR_ia32_inotify_init 291
#define __NR_ia32_inotify_add_watch 292
#define __NR_ia32_inotify_rm_watch 293
+#define __NR_ia32_migrate_pages 294
-#define IA32_NR_syscalls 294 /* must be > than biggest syscall! */
+#define IA32_NR_syscalls 295 /* must be > than biggest syscall! */
#endif /* _ASM_X86_64_IA32_UNISTD_H_ */
diff --git a/include/asm-x86_64/unistd.h b/include/asm-x86_64/unistd.h
index 2c42150..e6f8961 100644
--- a/include/asm-x86_64/unistd.h
+++ b/include/asm-x86_64/unistd.h
@@ -571,8 +571,10 @@ __SYSCALL(__NR_inotify_init, sys_inotify_init)
__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch)
#define __NR_inotify_rm_watch 255
__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch)
+#define __NR_migrate_pages 256
+__SYSCALL(__NR_migrate_pages, sys_migrate_pages)
-#define __NR_syscall_max __NR_inotify_rm_watch
+#define __NR_syscall_max __NR_migrate_pages
#ifndef __NO_STUBS
/* user-visible error numbers are in the range -1 - -4095 */
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 05443a7..3e61e82 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -162,6 +162,9 @@ static inline void check_highest_zone(int k)
policy_zone = k;
}
+int do_migrate_pages(struct mm_struct *mm,
+ const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);
+
#else
struct mempolicy {};
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index c7007b1..e910d1a 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -511,5 +511,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio);
asmlinkage long sys_ioprio_get(int which, int who);
asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
unsigned long maxnode);
+asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
+ const unsigned long __user *from, const unsigned long __user *to);
#endif
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 1ab2370..7a8bc7f 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -82,6 +82,7 @@ cond_syscall(compat_sys_socketcall);
cond_syscall(sys_inotify_init);
cond_syscall(sys_inotify_add_watch);
cond_syscall(sys_inotify_rm_watch);
+cond_syscall(sys_migrate_pages);
/* arch-specific weak syscall entries */
cond_syscall(sys_pciconfig_read);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9cc6d96..20d5ad3 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -615,11 +615,41 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask,
}
/*
+ * For now migrate_pages simply swaps out the pages from nodes that are in
+ * the source set but not in the target set. In the future, we would
+ * want a function that moves pages between the two nodesets in such
+ * a way as to preserve the physical layout as much as possible.
+ *
+ * Returns the number of page that could not be moved.
+ */
+int do_migrate_pages(struct mm_struct *mm,
+ const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags)
+{
+ LIST_HEAD(pagelist);
+ int count = 0;
+ nodemask_t nodes;
+
+ nodes_andnot(nodes, *from_nodes, *to_nodes);
+ nodes_complement(nodes, nodes);
+
+ down_read(&mm->mmap_sem);
+ check_range(mm, mm->mmap->vm_start, TASK_SIZE, &nodes,
+ flags | MPOL_MF_DISCONTIG_OK, &pagelist);
+ if (!list_empty(&pagelist)) {
+ migrate_pages(&pagelist, NULL);
+ if (!list_empty(&pagelist))
+ count = putback_lru_pages(&pagelist);
+ }
+ up_read(&mm->mmap_sem);
+ return count;
+}
+
+/*
* User space interface with variable sized bitmaps for nodelists.
*/
/* Copy a node mask from user space. */
-static int get_nodes(nodemask_t *nodes, unsigned long __user *nmask,
+static int get_nodes(nodemask_t *nodes, const unsigned long __user *nmask,
unsigned long maxnode)
{
unsigned long k;
@@ -708,6 +738,68 @@ asmlinkage long sys_set_mempolicy(int mode, unsigned long __user *nmask,
return do_set_mempolicy(mode, &nodes);
}
+/* Macro needed until Paul implements this function in kernel/cpusets.c */
+#define cpuset_mems_allowed(task) node_online_map
+
+asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode,
+ const unsigned long __user *old_nodes,
+ const unsigned long __user *new_nodes)
+{
+ struct mm_struct *mm;
+ struct task_struct *task;
+ nodemask_t old;
+ nodemask_t new;
+ nodemask_t task_nodes;
+ int err;
+
+ err = get_nodes(&old, old_nodes, maxnode);
+ if (err)
+ return err;
+
+ err = get_nodes(&new, new_nodes, maxnode);
+ if (err)
+ return err;
+
+ /* Find the mm_struct */
+ read_lock(&tasklist_lock);
+ task = pid ? find_task_by_pid(pid) : current;
+ if (!task) {
+ read_unlock(&tasklist_lock);
+ return -ESRCH;
+ }
+ mm = get_task_mm(task);
+ read_unlock(&tasklist_lock);
+
+ if (!mm)
+ return -EINVAL;
+
+ /*
+ * Check if this process has the right to modify the specified
+ * process. The right exists if the process has administrative
+ * capabilities, superuser priviledges or the same
+ * userid as the target process.
+ */
+ if ((current->euid != task->suid) && (current->euid != task->uid) &&
+ (current->uid != task->suid) && (current->uid != task->uid) &&
+ !capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ task_nodes = cpuset_mems_allowed(task);
+ /* Is the user allowed to access the target nodes? */
+ if (!nodes_subset(new, task_nodes) && !capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ err = do_migrate_pages(mm, &old, &new, MPOL_MF_MOVE);
+out:
+ mmput(mm);
+ return err;
+}
+
+
/* Retrieve NUMA policy */
asmlinkage long sys_get_mempolicy(int __user *policy,
unsigned long __user *nmask,