aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/hugetlb.h6
-rw-r--r--include/linux/mempolicy.h3
-rw-r--r--kernel/sysctl.c15
-rw-r--r--mm/hugetlb.c97
-rw-r--r--mm/mempolicy.c47
5 files changed, 153 insertions, 15 deletions
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 41a59af..78b4bc6 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -23,6 +23,12 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
+
+#ifdef CONFIG_NUMA
+int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
+ void __user *, size_t *, loff_t *);
+#endif
+
int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
struct page **, struct vm_area_struct **,
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 085c903..1cc966c 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -201,6 +201,7 @@ extern void mpol_fix_fork_child_flag(struct task_struct *p);
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
struct mempolicy **mpol, nodemask_t **nodemask);
+extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
extern unsigned slab_node(struct mempolicy *policy);
extern enum zone_type policy_zone;
@@ -328,6 +329,8 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
return node_zonelist(0, gfp_flags);
}
+static inline bool init_nodemask_of_mempolicy(nodemask_t *m) { return false; }
+
static inline int do_migrate_pages(struct mm_struct *mm,
const nodemask_t *from_nodes,
const nodemask_t *to_nodes, int flags)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 554ac48..60fc931 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1051,7 +1051,7 @@ static struct ctl_table vm_table[] = {
.extra2 = &one_hundred,
},
#ifdef CONFIG_HUGETLB_PAGE
- {
+ {
.procname = "nr_hugepages",
.data = NULL,
.maxlen = sizeof(unsigned long),
@@ -1059,7 +1059,18 @@ static struct ctl_table vm_table[] = {
.proc_handler = hugetlb_sysctl_handler,
.extra1 = (void *)&hugetlb_zero,
.extra2 = (void *)&hugetlb_infinity,
- },
+ },
+#ifdef CONFIG_NUMA
+ {
+ .procname = "nr_hugepages_mempolicy",
+ .data = NULL,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = &hugetlb_mempolicy_sysctl_handler,
+ .extra1 = (void *)&hugetlb_zero,
+ .extra2 = (void *)&hugetlb_infinity,
+ },
+#endif
{
.procname = "hugetlb_shm_group",
.data = &sysctl_hugetlb_shm_group,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 324d1ab..1125d81 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1330,29 +1330,71 @@ static struct hstate *kobj_to_hstate(struct kobject *kobj)
return NULL;
}
-static ssize_t nr_hugepages_show(struct kobject *kobj,
+static ssize_t nr_hugepages_show_common(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct hstate *h = kobj_to_hstate(kobj);
return sprintf(buf, "%lu\n", h->nr_huge_pages);
}
-static ssize_t nr_hugepages_store(struct kobject *kobj,
- struct kobj_attribute *attr, const char *buf, size_t count)
+static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
+ struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t len)
{
int err;
- unsigned long input;
+ unsigned long count;
struct hstate *h = kobj_to_hstate(kobj);
+ NODEMASK_ALLOC(nodemask_t, nodes_allowed);
- err = strict_strtoul(buf, 10, &input);
+ err = strict_strtoul(buf, 10, &count);
if (err)
return 0;
- h->max_huge_pages = set_max_huge_pages(h, input, &node_online_map);
+ if (!(obey_mempolicy && init_nodemask_of_mempolicy(nodes_allowed))) {
+ NODEMASK_FREE(nodes_allowed);
+ nodes_allowed = &node_online_map;
+ }
+ h->max_huge_pages = set_max_huge_pages(h, count, nodes_allowed);
- return count;
+ if (nodes_allowed != &node_online_map)
+ NODEMASK_FREE(nodes_allowed);
+
+ return len;
+}
+
+static ssize_t nr_hugepages_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return nr_hugepages_show_common(kobj, attr, buf);
+}
+
+static ssize_t nr_hugepages_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t len)
+{
+ return nr_hugepages_store_common(false, kobj, attr, buf, len);
}
HSTATE_ATTR(nr_hugepages);
+#ifdef CONFIG_NUMA
+
+/*
+ * hstate attribute for optionally mempolicy-based constraint on persistent
+ * huge page alloc/free.
+ */
+static ssize_t nr_hugepages_mempolicy_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ return nr_hugepages_show_common(kobj, attr, buf);
+}
+
+static ssize_t nr_hugepages_mempolicy_store(struct kobject *kobj,
+ struct kobj_attribute *attr, const char *buf, size_t len)
+{
+ return nr_hugepages_store_common(true, kobj, attr, buf, len);
+}
+HSTATE_ATTR(nr_hugepages_mempolicy);
+#endif
+
+
static ssize_t nr_overcommit_hugepages_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@@ -1408,6 +1450,9 @@ static struct attribute *hstate_attrs[] = {
&free_hugepages_attr.attr,
&resv_hugepages_attr.attr,
&surplus_hugepages_attr.attr,
+#ifdef CONFIG_NUMA
+ &nr_hugepages_mempolicy_attr.attr,
+#endif
NULL,
};
@@ -1574,9 +1619,9 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
}
#ifdef CONFIG_SYSCTL
-int hugetlb_sysctl_handler(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *length, loff_t *ppos)
+static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
+ struct ctl_table *table, int write,
+ void __user *buffer, size_t *length, loff_t *ppos)
{
struct hstate *h = &default_hstate;
unsigned long tmp;
@@ -1588,13 +1633,39 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
table->maxlen = sizeof(unsigned long);
proc_doulongvec_minmax(table, write, buffer, length, ppos);
- if (write)
- h->max_huge_pages = set_max_huge_pages(h, tmp,
- &node_online_map);
+ if (write) {
+ NODEMASK_ALLOC(nodemask_t, nodes_allowed);
+ if (!(obey_mempolicy &&
+ init_nodemask_of_mempolicy(nodes_allowed))) {
+ NODEMASK_FREE(nodes_allowed);
+ nodes_allowed = &node_states[N_HIGH_MEMORY];
+ }
+ h->max_huge_pages = set_max_huge_pages(h, tmp, nodes_allowed);
+
+ if (nodes_allowed != &node_states[N_HIGH_MEMORY])
+ NODEMASK_FREE(nodes_allowed);
+ }
return 0;
}
+int hugetlb_sysctl_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length, loff_t *ppos)
+{
+
+ return hugetlb_sysctl_handler_common(false, table, write,
+ buffer, length, ppos);
+}
+
+#ifdef CONFIG_NUMA
+int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
+ void __user *buffer, size_t *length, loff_t *ppos)
+{
+ return hugetlb_sysctl_handler_common(true, table, write,
+ buffer, length, ppos);
+}
+#endif /* CONFIG_NUMA */
+
int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
void __user *buffer,
size_t *length, loff_t *ppos)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0f89eab..f11fdad 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1568,6 +1568,53 @@ struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr,
}
return zl;
}
+
+/*
+ * init_nodemask_of_mempolicy
+ *
+ * If the current task's mempolicy is "default" [NULL], return 'false'
+ * to indicate default policy. Otherwise, extract the policy nodemask
+ * for 'bind' or 'interleave' policy into the argument nodemask, or
+ * initialize the argument nodemask to contain the single node for
+ * 'preferred' or 'local' policy and return 'true' to indicate presence
+ * of non-default mempolicy.
+ *
+ * We don't bother with reference counting the mempolicy [mpol_get/put]
+ * because the current task is examining it's own mempolicy and a task's
+ * mempolicy is only ever changed by the task itself.
+ *
+ * N.B., it is the caller's responsibility to free a returned nodemask.
+ */
+bool init_nodemask_of_mempolicy(nodemask_t *mask)
+{
+ struct mempolicy *mempolicy;
+ int nid;
+
+ if (!(mask && current->mempolicy))
+ return false;
+
+ mempolicy = current->mempolicy;
+ switch (mempolicy->mode) {
+ case MPOL_PREFERRED:
+ if (mempolicy->flags & MPOL_F_LOCAL)
+ nid = numa_node_id();
+ else
+ nid = mempolicy->v.preferred_node;
+ init_nodemask_of_node(mask, nid);
+ break;
+
+ case MPOL_BIND:
+ /* Fall through */
+ case MPOL_INTERLEAVE:
+ *mask = mempolicy->v.nodes;
+ break;
+
+ default:
+ BUG();
+ }
+
+ return true;
+}
#endif
/* Allocate a page in interleaved policy.