From a664b2d8555c659127bf8fe049a58449d394a707 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli <aarcange@redhat.com> Date: Thu, 13 Jan 2011 15:47:17 -0800 Subject: thp: madvise(MADV_NOHUGEPAGE) Add madvise MADV_NOHUGEPAGE to mark regions that are not important to be hugepage backed. Return -EINVAL if the vma is not of an anonymous type, or the feature isn't built into the kernel. Never silently return success. Signed-off-by: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> --- include/linux/huge_mm.h | 14 ++++++++------ include/linux/khugepaged.h | 7 ++++--- include/linux/mm.h | 1 + mm/huge_memory.c | 41 ++++++++++++++++++++++++++++++----------- mm/madvise.c | 4 +++- 5 files changed, 46 insertions(+), 21 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 9b48c24d..a8b7e42 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -52,10 +52,12 @@ extern pmd_t *page_check_address_pmd(struct page *page, #define HPAGE_PMD_SIZE HPAGE_SIZE #define transparent_hugepage_enabled(__vma) \ - (transparent_hugepage_flags & (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ - (transparent_hugepage_flags & \ - (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \ - (__vma)->vm_flags & VM_HUGEPAGE)) + ((transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_FLAG) || \ + (transparent_hugepage_flags & \ + (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG) && \ + ((__vma)->vm_flags & VM_HUGEPAGE))) && \ + !((__vma)->vm_flags & VM_NOHUGEPAGE)) #define transparent_hugepage_defrag(__vma) \ ((transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)) || \ @@ -100,7 +102,7 @@ extern void __split_huge_page_pmd(struct mm_struct *mm, pmd_t *pmd); #if HPAGE_PMD_ORDER > MAX_ORDER #error "hugepages can't be allocated by the buddy allocator" #endif -extern int hugepage_madvise(unsigned long *vm_flags); +extern int hugepage_madvise(unsigned long *vm_flags, int advice); extern void __vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -141,7 +143,7 @@ static inline int split_huge_page(struct page *page) do { } while (0) #define wait_split_huge_page(__anon_vma, __pmd) \ do { } while (0) -static inline int hugepage_madvise(unsigned long *vm_flags) +static inline int hugepage_madvise(unsigned long *vm_flags, int advice) { BUG(); return 0; diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index 552f318..6b394f0
100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -38,9 +38,10 @@ static inline void khugepaged_exit(struct mm_struct *mm) static inline int khugepaged_enter(struct vm_area_struct *vma) { if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) - if (khugepaged_always() || - (khugepaged_req_madv() && - vma->vm_flags & VM_HUGEPAGE)) + if ((khugepaged_always() || + (khugepaged_req_madv() && + vma->vm_flags & VM_HUGEPAGE)) && + !(vma->vm_flags & VM_NOHUGEPAGE)) if (__khugepaged_enter(vma->vm_mm)) return -ENOMEM; return 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index ce97a2b..956a355 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -83,6 +83,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_GROWSUP 0x00000200 #else #define VM_GROWSUP 0x00000000 +#define VM_NOHUGEPAGE 0x00000200 /* MADV_NOHUGEPAGE marked this vma */ #endif #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ #define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f4f6041..fce667c 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -16,6 +16,7 @@ #include <linux/kthread.h> #include <linux/khugepaged.h> #include <linux/freezer.h> +#include <linux/mman.h> #include <asm/tlb.h> #include <asm/pgalloc.h> #include "internal.h" @@ -1388,18 +1389,36 @@ out: return ret; } -int hugepage_madvise(unsigned long *vm_flags) +int hugepage_madvise(unsigned long *vm_flags, int advice) { - /* - * Be somewhat over-protective like KSM for now! - */ - if (*vm_flags & (VM_HUGEPAGE | VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | - VM_MIXEDMAP | VM_SAO)) - return -EINVAL; - - *vm_flags |= VM_HUGEPAGE; + switch (advice) { + case MADV_HUGEPAGE: + /* + * Be somewhat over-protective like KSM for now!
+ */ + if (*vm_flags & (VM_HUGEPAGE | + VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | + VM_MIXEDMAP | VM_SAO)) + return -EINVAL; + *vm_flags &= ~VM_NOHUGEPAGE; + *vm_flags |= VM_HUGEPAGE; + break; + case MADV_NOHUGEPAGE: + /* + * Be somewhat over-protective like KSM for now! + */ + if (*vm_flags & (VM_NOHUGEPAGE | + VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_RESERVED | VM_HUGETLB | VM_INSERTPAGE | + VM_MIXEDMAP | VM_SAO)) + return -EINVAL; + *vm_flags &= ~VM_HUGEPAGE; + *vm_flags |= VM_NOHUGEPAGE; + break; + } return 0; } diff --git a/mm/madvise.c b/mm/madvise.c index ecde40a..bbac126 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -72,7 +72,8 @@ static long madvise_behavior(struct vm_area_struct * vma, goto out; break; case MADV_HUGEPAGE: - error = hugepage_madvise(&new_flags); + case MADV_NOHUGEPAGE: + error = hugepage_madvise(&new_flags, behavior); if (error) goto out; break; @@ -290,6 +291,7 @@ madvise_behavior_valid(int behavior) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE case MADV_HUGEPAGE: + case MADV_NOHUGEPAGE: #endif return 1; -- cgit v1.1