From 502717f4e112b18d9c37753a32f675bec9f2838b Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Wed, 11 Oct 2006 01:20:46 -0700 Subject: [PATCH] hugetlb: fix linked list corruption in unmap_hugepage_range() commit fe1668ae5bf0145014c71797febd9ad5670d5d05 causes kernel to oops with libhugetlbfs test suite. The problem is that hugetlb pages can be shared by multiple mappings. Multiple threads can fight over page->lru in the unmap path and bad things happen. We now serialize __unmap_hugepage_range to void concurrent linked list manipulation. Such serialization is also needed for shared page table page on hugetlb area. This patch will fixed the bug and also serve as a prepatch for shared page table. Signed-off-by: Ken Chen Cc: Hugh Dickins Cc: David Gibson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hugetlbfs/inode.c | 2 +- include/linux/hugetlb.h | 1 + mm/hugetlb.c | 22 ++++++++++++++++++++-- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 5e03b2f..4ee3f00 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -293,7 +293,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff) if (h_vm_pgoff >= h_pgoff) v_offset = 0; - unmap_hugepage_range(vma, + __unmap_hugepage_range(vma, vma->vm_start + v_offset, vma->vm_end); } } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c25a38d..5081d27 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -17,6 +17,7 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user * int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int); void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); +void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long); int hugetlb_prefault(struct address_space *, struct vm_area_struct *); int hugetlb_report_meminfo(char *); int hugetlb_report_node_meminfo(int, char *); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1d709ff..2dbec90 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -356,8 +356,8 @@ nomem: return -ENOMEM; } -void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, - unsigned long end) +void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) { struct mm_struct *mm = vma->vm_mm; unsigned long address; @@ -398,6 +398,24 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, } } +void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + /* + * It is undesirable to test vma->vm_file as it should be non-null + * for valid hugetlb area. However, vm_file will be NULL in the error + * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails, + * do_mmap_pgoff() nullifies vma->vm_file before calling this function + * to clean up. Since no pte has actually been setup, it is safe to + * do nothing in this case. + */ + if (vma->vm_file) { + spin_lock(&vma->vm_file->f_mapping->i_mmap_lock); + __unmap_hugepage_range(vma, start, end); + spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock); + } +} + static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, pte_t *ptep, pte_t pte) { -- cgit v1.1