From ac9b9c667c2e1194e22ebe0a441ae1c37aaa9b90 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Thu, 20 Oct 2005 16:24:28 +0100
Subject: [PATCH] Fix handling spurious page fault for hugetlb region

This reverts commit 3359b54c8c07338f3a863d1109b42eebccdcf379 and
replaces it with a cleaner version that is purely based on page table
operations, so that the synchronization between inode size and hugetlb
mappings becomes moot.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 mm/hugetlb.c | 22 ++++++++++++++++++++++
 mm/memory.c  | 14 ++------------
 2 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'mm')

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a1b30d4..61d3806 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -394,6 +394,28 @@ out:
 	return ret;
 }
 
+/*
+ * On ia64 at least, it is possible to receive a hugetlb fault from a
+ * stale zero entry left in the TLB from earlier hardware prefetching.
+ * Low-level arch code should already have flushed the stale entry as
+ * part of its fault handling, but we do need to accept this minor fault
+ * and return successfully.  Whereas the "normal" case is that this is
+ * an access to a hugetlb page which has been truncated off since mmap.
+ */
+int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+			unsigned long address, int write_access)
+{
+	int ret = VM_FAULT_SIGBUS;
+	pte_t *pte;
+
+	spin_lock(&mm->page_table_lock);
+	pte = huge_pte_offset(mm, address);
+	if (pte && !pte_none(*pte))
+		ret = VM_FAULT_MINOR;
+	spin_unlock(&mm->page_table_lock);
+	return ret;
+}
+
 int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			struct page **pages, struct vm_area_struct **vmas,
 			unsigned long *position, int *length, int i)
diff --git a/mm/memory.c b/mm/memory.c
index 8c88b97..1db40e9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2045,18 +2045,8 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 
 	inc_page_state(pgfault);
 
-	if (unlikely(is_vm_hugetlb_page(vma))) {
-		if (valid_hugetlb_file_off(vma, address))
-			/* We get here only if there was a stale(zero) TLB entry 
-			 * (because of  HW prefetching). 
-			 * Low-level arch code (if needed) should have already
-			 * purged the stale entry as part of this fault handling.  
-			 * Here we just return.
-			 */
-			return VM_FAULT_MINOR; 
-		else
-			return VM_FAULT_SIGBUS;	/* mapping truncation does this. */
-	}
+	if (unlikely(is_vm_hugetlb_page(vma)))
+		return hugetlb_fault(mm, vma, address, write_access);
 
 	/*
 	 * We need the page table lock to synchronize with kswapd
-- 
cgit v1.1