Diffstat (limited to 'mm/truncate.c')
-rw-r--r--  mm/truncate.c  228
1 file changed, 106 insertions(+), 122 deletions(-)
diff --git a/mm/truncate.c b/mm/truncate.c
index 143883a..6a11acb 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -12,7 +12,7 @@
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
-#include <linux/export.h>
+#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/pagevec.h>
@@ -20,7 +20,6 @@
#include <linux/buffer_head.h> /* grr. try_to_release_page,
do_invalidatepage */
#include <linux/cleancache.h>
-#include <linux/rmap.h>
#include "internal.h"
@@ -139,12 +138,18 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
return ret;
}
+#ifdef CONFIG_MACH_P4NOTE
+static int unmap_mapcount = -99;
+#endif
int truncate_inode_page(struct address_space *mapping, struct page *page)
{
if (page_mapped(page)) {
unmap_mapping_range(mapping,
(loff_t)page->index << PAGE_CACHE_SHIFT,
PAGE_CACHE_SIZE, 0);
+#ifdef CONFIG_MACH_P4NOTE
+ unmap_mapcount = atomic_read(&(page)->_mapcount);
+#endif
}
return truncate_complete_page(mapping, page);
}
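
Annotation (not part of the patch): the CONFIG_MACH_P4NOTE hook above snapshots the raw _mapcount right after the unmap. In kernels of this generation _mapcount is stored biased by -1 (-1 means no PTE maps the page), a convention that page_mapped() and page_mapcount() encapsulate; the -99 initializer is simply a value a real read can never produce. A minimal sketch of that convention, with a hypothetical helper name:

#include <linux/mm.h>

/*
 * _mapcount bias: -1 means no process maps the page, 0 means one
 * mapping, and so on.  A non-negative value recorded by the debug
 * hook therefore means the page was still mapped after the unmap.
 */
static inline bool p4note_still_mapped(struct page *page)	/* hypothetical */
{
	return atomic_read(&page->_mapcount) >= 0;	/* == page_mapped(page) */
}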
@@ -200,6 +205,9 @@ int invalidate_inode_page(struct page *page)
* The first pass will remove most pages, so the search cost of the second pass
* is low.
*
+ * When looking at page->index outside the page lock we need to be careful to
+ * copy it into a local to avoid races (it could change at any time).
+ *
* We pass down the cache-hot hint to the page freeing code. Even if the
* mapping is large, it is probably the case that the final pages are the most
* recently touched, and freeing happens in ascending file offset order.
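
Annotation (not part of the patch): the race this new comment warns about is why the hunks below snapshot page->index into a local before comparing it. A sketch of the pattern, mirroring the added code; illustrative only:

#include <linux/pagemap.h>

/* Copy the racy field exactly once, then use only the local copy:
 * without the page lock, page->index may change between reads. */
static bool page_beyond_range(struct page *page, pgoff_t end)
{
	pgoff_t page_index = page->index;	/* single racy read */

	return page_index > end;
}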
@@ -208,10 +216,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
loff_t lstart, loff_t lend)
{
const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
+ pgoff_t end;
const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
struct pagevec pvec;
- pgoff_t index;
- pgoff_t end;
+ pgoff_t next;
int i;
cleancache_flush_inode(mapping);
@@ -222,21 +230,24 @@ void truncate_inode_pages_range(struct address_space *mapping,
end = (lend >> PAGE_CACHE_SHIFT);
pagevec_init(&pvec, 0);
- index = start;
- while (index <= end && pagevec_lookup(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+ next = start;
+ while (next <= end &&
+ pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
+ pgoff_t page_index = page->index;
- /* We rely upon deletion not changing page->index */
- index = page->index;
- if (index > end)
+ if (page_index > end) {
+ next = page_index;
break;
+ }
+ if (page_index > next)
+ next = page_index;
+ next++;
if (!trylock_page(page))
continue;
- WARN_ON(page->index != index);
if (PageWriteback(page)) {
unlock_page(page);
continue;
@@ -247,7 +258,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
pagevec_release(&pvec);
mem_cgroup_uncharge_end();
cond_resched();
- index++;
}
if (partial) {
@@ -260,17 +270,16 @@ void truncate_inode_pages_range(struct address_space *mapping,
}
}
- index = start;
+ next = start;
for ( ; ; ) {
cond_resched();
- if (!pagevec_lookup(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
- if (index == start)
+ if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+ if (next == start)
break;
- index = start;
+ next = start;
continue;
}
- if (index == start && pvec.pages[0]->index > end) {
+ if (pvec.pages[0]->index > end) {
pagevec_release(&pvec);
break;
}
@@ -278,20 +287,18 @@ void truncate_inode_pages_range(struct address_space *mapping,
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
- /* We rely upon deletion not changing page->index */
- index = page->index;
- if (index > end)
+ if (page->index > end)
break;
-
lock_page(page);
- WARN_ON(page->index != index);
wait_on_page_writeback(page);
truncate_inode_page(mapping, page);
+ if (page->index > next)
+ next = page->index;
+ next++;
unlock_page(page);
}
pagevec_release(&pvec);
mem_cgroup_uncharge_end();
- index++;
}
cleancache_flush_inode(mapping);
}
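
Annotation (not part of the patch): the usual entry point into the routine above is a filesystem's inode-eviction path. A minimal sketch of such a call site, assuming a hypothetical myfs and the end_writeback() helper of this kernel generation (later renamed clear_inode()):

#include <linux/fs.h>
#include <linux/mm.h>

/* Hypothetical: drop all pagecache of an inode being evicted. */
static void myfs_evict_inode(struct inode *inode)
{
	truncate_inode_pages(&inode->i_data, 0);	/* lstart = 0: everything */
	end_writeback(inode);
}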
@@ -332,34 +339,35 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
struct pagevec pvec;
- pgoff_t index = start;
+ pgoff_t next = start;
unsigned long ret;
unsigned long count = 0;
int i;
- /*
- * Note: this function may get called on a shmem/tmpfs mapping:
- * pagevec_lookup() might then return 0 prematurely (because it
- * got a gangful of swap entries); but it's hardly worth worrying
- * about - it can rarely have anything to free from such a mapping
- * (most pages are dirty), and already skips over any difficulties.
- */
-
pagevec_init(&pvec, 0);
- while (index <= end && pagevec_lookup(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+ while (next <= end &&
+ pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
+ pgoff_t index;
+ int lock_failed;
- /* We rely upon deletion not changing page->index */
- index = page->index;
- if (index > end)
- break;
+ lock_failed = !trylock_page(page);
- if (!trylock_page(page))
+ /*
+ * We really shouldn't be looking at the ->index of an
+ * unlocked page. But we're not allowed to lock these
+ * pages. So we rely upon nobody altering the ->index
+ * of this (pinned-by-us) page.
+ */
+ index = page->index;
+ if (index > next)
+ next = index;
+ next++;
+ if (lock_failed)
continue;
- WARN_ON(page->index != index);
+
ret = invalidate_inode_page(page);
unlock_page(page);
/*
@@ -369,11 +377,12 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
if (!ret)
deactivate_page(page);
count += ret;
+ if (next > end)
+ break;
}
pagevec_release(&pvec);
mem_cgroup_uncharge_end();
cond_resched();
- index++;
}
return count;
}
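
Annotation (not part of the patch): the best-known caller of invalidate_mapping_pages() is the POSIX_FADV_DONTNEED path in mm/fadvise.c, which converts a byte range to page indices and drops whatever is cleanly droppable. Simplified from that era's code:

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Sketch of the POSIX_FADV_DONTNEED case: best-effort invalidation,
 * silently skipping dirty, locked, or mapped pages. */
static void fadvise_dontneed_sketch(struct address_space *mapping,
				    loff_t offset, loff_t endbyte)
{
	pgoff_t start_index = offset >> PAGE_CACHE_SHIFT;
	pgoff_t end_index = endbyte >> PAGE_CACHE_SHIFT;

	if (end_index >= start_index)
		invalidate_mapping_pages(mapping, start_index, end_index);
}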
@@ -440,32 +449,37 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
pgoff_t start, pgoff_t end)
{
struct pagevec pvec;
- pgoff_t index;
+ pgoff_t next;
int i;
int ret = 0;
int ret2 = 0;
int did_range_unmap = 0;
+ int wrapped = 0;
cleancache_flush_inode(mapping);
pagevec_init(&pvec, 0);
- index = start;
- while (index <= end && pagevec_lookup(&pvec, mapping, index,
- min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+ next = start;
+ while (next <= end && !wrapped &&
+ pagevec_lookup(&pvec, mapping, next,
+ min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
mem_cgroup_uncharge_start();
for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i];
-
- /* We rely upon deletion not changing page->index */
- index = page->index;
- if (index > end)
- break;
+ pgoff_t page_index;
lock_page(page);
- WARN_ON(page->index != index);
if (page->mapping != mapping) {
unlock_page(page);
continue;
}
+ page_index = page->index;
+ next = page_index + 1;
+ if (next == 0)
+ wrapped = 1;
+ if (page_index > end) {
+ unlock_page(page);
+ break;
+ }
wait_on_page_writeback(page);
if (page_mapped(page)) {
if (!did_range_unmap) {
@@ -473,9 +487,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
* Zap the rest of the file in one hit.
*/
unmap_mapping_range(mapping,
- (loff_t)index << PAGE_CACHE_SHIFT,
- (loff_t)(1 + end - index)
- << PAGE_CACHE_SHIFT,
+ (loff_t)page_index<<PAGE_CACHE_SHIFT,
+ (loff_t)(end - page_index + 1)
+ << PAGE_CACHE_SHIFT,
0);
did_range_unmap = 1;
} else {
@@ -483,8 +497,8 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
* Just zap this page
*/
unmap_mapping_range(mapping,
- (loff_t)index << PAGE_CACHE_SHIFT,
- PAGE_CACHE_SIZE, 0);
+ (loff_t)page_index<<PAGE_CACHE_SHIFT,
+ PAGE_CACHE_SIZE, 0);
}
}
BUG_ON(page_mapped(page));
@@ -500,7 +514,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
pagevec_release(&pvec);
mem_cgroup_uncharge_end();
cond_resched();
- index++;
}
cleancache_flush_inode(mapping);
return ret;
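
Annotation (not part of the patch): unlike the best-effort invalidate_mapping_pages(), invalidate_inode_pages2_range() must either evict every page in the range or report failure; the generic direct-I/O write path in mm/filemap.c relies on that. Roughly, simplified from that era's code:

#include <linux/fs.h>
#include <linux/pagemap.h>

/* Sketch of the direct-I/O usage: force out any cached pages over the
 * write range, propagating -EBUSY if a page cannot be invalidated. */
static int dio_invalidate_sketch(struct address_space *mapping,
				 loff_t pos, size_t count)
{
	pgoff_t end = (pos + count - 1) >> PAGE_CACHE_SHIFT;

	return invalidate_inode_pages2_range(mapping,
					     pos >> PAGE_CACHE_SHIFT, end);
}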
@@ -525,8 +538,8 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
/**
* truncate_pagecache - unmap and remove pagecache that has been truncated
* @inode: inode
- * @oldsize: old file size
- * @newsize: new file size
+ * @old: old file offset
+ * @new: new file offset
*
* inode's new i_size must already be written before truncate_pagecache
* is called.
@@ -538,10 +551,9 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
* situations such as writepage being called for a page that has already
* had its underlying blocks deallocated.
*/
-void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
{
struct address_space *mapping = inode->i_mapping;
- loff_t holebegin = round_up(newsize, PAGE_SIZE);
/*
* unmap_mapping_range is called twice, first simply for
@@ -552,9 +564,9 @@ void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
* truncate_inode_pages finishes, hence the second
* unmap_mapping_range call must be made for correctness.
*/
- unmap_mapping_range(mapping, holebegin, 0, 1);
- truncate_inode_pages(mapping, newsize);
- unmap_mapping_range(mapping, holebegin, 0, 1);
+ unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+ truncate_inode_pages(mapping, new);
+ unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
}
EXPORT_SYMBOL(truncate_pagecache);
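
Annotation (not part of the patch): the hunk above replaces round_up(newsize, PAGE_SIZE) with new + PAGE_SIZE - 1 as the hole start. The two forms agree because unmap_mapping_range() only ever uses holebegin >> PAGE_SHIFT, so both select the page boundary at or above the new size. An illustrative check:

#include <linux/kernel.h>
#include <linux/mm.h>

/* Illustration only: both expressions name the same first page index
 * to unmap, for any new_size. */
static void holebegin_equivalence_check(loff_t new_size)
{
	WARN_ON(((new_size + PAGE_SIZE - 1) >> PAGE_SHIFT) !=
		(round_up(new_size, PAGE_SIZE) >> PAGE_SHIFT));
}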
@@ -576,82 +588,54 @@ void truncate_setsize(struct inode *inode, loff_t newsize)
oldsize = inode->i_size;
i_size_write(inode, newsize);
- if (newsize > oldsize)
- pagecache_isize_extended(inode, oldsize, newsize);
+
truncate_pagecache(inode, oldsize, newsize);
}
EXPORT_SYMBOL(truncate_setsize);
/**
- * pagecache_isize_extended - update pagecache after extension of i_size
- * @inode: inode for which i_size was extended
- * @from: original inode size
- * @to: new inode size
- *
- * Handle extension of inode size either caused by extending truncate or by
- * write starting after current i_size. We mark the page straddling current
- * i_size RO so that page_mkwrite() is called on the nearest write access to
- * the page. This way filesystem can be sure that page_mkwrite() is called on
- * the page before user writes to the page via mmap after the i_size has been
- * changed.
- *
- * The function must be called after i_size is updated so that page fault
- * coming after we unlock the page will already see the new i_size.
- * The function must be called while we still hold i_mutex - this not only
- * makes sure i_size is stable but also that userspace cannot observe new
- * i_size value before we are prepared to store mmap writes at new inode size.
- */
-void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
-{
- int bsize = 1 << inode->i_blkbits;
- loff_t rounded_from;
- struct page *page;
- pgoff_t index;
-
- WARN_ON(to > inode->i_size);
-
- if (from >= to || bsize == PAGE_CACHE_SIZE)
- return;
- /* Page straddling @from will not have any hole block created? */
- rounded_from = round_up(from, bsize);
- if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
- return;
-
- index = from >> PAGE_CACHE_SHIFT;
- page = find_lock_page(inode->i_mapping, index);
- /* Page not cached? Nothing to do */
- if (!page)
- return;
- /*
- * See clear_page_dirty_for_io() for details why set_page_dirty()
- * is needed.
- */
- if (page_mkclean(page))
- set_page_dirty(page);
- unlock_page(page);
- page_cache_release(page);
-}
-EXPORT_SYMBOL(pagecache_isize_extended);
-
-/**
* vmtruncate - unmap mappings "freed" by truncate() syscall
* @inode: inode of the file used
- * @newsize: file offset to start truncating
+ * @offset: file offset to start truncating
*
* This function is deprecated and truncate_setsize or truncate_pagecache
* should be used instead, together with filesystem specific block truncation.
*/
-int vmtruncate(struct inode *inode, loff_t newsize)
+int vmtruncate(struct inode *inode, loff_t offset)
{
int error;
- error = inode_newsize_ok(inode, newsize);
+ error = inode_newsize_ok(inode, offset);
if (error)
return error;
- truncate_setsize(inode, newsize);
+ truncate_setsize(inode, offset);
if (inode->i_op->truncate)
inode->i_op->truncate(inode);
return 0;
}
EXPORT_SYMBOL(vmtruncate);
+
+int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
+{
+ struct address_space *mapping = inode->i_mapping;
+
+ /*
+ * If the underlying filesystem is not going to provide
+ * a way to truncate a range of blocks (punch a hole) -
+ * we should return failure right now.
+ */
+ if (!inode->i_op->truncate_range)
+ return -ENOSYS;
+
+ mutex_lock(&inode->i_mutex);
+ down_write(&inode->i_alloc_sem);
+ unmap_mapping_range(mapping, offset, (end - offset), 1);
+ inode->i_op->truncate_range(inode, offset, end);
+ /* unmap again to remove racily COWed private pages */
+ unmap_mapping_range(mapping, offset, (end - offset), 1);
+ up_write(&inode->i_alloc_sem);
+ mutex_unlock(&inode->i_mutex);
+
+ return 0;
+}
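
Annotation (not part of the patch): in kernels of this generation the only real user of vmtruncate_range() was madvise(MADV_REMOVE). Simplified from that era's madvise_remove(), the VMA-relative address range is converted to byte offsets within the backing file before the hole is punched:

#include <linux/fs.h>
#include <linux/mm.h>

/* Sketch: map [start, end) inside the VMA onto file byte offsets,
 * then ask the filesystem to punch the hole. */
static long madvise_remove_sketch(struct vm_area_struct *vma,
				  unsigned long start, unsigned long end)
{
	struct address_space *mapping = vma->vm_file->f_mapping;
	loff_t offset = (loff_t)(start - vma->vm_start)
			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
	loff_t endoff = (loff_t)(end - vma->vm_start - 1)
			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);

	return vmtruncate_range(mapping->host, offset, endoff);
}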