Diffstat (limited to 'mm/truncate.c')
-rw-r--r-- | mm/truncate.c | 228
1 files changed, 106 insertions, 122 deletions
diff --git a/mm/truncate.c b/mm/truncate.c
index 143883a..6a11acb 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -12,7 +12,7 @@
 #include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
-#include <linux/export.h>
+#include <linux/module.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/pagevec.h>
@@ -20,7 +20,6 @@
 #include <linux/buffer_head.h>	/* grr. try_to_release_page, do_invalidatepage */
 #include <linux/cleancache.h>
-#include <linux/rmap.h>
 #include "internal.h"
@@ -139,12 +138,18 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
 	return ret;
 }
 
+#ifdef CONFIG_MACH_P4NOTE
+static int unmap_mapcount = -99;
+#endif
 int truncate_inode_page(struct address_space *mapping, struct page *page)
 {
 	if (page_mapped(page)) {
 		unmap_mapping_range(mapping,
 				   (loff_t)page->index << PAGE_CACHE_SHIFT,
 				   PAGE_CACHE_SIZE, 0);
+#ifdef CONFIG_MACH_P4NOTE
+		unmap_mapcount = atomic_read(&(page)->_mapcount);
+#endif
 	}
 	return truncate_complete_page(mapping, page);
 }
@@ -200,6 +205,9 @@ int invalidate_inode_page(struct page *page)
  * The first pass will remove most pages, so the search cost of the second pass
  * is low.
  *
+ * When looking at page->index outside the page lock we need to be careful to
+ * copy it into a local to avoid races (it could change at any time).
+ *
  * We pass down the cache-hot hint to the page freeing code. Even if the
  * mapping is large, it is probably the case that the final pages are the most
  * recently touched, and freeing happens in ascending file offset order.
@@ -208,10 +216,10 @@ void truncate_inode_pages_range(struct address_space *mapping,
 				loff_t lstart, loff_t lend)
 {
 	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
+	pgoff_t end;
 	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	struct pagevec pvec;
-	pgoff_t index;
-	pgoff_t end;
+	pgoff_t next;
 	int i;
 
 	cleancache_flush_inode(mapping);
@@ -222,21 +230,24 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		end = (lend >> PAGE_CACHE_SHIFT);
 
 	pagevec_init(&pvec, 0);
-	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	next = start;
+	while (next <= end &&
+			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
+			pgoff_t page_index = page->index;
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
-			if (index > end)
+			if (page_index > end) {
+				next = page_index;
 				break;
+			}
 
+			if (page_index > next)
+				next = page_index;
+			next++;
 			if (!trylock_page(page))
 				continue;
-			WARN_ON(page->index != index);
 			if (PageWriteback(page)) {
 				unlock_page(page);
 				continue;
@@ -247,7 +258,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
-		index++;
 	}
 
 	if (partial) {
@@ -260,17 +270,16 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 	}
 
-	index = start;
+	next = start;
 	for ( ; ; ) {
 		cond_resched();
-		if (!pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
-			if (index == start)
+		if (!pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+			if (next == start)
 				break;
-			index = start;
+			next = start;
 			continue;
 		}
-		if (index == start && pvec.pages[0]->index > end) {
+		if (pvec.pages[0]->index > end) {
 			pagevec_release(&pvec);
 			break;
 		}
@@ -278,20 +287,18 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
-			if (index > end)
+			if (page->index > end)
 				break;
-
 			lock_page(page);
-			WARN_ON(page->index != index);
 			wait_on_page_writeback(page);
 			truncate_inode_page(mapping, page);
+			if (page->index > next)
+				next = page->index;
+			next++;
 			unlock_page(page);
 		}
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
-		index++;
 	}
 	cleancache_flush_inode(mapping);
 }
@@ -332,34 +339,35 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
-	pgoff_t index = start;
+	pgoff_t next = start;
 	unsigned long ret;
 	unsigned long count = 0;
 	int i;
 
-	/*
-	 * Note: this function may get called on a shmem/tmpfs mapping:
-	 * pagevec_lookup() might then return 0 prematurely (because it
-	 * got a gangful of swap entries); but it's hardly worth worrying
-	 * about - it can rarely have anything to free from such a mapping
-	 * (most pages are dirty), and already skips over any difficulties.
-	 */
-
 	pagevec_init(&pvec, 0);
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	while (next <= end &&
+			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
+			pgoff_t index;
+			int lock_failed;
 
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
-			if (index > end)
-				break;
+			lock_failed = !trylock_page(page);
 
-			if (!trylock_page(page))
+			/*
+			 * We really shouldn't be looking at the ->index of an
+			 * unlocked page. But we're not allowed to lock these
+			 * pages. So we rely upon nobody altering the ->index
+			 * of this (pinned-by-us) page.
+			 */
+			index = page->index;
+			if (index > next)
+				next = index;
+			next++;
+			if (lock_failed)
 				continue;
-			WARN_ON(page->index != index);
+
 			ret = invalidate_inode_page(page);
 			unlock_page(page);
 			/*
@@ -369,11 +377,12 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 			if (!ret)
 				deactivate_page(page);
 			count += ret;
+			if (next > end)
+				break;
 		}
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
-		index++;
 	}
 	return count;
 }
@@ -440,32 +449,37 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				  pgoff_t start, pgoff_t end)
 {
 	struct pagevec pvec;
-	pgoff_t index;
+	pgoff_t next;
 	int i;
 	int ret = 0;
 	int ret2 = 0;
 	int did_range_unmap = 0;
+	int wrapped = 0;
 
 	cleancache_flush_inode(mapping);
 	pagevec_init(&pvec, 0);
-	index = start;
-	while (index <= end && pagevec_lookup(&pvec, mapping, index,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
+	next = start;
+	while (next <= end && !wrapped &&
+		pagevec_lookup(&pvec, mapping, next,
+			min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
 		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
-
-			/* We rely upon deletion not changing page->index */
-			index = page->index;
-			if (index > end)
-				break;
+			pgoff_t page_index;
 
 			lock_page(page);
-			WARN_ON(page->index != index);
 			if (page->mapping != mapping) {
 				unlock_page(page);
 				continue;
 			}
+			page_index = page->index;
+			next = page_index + 1;
+			if (next == 0)
+				wrapped = 1;
+			if (page_index > end) {
+				unlock_page(page);
+				break;
+			}
 			wait_on_page_writeback(page);
 			if (page_mapped(page)) {
 				if (!did_range_unmap) {
@@ -473,9 +487,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 					 * Zap the rest of the file in one hit.
 					 */
 					unmap_mapping_range(mapping,
-					   (loff_t)index << PAGE_CACHE_SHIFT,
-					   (loff_t)(1 + end - index)
-							 << PAGE_CACHE_SHIFT,
+					   (loff_t)page_index<<PAGE_CACHE_SHIFT,
+					   (loff_t)(end - page_index + 1)
+							<< PAGE_CACHE_SHIFT,
 					    0);
 					did_range_unmap = 1;
 				} else {
@@ -483,8 +497,8 @@
 					 * Just zap this page
 					 */
 					unmap_mapping_range(mapping,
-					  (loff_t)index << PAGE_CACHE_SHIFT,
-					  PAGE_CACHE_SIZE, 0);
+					  (loff_t)page_index<<PAGE_CACHE_SHIFT,
+					  PAGE_CACHE_SIZE, 0);
 				}
 			}
 			BUG_ON(page_mapped(page));
@@ -500,7 +514,6 @@
 		pagevec_release(&pvec);
 		mem_cgroup_uncharge_end();
 		cond_resched();
-		index++;
 	}
 	cleancache_flush_inode(mapping);
 	return ret;
@@ -525,8 +538,8 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
 /**
  * truncate_pagecache - unmap and remove pagecache that has been truncated
  * @inode: inode
- * @oldsize: old file size
- * @newsize: new file size
+ * @old: old file offset
+ * @new: new file offset
  *
  * inode's new i_size must already be written before truncate_pagecache
 * is called.
@@ -538,10 +551,9 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
  * situations such as writepage being called for a page that has already
  * had its underlying blocks deallocated.
  */
-void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
+void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
 {
 	struct address_space *mapping = inode->i_mapping;
-	loff_t holebegin = round_up(newsize, PAGE_SIZE);
 
 	/*
 	 * unmap_mapping_range is called twice, first simply for
@@ -552,9 +564,9 @@ void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
 	 * truncate_inode_pages finishes, hence the second
 	 * unmap_mapping_range call must be made for correctness.
 	 */
-	unmap_mapping_range(mapping, holebegin, 0, 1);
-	truncate_inode_pages(mapping, newsize);
-	unmap_mapping_range(mapping, holebegin, 0, 1);
+	unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
+	truncate_inode_pages(mapping, new);
+	unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
 }
 EXPORT_SYMBOL(truncate_pagecache);
 
@@ -576,82 +588,54 @@ void truncate_setsize(struct inode *inode, loff_t newsize)
 
 	oldsize = inode->i_size;
 	i_size_write(inode, newsize);
-	if (newsize > oldsize)
-		pagecache_isize_extended(inode, oldsize, newsize);
+	truncate_pagecache(inode, oldsize, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
 
 /**
- * pagecache_isize_extended - update pagecache after extension of i_size
- * @inode: inode for which i_size was extended
- * @from: original inode size
- * @to: new inode size
- *
- * Handle extension of inode size either caused by extending truncate or by
- * write starting after current i_size. We mark the page straddling current
- * i_size RO so that page_mkwrite() is called on the nearest write access to
- * the page. This way filesystem can be sure that page_mkwrite() is called on
- * the page before user writes to the page via mmap after the i_size has been
- * changed.
- *
- * The function must be called after i_size is updated so that page fault
- * coming after we unlock the page will already see the new i_size.
- * The function must be called while we still hold i_mutex - this not only
- * makes sure i_size is stable but also that userspace cannot observe new
- * i_size value before we are prepared to store mmap writes at new inode size.
- */
-void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
-{
-	int bsize = 1 << inode->i_blkbits;
-	loff_t rounded_from;
-	struct page *page;
-	pgoff_t index;
-
-	WARN_ON(to > inode->i_size);
-
-	if (from >= to || bsize == PAGE_CACHE_SIZE)
-		return;
-	/* Page straddling @from will not have any hole block created? */
-	rounded_from = round_up(from, bsize);
-	if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
-		return;
-
-	index = from >> PAGE_CACHE_SHIFT;
-	page = find_lock_page(inode->i_mapping, index);
-	/* Page not cached? Nothing to do */
-	if (!page)
-		return;
-	/*
-	 * See clear_page_dirty_for_io() for details why set_page_dirty()
-	 * is needed.
-	 */
-	if (page_mkclean(page))
-		set_page_dirty(page);
-	unlock_page(page);
-	page_cache_release(page);
-}
-EXPORT_SYMBOL(pagecache_isize_extended);
-
-/**
  * vmtruncate - unmap mappings "freed" by truncate() syscall
  * @inode: inode of the file used
- * @newsize: file offset to start truncating
+ * @offset: file offset to start truncating
  *
 * This function is deprecated and truncate_setsize or truncate_pagecache
 * should be used instead, together with filesystem specific block truncation.
 */
-int vmtruncate(struct inode *inode, loff_t newsize)
+int vmtruncate(struct inode *inode, loff_t offset)
 {
 	int error;
 
-	error = inode_newsize_ok(inode, newsize);
+	error = inode_newsize_ok(inode, offset);
 	if (error)
 		return error;
 
-	truncate_setsize(inode, newsize);
+	truncate_setsize(inode, offset);
 	if (inode->i_op->truncate)
 		inode->i_op->truncate(inode);
 	return 0;
 }
 EXPORT_SYMBOL(vmtruncate);
+
+int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
+{
+	struct address_space *mapping = inode->i_mapping;
+
+	/*
+	 * If the underlying filesystem is not going to provide
+	 * a way to truncate a range of blocks (punch a hole) -
+	 * we should return failure right now.
+	 */
+	if (!inode->i_op->truncate_range)
+		return -ENOSYS;
+
+	mutex_lock(&inode->i_mutex);
+	down_write(&inode->i_alloc_sem);
+	unmap_mapping_range(mapping, offset, (end - offset), 1);
+	inode->i_op->truncate_range(inode, offset, end);
+	/* unmap again to remove racily COWed private pages */
+	unmap_mapping_range(mapping, offset, (end - offset), 1);
+	up_write(&inode->i_alloc_sem);
+	mutex_unlock(&inode->i_mutex);
+
+	return 0;
+}
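For readers following the interleaved +/- lines, the loop below restates the scan-cursor pattern that the truncate_inode_pages_range() hunks above restore, as plain C rather than as a diff. It is a sketch, not a standalone compilable unit: struct page, pagevec_lookup() and the other helpers belong to the kernel tree this patch applies to, mem_cgroup accounting is omitted, and the per-page truncation work is elided.

	/* First pass of truncate_inode_pages_range(), as it reads after this patch. */
	next = start;
	while (next <= end &&
			pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			struct page *page = pvec.pages[i];
			pgoff_t page_index = page->index;	/* snapshot once, unlocked */

			if (page_index > end) {		/* past the requested range */
				next = page_index;
				break;
			}
			if (page_index > next)		/* jump the cursor over holes */
				next = page_index;
			next++;				/* resume lookup after this page */

			if (!trylock_page(page))
				continue;
			/* ... skip pages under writeback, truncate the locked page ... */
			unlock_page(page);
		}
		pagevec_release(&pvec);
	}

Because pagevec_lookup() is asked for a full PAGEVEC_SIZE batch starting at 'next', the cursor has to be re-derived from each page's index; the pre-patch code instead trimmed the lookup length with min(end - index, ...) and bumped index once per batch after processing it.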
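Similarly, the truncate_pagecache() and truncate_setsize() hunks are easier to follow as the call chain they leave behind. The condensed sketch below is assembled from those hunks (declarations are merged for brevity), so it is not the literal file contents after the patch.

	/* After this patch: unmap, drop the stale pagecache, unmap again. */
	void truncate_pagecache(struct inode *inode, loff_t old, loff_t new)
	{
		struct address_space *mapping = inode->i_mapping;

		/* per the comment in the hunk above, the second
		 * unmap_mapping_range call must be made for correctness */
		unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
		truncate_inode_pages(mapping, new);
		unmap_mapping_range(mapping, new + PAGE_SIZE - 1, 0, 1);
	}

	void truncate_setsize(struct inode *inode, loff_t newsize)
	{
		loff_t oldsize = inode->i_size;

		i_size_write(inode, newsize);	/* publish the new i_size first */
		truncate_pagecache(inode, oldsize, newsize);
	}

Note that the patch also drops the holebegin = round_up(newsize, PAGE_SIZE) calculation and removes pagecache_isize_extended() entirely, so extending truncates no longer mark the page straddling the old i_size read-only for page_mkwrite().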