From 56b0dacfa2b8416815a2f2a5f4f51e46be4cf14c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Oct 2010 10:48:55 +0200 Subject: fs: mark destroy_inode static Hugetlbfs used to need it, but after the destroy_inode and evict_inode changes it's not required anymore. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 8646433..3368abd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -235,7 +235,7 @@ void __destroy_inode(struct inode *inode) } EXPORT_SYMBOL(__destroy_inode); -void destroy_inode(struct inode *inode) +static void destroy_inode(struct inode *inode) { __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) -- cgit v1.1 From a3314a0ed389f51a51695120b429eccd45b3a165 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 11 Oct 2010 22:38:00 +0900 Subject: lockdep: fixup checking of dir inode annotation Since inode->i_mode shares its bits for S_IFMT, S_ISDIR should be used to distinguish whether it is a dir or not. Signed-off-by: Namhyung Kim Signed-off-by: Al Viro --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 3368abd..4f0a67c5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -663,7 +663,7 @@ EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) { #ifdef CONFIG_DEBUG_LOCK_ALLOC - if (inode->i_mode & S_IFDIR) { + if (S_ISDIR(inode->i_mode)) { struct file_system_type *type = inode->i_sb->s_type; /* Set new key only if filesystem hasn't already changed it */ -- cgit v1.1 From a8dade34e3df581bc36ca2afe6e27055e178801c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Oct 2010 11:13:10 -0400 Subject: unexport invalidate_inodes Signed-off-by: Al Viro --- fs/inode.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 4f0a67c5..db7c74c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -417,7 +417,6 @@ int invalidate_inodes(struct super_block *sb) return busy; } -EXPORT_SYMBOL(invalidate_inodes); static int can_unuse(struct inode *inode) { -- cgit v1.1 From 1d3382cbf02986e4833849f528d451367ea0b4cb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Oct 2010 15:19:20 -0400 Subject: new helper: inode_unhashed() note: for race-free uses you inode_lock held Signed-off-by: Al Viro --- fs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index db7c74c..4440cf1 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1094,7 +1094,7 @@ int insert_inode_locked(struct inode *inode) __iget(old); spin_unlock(&inode_lock); wait_on_inode(old); - if (unlikely(!hlist_unhashed(&old->i_hash))) { + if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; } @@ -1133,7 +1133,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, __iget(old); spin_unlock(&inode_lock); wait_on_inode(old); - if (unlikely(!hlist_unhashed(&old->i_hash))) { + if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; } @@ -1186,7 +1186,7 @@ EXPORT_SYMBOL(generic_delete_inode); */ int generic_drop_inode(struct inode *inode) { - return !inode->i_nlink || hlist_unhashed(&inode->i_hash); + return !inode->i_nlink || inode_unhashed(inode); } EXPORT_SYMBOL_GPL(generic_drop_inode); -- cgit v1.1 From cffbc8aa334f55c9ed42d25202eb3ebf3a97c195 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 23 Oct 2010 05:03:02 -0400 Subject: fs: Convert nr_inodes and nr_unused to per-cpu counters The number of inodes allocated does not need to be tied to the addition or removal of an inode to/from a list. If we are not tied to a list lock, we could update the counters when inodes are initialised or destroyed, but to do that we need to convert the counters to be per-cpu (i.e. independent of a lock). This means that we have the freedom to change the list/locking implementation without needing to care about the counters. Based on a patch originally from Eric Dumazet. [AV: cleaned up a bit, fixed build breakage on weird configs Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 64 +++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 45 insertions(+), 19 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 4440cf1..0d5aecc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -103,8 +103,41 @@ static DECLARE_RWSEM(iprune_sem); */ struct inodes_stat_t inodes_stat; +static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; +static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp; + static struct kmem_cache *inode_cachep __read_mostly; +static inline int get_nr_inodes(void) +{ + return percpu_counter_sum_positive(&nr_inodes); +} + +static inline int get_nr_inodes_unused(void) +{ + return percpu_counter_sum_positive(&nr_inodes_unused); +} + +int get_nr_dirty_inodes(void) +{ + int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); + return nr_dirty > 0 ? nr_dirty : 0; + +} + +/* + * Handle nr_inode sysctl + */ +#ifdef CONFIG_SYSCTL +int proc_nr_inodes(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + inodes_stat.nr_inodes = get_nr_inodes(); + inodes_stat.nr_unused = get_nr_inodes_unused(); + return proc_dointvec(table, write, buffer, lenp, ppos); +} +#endif + static void wake_up_inode(struct inode *inode) { /* @@ -192,6 +225,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif + percpu_counter_inc(&nr_inodes); + return 0; out: return -ENOMEM; @@ -232,6 +267,7 @@ void __destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif + percpu_counter_dec(&nr_inodes); } EXPORT_SYMBOL(__destroy_inode); @@ -286,7 +322,7 @@ void __iget(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_in_use); - inodes_stat.nr_unused--; + percpu_counter_dec(&nr_inodes_unused); } void end_writeback(struct inode *inode) @@ -327,8 +363,6 @@ static void evict(struct inode *inode) */ static void dispose_list(struct list_head *head) { - int nr_disposed = 0; - while (!list_empty(head)) { struct inode *inode; @@ -344,11 +378,7 @@ static void dispose_list(struct list_head *head) wake_up_inode(inode); destroy_inode(inode); - nr_disposed++; } - spin_lock(&inode_lock); - inodes_stat.nr_inodes -= nr_disposed; - spin_unlock(&inode_lock); } /* @@ -357,7 +387,7 @@ static void dispose_list(struct list_head *head) static int invalidate_list(struct list_head *head, struct list_head *dispose) { struct list_head *next; - int busy = 0, count = 0; + int busy = 0; next = head->next; for (;;) { @@ -383,13 +413,11 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) list_move(&inode->i_list, dispose); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - count++; + percpu_counter_dec(&nr_inodes_unused); continue; } busy = 1; } - /* only unused inodes may be cached with i_count zero */ - inodes_stat.nr_unused -= count; return busy; } @@ -447,7 +475,6 @@ static int can_unuse(struct inode *inode) static void prune_icache(int nr_to_scan) { LIST_HEAD(freeable); - int nr_pruned = 0; int nr_scanned; unsigned long reap = 0; @@ -483,9 +510,8 @@ static void prune_icache(int nr_to_scan) list_move(&inode->i_list, &freeable); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - nr_pruned++; + percpu_counter_dec(&nr_inodes_unused); } - inodes_stat.nr_unused -= nr_pruned; if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else @@ -517,7 +543,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) return -1; prune_icache(nr); } - return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; + return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure; } static struct shrinker icache_shrinker = { @@ -594,7 +620,6 @@ static inline void __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) { - inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); if (head) @@ -1214,7 +1239,7 @@ static void iput_final(struct inode *inode) if (!drop) { if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; + percpu_counter_inc(&nr_inodes_unused); if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode_lock); return; @@ -1226,14 +1251,13 @@ static void iput_final(struct inode *inode) spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - inodes_stat.nr_unused--; + percpu_counter_dec(&nr_inodes_unused); hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; spin_unlock(&inode_lock); evict(inode); spin_lock(&inode_lock); @@ -1502,6 +1526,8 @@ void __init inode_init(void) SLAB_MEM_SPREAD), init_once); register_shrinker(&icache_shrinker); + percpu_counter_init(&nr_inodes, 0); + percpu_counter_init(&nr_inodes_unused, 0); /* Hash may have been set up in inode_init_early */ if (!hashdist) -- cgit v1.1 From 9e38d86ff2d8a8db99570e982230861046df32b5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 23 Oct 2010 06:55:17 -0400 Subject: fs: Implement lazy LRU updates for inodes Convert the inode LRU to use lazy updates to reduce lock and cacheline traffic. We avoid moving inodes around in the LRU list during iget/iput operations so these frequent operations don't need to access the LRUs. Instead, we defer the refcount checks to reclaim-time and use a per-inode state flag, I_REFERENCED, to tell reclaim that iget has touched the inode in the past. This means that only reclaim should be touching the LRU with any frequency, hence significantly reducing lock acquisitions and the amount contention on LRU updates. This also removes the inode_in_use list, which means we now only have one list for tracking the inode LRU status. This makes it much simpler to split out the LRU list operations under it's own lock. Signed-off-by: Nick Piggin Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 86 +++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 60 insertions(+), 26 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 0d5aecc..3bdc76f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly; * allowing for low-overhead inode sync() operations. */ -LIST_HEAD(inode_in_use); -LIST_HEAD(inode_unused); +static LIST_HEAD(inode_unused); static struct hlist_head *inode_hashtable __read_mostly; /* @@ -291,6 +290,7 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); + INIT_LIST_HEAD(&inode->i_list); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -317,12 +317,23 @@ static void init_once(void *foo) */ void __iget(struct inode *inode) { - if (atomic_inc_return(&inode->i_count) != 1) - return; + atomic_inc(&inode->i_count); +} - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_in_use); - percpu_counter_dec(&nr_inodes_unused); +static void inode_lru_list_add(struct inode *inode) +{ + if (list_empty(&inode->i_list)) { + list_add(&inode->i_list, &inode_unused); + percpu_counter_inc(&nr_inodes_unused); + } +} + +static void inode_lru_list_del(struct inode *inode) +{ + if (!list_empty(&inode->i_list)) { + list_del_init(&inode->i_list); + percpu_counter_dec(&nr_inodes_unused); + } } void end_writeback(struct inode *inode) @@ -367,7 +378,7 @@ static void dispose_list(struct list_head *head) struct inode *inode; inode = list_first_entry(head, struct inode, i_list); - list_del(&inode->i_list); + list_del_init(&inode->i_list); evict(inode); @@ -413,7 +424,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) list_move(&inode->i_list, dispose); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - percpu_counter_dec(&nr_inodes_unused); + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + percpu_counter_dec(&nr_inodes_unused); continue; } busy = 1; @@ -448,7 +460,7 @@ int invalidate_inodes(struct super_block *sb) static int can_unuse(struct inode *inode) { - if (inode->i_state) + if (inode->i_state & ~I_REFERENCED) return 0; if (inode_has_buffers(inode)) return 0; @@ -460,17 +472,20 @@ static int can_unuse(struct inode *inode) } /* - * Scan `goal' inodes on the unused list for freeable ones. They are moved to - * a temporary list and then are freed outside inode_lock by dispose_list(). + * Scan `goal' inodes on the unused list for freeable ones. They are moved to a + * temporary list and then are freed outside inode_lock by dispose_list(). * * Any inodes which are pinned purely because of attached pagecache have their - * pagecache removed. We expect the final iput() on that inode to add it to - * the front of the inode_unused list. So look for it there and if the - * inode is still freeable, proceed. The right inode is found 99.9% of the - * time in testing on a 4-way. + * pagecache removed. If the inode has metadata buffers attached to + * mapping->private_list then try to remove them. * - * If the inode has metadata buffers attached to mapping->private_list then - * try to remove them. + * If the inode has the I_REFERENCED flag set, then it means that it has been + * used recently - the flag is set in iput_final(). When we encounter such an + * inode, clear the flag and move it to the back of the LRU so it gets another + * pass through the LRU before it gets reclaimed. This is necessary because of + * the fact we are doing lazy LRU updates to minimise lock contention so the + * LRU does not have strict ordering. Hence we don't want to reclaim inodes + * with this flag set because they are the inodes that are out of order. */ static void prune_icache(int nr_to_scan) { @@ -488,8 +503,21 @@ static void prune_icache(int nr_to_scan) inode = list_entry(inode_unused.prev, struct inode, i_list); - if (inode->i_state || atomic_read(&inode->i_count)) { + /* + * Referenced or dirty inodes are still in use. Give them + * another pass through the LRU as we canot reclaim them now. + */ + if (atomic_read(&inode->i_count) || + (inode->i_state & ~I_REFERENCED)) { + list_del_init(&inode->i_list); + percpu_counter_dec(&nr_inodes_unused); + continue; + } + + /* recently referenced inodes get one more pass */ + if (inode->i_state & I_REFERENCED) { list_move(&inode->i_list, &inode_unused); + inode->i_state &= ~I_REFERENCED; continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { @@ -620,7 +648,6 @@ static inline void __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) { - list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); if (head) hlist_add_head(&inode->i_hash, head); @@ -1237,10 +1264,11 @@ static void iput_final(struct inode *inode) drop = generic_drop_inode(inode); if (!drop) { - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_unused); - percpu_counter_inc(&nr_inodes_unused); if (sb->s_flags & MS_ACTIVE) { + inode->i_state |= I_REFERENCED; + if (!(inode->i_state & (I_DIRTY|I_SYNC))) { + inode_lru_list_add(inode); + } spin_unlock(&inode_lock); return; } @@ -1251,13 +1279,19 @@ static void iput_final(struct inode *inode) spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - percpu_counter_dec(&nr_inodes_unused); hlist_del_init(&inode->i_hash); } - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; + + /* + * After we delete the inode from the LRU here, we avoid moving dirty + * inodes back onto the LRU now because I_FREEING is set and hence + * writeback_single_inode() won't move the inode around. + */ + inode_lru_list_del(inode); + + list_del_init(&inode->i_sb_list); spin_unlock(&inode_lock); evict(inode); spin_lock(&inode_lock); -- cgit v1.1 From 4c51acbc66f754e536e1c9e3331656b69bce86d0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 23 Oct 2010 06:58:09 -0400 Subject: fs: Factor inode hash operations into functions Before replacing the inode hash locking with a more scalable mechanism, factor the removal of the inode from the hashes rather than open coding it in several places. Based on a patch originally from Nick Piggin. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 100 +++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 45 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 3bdc76f..5e5bafe 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -336,6 +336,58 @@ static void inode_lru_list_del(struct inode *inode) } } +static unsigned long hash(struct super_block *sb, unsigned long hashval) +{ + unsigned long tmp; + + tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / + L1_CACHE_BYTES; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); + return tmp & I_HASHMASK; +} + +/** + * __insert_inode_hash - hash an inode + * @inode: unhashed inode + * @hashval: unsigned long value used to locate this object in the + * inode_hashtable. + * + * Add an inode to the inode hash for this superblock. + */ +void __insert_inode_hash(struct inode *inode, unsigned long hashval) +{ + struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); + spin_lock(&inode_lock); + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL(__insert_inode_hash); + +/** + * __remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash + * + * Remove an inode from the superblock. + */ +static void __remove_inode_hash(struct inode *inode) +{ + hlist_del_init(&inode->i_hash); +} + +/** + * remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash + * + * Remove an inode from the superblock. + */ +void remove_inode_hash(struct inode *inode) +{ + spin_lock(&inode_lock); + hlist_del_init(&inode->i_hash); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL(remove_inode_hash); + void end_writeback(struct inode *inode) { might_sleep(); @@ -383,7 +435,7 @@ static void dispose_list(struct list_head *head) evict(inode); spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); + __remove_inode_hash(inode); list_del_init(&inode->i_sb_list); spin_unlock(&inode_lock); @@ -634,16 +686,6 @@ repeat: return node ? inode : NULL; } -static unsigned long hash(struct super_block *sb, unsigned long hashval) -{ - unsigned long tmp; - - tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / - L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; -} - static inline void __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) @@ -1194,36 +1236,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, } EXPORT_SYMBOL(insert_inode_locked4); -/** - * __insert_inode_hash - hash an inode - * @inode: unhashed inode - * @hashval: unsigned long value used to locate this object in the - * inode_hashtable. - * - * Add an inode to the inode hash for this superblock. - */ -void __insert_inode_hash(struct inode *inode, unsigned long hashval) -{ - struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); - spin_lock(&inode_lock); - hlist_add_head(&inode->i_hash, head); - spin_unlock(&inode_lock); -} -EXPORT_SYMBOL(__insert_inode_hash); - -/** - * remove_inode_hash - remove an inode from the hash - * @inode: inode to unhash - * - * Remove an inode from the superblock. - */ -void remove_inode_hash(struct inode *inode) -{ - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); -} -EXPORT_SYMBOL(remove_inode_hash); int generic_delete_inode(struct inode *inode) { @@ -1279,7 +1291,7 @@ static void iput_final(struct inode *inode) spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - hlist_del_init(&inode->i_hash); + __remove_inode_hash(inode); } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; @@ -1294,9 +1306,7 @@ static void iput_final(struct inode *inode) list_del_init(&inode->i_sb_list); spin_unlock(&inode_lock); evict(inode); - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); + remove_inode_hash(inode); wake_up_inode(inode); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); destroy_inode(inode); -- cgit v1.1 From ad5e195ac9fdf4e2b28b8cf14937e5b9384dac2e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 07:00:16 -0400 Subject: fs: Stop abusing find_inode_fast in iunique Stop abusing find_inode_fast for iunique and opencode the inode hash walk. Introduce a new iunique_lock to protect the iunique counters once inode_lock is removed. Based on a patch originally from Nick Piggin. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 5e5bafe..a8035e8 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -884,6 +884,27 @@ static struct inode *get_new_inode_fast(struct super_block *sb, return inode; } +/* + * search the inode cache for a matching inode number. + * If we find one, then the inode number we are trying to + * allocate is not unique and so we should not use it. + * + * Returns 1 if the inode number is unique, 0 if it is not. + */ +static int test_inode_iunique(struct super_block *sb, unsigned long ino) +{ + struct hlist_head *b = inode_hashtable + hash(sb, ino); + struct hlist_node *node; + struct inode *inode; + + hlist_for_each_entry(inode, node, b, i_hash) { + if (inode->i_ino == ino && inode->i_sb == sb) + return 0; + } + + return 1; +} + /** * iunique - get a unique inode number * @sb: superblock @@ -905,19 +926,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) * error if st_ino won't fit in target struct field. Use 32bit counter * here to attempt to avoid that. */ + static DEFINE_SPINLOCK(iunique_lock); static unsigned int counter; - struct inode *inode; - struct hlist_head *head; ino_t res; spin_lock(&inode_lock); + spin_lock(&iunique_lock); do { if (counter <= max_reserved) counter = max_reserved + 1; res = counter++; - head = inode_hashtable + hash(sb, res); - inode = find_inode_fast(sb, head, res); - } while (inode != NULL); + } while (!test_inode_iunique(sb, res)); + spin_unlock(&iunique_lock); spin_unlock(&inode_lock); return res; -- cgit v1.1 From f7899bd5472e8e99741369b4a32eca44e5282a85 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 07:09:06 -0400 Subject: fs: move i_count increments into find_inode/find_inode_fast Now that iunique is not abusing find_inode anymore we can move the i_ref increment back to where it belongs. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index a8035e8..78c41c6 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -634,9 +634,6 @@ static struct shrinker icache_shrinker = { static void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. - * NOTE: we are not increasing the inode-refcount, you must call __iget() - * by hand after calling find_inode now! This simplifies iunique and won't - * add any additional branch in the common code. */ static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, @@ -656,9 +653,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } - break; + __iget(inode); + return inode; } - return node ? inode : NULL; + return NULL; } /* @@ -681,9 +679,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } - break; + __iget(inode); + return inode; } - return node ? inode : NULL; + return NULL; } static inline void @@ -828,7 +827,6 @@ static struct inode *get_new_inode(struct super_block *sb, * us. Use the old inode instead of the one we just * allocated. */ - __iget(old); spin_unlock(&inode_lock); destroy_inode(inode); inode = old; @@ -875,7 +873,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb, * us. Use the old inode instead of the one we just * allocated. */ - __iget(old); spin_unlock(&inode_lock); destroy_inode(inode); inode = old; @@ -989,7 +986,6 @@ static struct inode *ifind(struct super_block *sb, spin_lock(&inode_lock); inode = find_inode(sb, head, test, data); if (inode) { - __iget(inode); spin_unlock(&inode_lock); if (likely(wait)) wait_on_inode(inode); @@ -1022,7 +1018,6 @@ static struct inode *ifind_fast(struct super_block *sb, spin_lock(&inode_lock); inode = find_inode_fast(sb, head, ino); if (inode) { - __iget(inode); spin_unlock(&inode_lock); wait_on_inode(inode); return inode; -- cgit v1.1 From 646ec4615cd05972581c9c5342ed7a1e77df17bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 07:15:32 -0400 Subject: fs: remove inode_add_to_list/__inode_add_to_list Split up inode_add_to_list/__inode_add_to_list. Locking for the two lists will be split soon so these helpers really don't buy us much anymore. The __ prefixes for the sb list helpers will go away soon, but until inode_lock is gone we'll need them to distinguish between the locked and unlocked variants. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 70 ++++++++++++++++++++++++++++---------------------------------- 1 file changed, 32 insertions(+), 38 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 78c41c6..430d70f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -336,6 +336,28 @@ static void inode_lru_list_del(struct inode *inode) } } +static inline void __inode_sb_list_add(struct inode *inode) +{ + list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); +} + +/** + * inode_sb_list_add - add inode to the superblock list of inodes + * @inode: inode to add + */ +void inode_sb_list_add(struct inode *inode) +{ + spin_lock(&inode_lock); + __inode_sb_list_add(inode); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL_GPL(inode_sb_list_add); + +static inline void __inode_sb_list_del(struct inode *inode) +{ + list_del_init(&inode->i_sb_list); +} + static unsigned long hash(struct super_block *sb, unsigned long hashval) { unsigned long tmp; @@ -356,9 +378,10 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { - struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); + struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); + spin_lock(&inode_lock); - hlist_add_head(&inode->i_hash, head); + hlist_add_head(&inode->i_hash, b); spin_unlock(&inode_lock); } EXPORT_SYMBOL(__insert_inode_hash); @@ -436,7 +459,7 @@ static void dispose_list(struct list_head *head) spin_lock(&inode_lock); __remove_inode_hash(inode); - list_del_init(&inode->i_sb_list); + __inode_sb_list_del(inode); spin_unlock(&inode_lock); wake_up_inode(inode); @@ -685,37 +708,6 @@ repeat: return NULL; } -static inline void -__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, - struct inode *inode) -{ - list_add(&inode->i_sb_list, &sb->s_inodes); - if (head) - hlist_add_head(&inode->i_hash, head); -} - -/** - * inode_add_to_lists - add a new inode to relevant lists - * @sb: superblock inode belongs to - * @inode: inode to mark in use - * - * When an inode is allocated it needs to be accounted for, added to the in use - * list, the owning superblock and the inode hash. This needs to be done under - * the inode_lock, so export a function to do this rather than the inode lock - * itself. We calculate the hash list to add to here so it is all internal - * which requires the caller to have already set up the inode number in the - * inode to add. - */ -void inode_add_to_lists(struct super_block *sb, struct inode *inode) -{ - struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); - - spin_lock(&inode_lock); - __inode_add_to_lists(sb, head, inode); - spin_unlock(&inode_lock); -} -EXPORT_SYMBOL_GPL(inode_add_to_lists); - /** * new_inode - obtain an inode * @sb: superblock @@ -743,7 +735,7 @@ struct inode *new_inode(struct super_block *sb) inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); - __inode_add_to_lists(sb, NULL, inode); + __inode_sb_list_add(inode); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); @@ -812,7 +804,8 @@ static struct inode *get_new_inode(struct super_block *sb, if (set(inode, data)) goto set_failed; - __inode_add_to_lists(sb, head, inode); + hlist_add_head(&inode->i_hash, head); + __inode_sb_list_add(inode); inode->i_state = I_NEW; spin_unlock(&inode_lock); @@ -858,7 +851,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb, old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; - __inode_add_to_lists(sb, head, inode); + hlist_add_head(&inode->i_hash, head); + __inode_sb_list_add(inode); inode->i_state = I_NEW; spin_unlock(&inode_lock); @@ -1318,7 +1312,7 @@ static void iput_final(struct inode *inode) */ inode_lru_list_del(inode); - list_del_init(&inode->i_sb_list); + __inode_sb_list_del(inode); spin_unlock(&inode_lock); evict(inode); remove_inode_hash(inode); -- cgit v1.1 From 7de9c6ee3ecffd99e1628e81a5ea5468f7581a1f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Oct 2010 11:11:40 -0400 Subject: new helper: ihold() Clones an existing reference to inode; caller must already hold one. Signed-off-by: Al Viro --- fs/inode.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 430d70f..05ea293 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -320,6 +320,15 @@ void __iget(struct inode *inode) atomic_inc(&inode->i_count); } +/* + * get additional reference to inode; caller must already hold one. + */ +void ihold(struct inode *inode) +{ + WARN_ON(atomic_inc_return(&inode->i_count) < 2); +} +EXPORT_SYMBOL(ihold); + static void inode_lru_list_add(struct inode *inode) { if (list_empty(&inode->i_list)) { -- cgit v1.1 From f991bd2e14210fb93d722cb23e54991de20e8a3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Oct 2010 11:18:01 -0400 Subject: fs: introduce a per-cpu last_ino allocator new_inode() dirties a contended cache line to get increasing inode numbers. This limits performance on workloads that cause significant parallel inode allocation. Solve this problem by using a per_cpu variable fed by the shared last_ino in batches of 1024 allocations. This reduces contention on the shared last_ino, and give same spreading ino numbers than before (i.e. same wraparound after 2^32 allocations). Signed-off-by: Eric Dumazet Signed-off-by: Nick Piggin Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 05ea293..46a3e12 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -717,6 +717,43 @@ repeat: return NULL; } +/* + * Each cpu owns a range of LAST_INO_BATCH numbers. + * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations, + * to renew the exhausted range. + * + * This does not significantly increase overflow rate because every CPU can + * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is + * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the + * 2^32 range, and is a worst-case. Even a 50% wastage would only increase + * overflow rate by 2x, which does not seem too significant. + * + * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW + * error if st_ino won't fit in target struct field. Use 32bit counter + * here to attempt to avoid that. + */ +#define LAST_INO_BATCH 1024 +static DEFINE_PER_CPU(unsigned int, last_ino); + +static unsigned int get_next_ino(void) +{ + unsigned int *p = &get_cpu_var(last_ino); + unsigned int res = *p; + +#ifdef CONFIG_SMP + if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) { + static atomic_t shared_last_ino; + int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino); + + res = next - LAST_INO_BATCH; + } +#endif + + *p = ++res; + put_cpu_var(last_ino); + return res; +} + /** * new_inode - obtain an inode * @sb: superblock @@ -731,12 +768,6 @@ repeat: */ struct inode *new_inode(struct super_block *sb) { - /* - * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW - * error if st_ino won't fit in target struct field. Use 32bit counter - * here to attempt to avoid that. - */ - static unsigned int last_ino; struct inode *inode; spin_lock_prefetch(&inode_lock); @@ -745,7 +776,7 @@ struct inode *new_inode(struct super_block *sb) if (inode) { spin_lock(&inode_lock); __inode_sb_list_add(inode); - inode->i_ino = ++last_ino; + inode->i_ino = get_next_ino(); inode->i_state = 0; spin_unlock(&inode_lock); } -- cgit v1.1 From 85fe4025c616a7c0ed07bc2fc8c5371b07f3888c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 11:19:54 -0400 Subject: fs: do not assign default i_ino in new_inode Instead of always assigning an increasing inode number in new_inode move the call to assign it into those callers that actually need it. For now callers that need it is estimated conservatively, that is the call is added to all filesystems that do not assign an i_ino by themselves. For a few more filesystems we can avoid assigning any inode number given that they aren't user visible, and for others it could be done lazily when an inode number is actually needed, but that's left for later patches. Signed-off-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- fs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 46a3e12..2cd2e48 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -735,7 +735,7 @@ repeat: #define LAST_INO_BATCH 1024 static DEFINE_PER_CPU(unsigned int, last_ino); -static unsigned int get_next_ino(void) +unsigned int get_next_ino(void) { unsigned int *p = &get_cpu_var(last_ino); unsigned int res = *p; @@ -753,6 +753,7 @@ static unsigned int get_next_ino(void) put_cpu_var(last_ino); return res; } +EXPORT_SYMBOL(get_next_ino); /** * new_inode - obtain an inode @@ -776,7 +777,6 @@ struct inode *new_inode(struct super_block *sb) if (inode) { spin_lock(&inode_lock); __inode_sb_list_add(inode); - inode->i_ino = get_next_ino(); inode->i_state = 0; spin_unlock(&inode_lock); } -- cgit v1.1 From 99a38919241fd051b8d93b2e4d0c05ef0556d795 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 19:07:20 +0200 Subject: fs: fix buffer invalidation in invalidate_list We must not call invalidate_inode_buffers in invalidate_list unless the inode can be reclaimed. If we remove the buffer association of a busy inode fsync won't find the buffers anymore. As invalidate_inode_buffers is called from various others sources than umount this actually does matter in practice. While at it change the loop to a more natural form and remove the WARN_ON for I_NEW, wich we already tested a few lines above. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 2cd2e48..4bedac3 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -28,7 +28,6 @@ /* * This is needed for the following functions: * - inode_has_buffers - * - invalidate_inode_buffers * - invalidate_bdev * * FIXME: remove all knowledge of the buffer layer from this file @@ -503,16 +502,15 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) inode = list_entry(tmp, struct inode, i_sb_list); if (inode->i_state & I_NEW) continue; - invalidate_inode_buffers(inode); - if (!atomic_read(&inode->i_count)) { - list_move(&inode->i_list, dispose); - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; - if (!(inode->i_state & (I_DIRTY | I_SYNC))) - percpu_counter_dec(&nr_inodes_unused); + if (atomic_read(&inode->i_count)) { + busy = 1; continue; } - busy = 1; + + list_move(&inode->i_list, dispose); + inode->i_state |= I_FREEING; + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + percpu_counter_dec(&nr_inodes_unused); } return busy; } -- cgit v1.1 From 7ccf19a8042e343f8159f8a5fdd6a9422aa90c78 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 21 Oct 2010 11:49:30 +1100 Subject: fs: inode split IO and LRU lists The use of the same inode list structure (inode->i_list) for two different list constructs with different lifecycles and purposes makes it impossible to separate the locking of the different operations. Therefore, to enable the separation of the locking of the writeback and reclaim lists, split the inode->i_list into two separate lists dedicated to their specific tracking functions. Signed-off-by: Nick Piggin Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 53 ++++++++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 19 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 4bedac3..09e2d7a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -71,7 +71,7 @@ static unsigned int i_hash_shift __read_mostly; * allowing for low-overhead inode sync() operations. */ -static LIST_HEAD(inode_unused); +static LIST_HEAD(inode_lru); static struct hlist_head *inode_hashtable __read_mostly; /* @@ -271,6 +271,7 @@ EXPORT_SYMBOL(__destroy_inode); static void destroy_inode(struct inode *inode) { + BUG_ON(!list_empty(&inode->i_lru)); __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); @@ -289,7 +290,8 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); - INIT_LIST_HEAD(&inode->i_list); + INIT_LIST_HEAD(&inode->i_wb_list); + INIT_LIST_HEAD(&inode->i_lru); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -330,16 +332,16 @@ EXPORT_SYMBOL(ihold); static void inode_lru_list_add(struct inode *inode) { - if (list_empty(&inode->i_list)) { - list_add(&inode->i_list, &inode_unused); + if (list_empty(&inode->i_lru)) { + list_add(&inode->i_lru, &inode_lru); percpu_counter_inc(&nr_inodes_unused); } } static void inode_lru_list_del(struct inode *inode) { - if (!list_empty(&inode->i_list)) { - list_del_init(&inode->i_list); + if (!list_empty(&inode->i_lru)) { + list_del_init(&inode->i_lru); percpu_counter_dec(&nr_inodes_unused); } } @@ -460,8 +462,8 @@ static void dispose_list(struct list_head *head) while (!list_empty(head)) { struct inode *inode; - inode = list_first_entry(head, struct inode, i_list); - list_del_init(&inode->i_list); + inode = list_first_entry(head, struct inode, i_lru); + list_del_init(&inode->i_lru); evict(inode); @@ -507,8 +509,14 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) continue; } - list_move(&inode->i_list, dispose); inode->i_state |= I_FREEING; + + /* + * Move the inode off the IO lists and LRU once I_FREEING is + * set so that it won't get moved back on there if it is dirty. + */ + list_move(&inode->i_lru, dispose); + list_del_init(&inode->i_wb_list); if (!(inode->i_state & (I_DIRTY | I_SYNC))) percpu_counter_dec(&nr_inodes_unused); } @@ -580,10 +588,10 @@ static void prune_icache(int nr_to_scan) for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; - if (list_empty(&inode_unused)) + if (list_empty(&inode_lru)) break; - inode = list_entry(inode_unused.prev, struct inode, i_list); + inode = list_entry(inode_lru.prev, struct inode, i_lru); /* * Referenced or dirty inodes are still in use. Give them @@ -591,14 +599,14 @@ static void prune_icache(int nr_to_scan) */ if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { - list_del_init(&inode->i_list); + list_del_init(&inode->i_lru); percpu_counter_dec(&nr_inodes_unused); continue; } /* recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { - list_move(&inode->i_list, &inode_unused); + list_move(&inode->i_lru, &inode_lru); inode->i_state &= ~I_REFERENCED; continue; } @@ -611,15 +619,21 @@ static void prune_icache(int nr_to_scan) iput(inode); spin_lock(&inode_lock); - if (inode != list_entry(inode_unused.next, - struct inode, i_list)) + if (inode != list_entry(inode_lru.next, + struct inode, i_lru)) continue; /* wrong inode or list_empty */ if (!can_unuse(inode)) continue; } - list_move(&inode->i_list, &freeable); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; + + /* + * Move the inode off the IO lists and LRU once I_FREEING is + * set so that it won't get moved back on there if it is dirty. + */ + list_move(&inode->i_lru, &freeable); + list_del_init(&inode->i_wb_list); percpu_counter_dec(&nr_inodes_unused); } if (current_is_kswapd()) @@ -1340,15 +1354,16 @@ static void iput_final(struct inode *inode) inode->i_state &= ~I_WILL_FREE; __remove_inode_hash(inode); } + WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; /* - * After we delete the inode from the LRU here, we avoid moving dirty - * inodes back onto the LRU now because I_FREEING is set and hence - * writeback_single_inode() won't move the inode around. + * Move the inode off the IO lists and LRU once I_FREEING is + * set so that it won't get moved back on there if it is dirty. */ inode_lru_list_del(inode); + list_del_init(&inode->i_wb_list); __inode_sb_list_del(inode); spin_unlock(&inode_lock); -- cgit v1.1 From d895a1c96af8c2a0f6a5e0119695a7c6b92df8db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Oct 2010 19:40:24 +0200 Subject: fs: do not drop inode_lock in dispose_list Despite the comment above it we can not safely drop the lock here. invalidate_list is called from many other places that just umount. Also switch to proper list macros now that we never drop the lock. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 09e2d7a..1bf2be4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -482,26 +482,10 @@ static void dispose_list(struct list_head *head) */ static int invalidate_list(struct list_head *head, struct list_head *dispose) { - struct list_head *next; + struct inode *inode, *next; int busy = 0; - next = head->next; - for (;;) { - struct list_head *tmp = next; - struct inode *inode; - - /* - * We can reschedule here without worrying about the list's - * consistency because the per-sb list of inodes must not - * change during umount anymore, and because iprune_sem keeps - * shrink_icache_memory() away. - */ - cond_resched_lock(&inode_lock); - - next = next->next; - if (tmp == head) - break; - inode = list_entry(tmp, struct inode, i_sb_list); + list_for_each_entry_safe(inode, next, head, i_sb_list) { if (inode->i_state & I_NEW) continue; if (atomic_read(&inode->i_count)) { -- cgit v1.1 From a031878670ac8fe466859d4c1506bd91ae48678c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Oct 2010 19:40:33 +0200 Subject: fs: fold invalidate_list into invalidate_inodes Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 1bf2be4..f6b3b52 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -477,15 +477,24 @@ static void dispose_list(struct list_head *head) } } -/* - * Invalidate all inodes for a device. +/** + * invalidate_inodes - attempt to free all inodes on a superblock + * @sb: superblock to operate on + * + * Attempts to free all inodes for a given superblock. If there were any + * busy inodes return a non-zero value, else zero. */ -static int invalidate_list(struct list_head *head, struct list_head *dispose) +int invalidate_inodes(struct super_block *sb) { - struct inode *inode, *next; int busy = 0; + struct inode *inode, *next; + LIST_HEAD(dispose); - list_for_each_entry_safe(inode, next, head, i_sb_list) { + down_write(&iprune_sem); + + spin_lock(&inode_lock); + fsnotify_unmount_inodes(&sb->s_inodes); + list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { if (inode->i_state & I_NEW) continue; if (atomic_read(&inode->i_count)) { @@ -499,34 +508,14 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) * Move the inode off the IO lists and LRU once I_FREEING is * set so that it won't get moved back on there if it is dirty. */ - list_move(&inode->i_lru, dispose); + list_move(&inode->i_lru, &dispose); list_del_init(&inode->i_wb_list); if (!(inode->i_state & (I_DIRTY | I_SYNC))) percpu_counter_dec(&nr_inodes_unused); } - return busy; -} - -/** - * invalidate_inodes - discard the inodes on a device - * @sb: superblock - * - * Discard all of the inodes for a given superblock. If the discard - * fails because there are busy inodes then a non zero value is returned. - * If the discard is successful all the inodes have been discarded. - */ -int invalidate_inodes(struct super_block *sb) -{ - int busy; - LIST_HEAD(throw_away); - - down_write(&iprune_sem); - spin_lock(&inode_lock); - fsnotify_unmount_inodes(&sb->s_inodes); - busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); - dispose_list(&throw_away); + dispose_list(&dispose); up_write(&iprune_sem); return busy; -- cgit v1.1 From 63997e98a3be68d7cec806d22bf9b02b2e1daabb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Oct 2010 20:49:35 -0400 Subject: split invalidate_inodes() Pull removal of fsnotify marks into generic_shutdown_super(). Split umount-time work into a new function - evict_inodes(). Make sure that invalidate_inodes() will be able to cope with I_FREEING once we change locking in iput(). Signed-off-by: Al Viro --- fs/inode.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index f6b3b52..a6d6068 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -478,6 +478,49 @@ static void dispose_list(struct list_head *head) } /** + * evict_inodes - evict all evictable inodes for a superblock + * @sb: superblock to operate on + * + * Make sure that no inodes with zero refcount are retained. This is + * called by superblock shutdown after having MS_ACTIVE flag removed, + * so any inode reaching zero refcount during or after that call will + * be immediately evicted. + */ +void evict_inodes(struct super_block *sb) +{ + struct inode *inode, *next; + LIST_HEAD(dispose); + + down_write(&iprune_sem); + + spin_lock(&inode_lock); + list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { + if (atomic_read(&inode->i_count)) + continue; + + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + WARN_ON(1); + continue; + } + + inode->i_state |= I_FREEING; + + /* + * Move the inode off the IO lists and LRU once I_FREEING is + * set so that it won't get moved back on there if it is dirty. + */ + list_move(&inode->i_lru, &dispose); + list_del_init(&inode->i_wb_list); + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + percpu_counter_dec(&nr_inodes_unused); + } + spin_unlock(&inode_lock); + + dispose_list(&dispose); + up_write(&iprune_sem); +} + +/** * invalidate_inodes - attempt to free all inodes on a superblock * @sb: superblock to operate on * @@ -493,9 +536,8 @@ int invalidate_inodes(struct super_block *sb) down_write(&iprune_sem); spin_lock(&inode_lock); - fsnotify_unmount_inodes(&sb->s_inodes); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { - if (inode->i_state & I_NEW) + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) continue; if (atomic_read(&inode->i_count)) { busy = 1; -- cgit v1.1