Diffstat (limited to 'fs')
41 files changed, 562 insertions, 402 deletions
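A recurring theme in the buffer-layer hunks below is the removal of the SWRITE* modes from ll_rw_block() in favour of the new write_dirty_buffer() helper. As a rough illustration (a sketch, not part of the diff itself), the conversion pattern applied in fat_sync_bhs(), the JBD/JBD2 checkpoint code and the UFS helpers looks like this, assuming an array of already-dirtied buffer heads:

	/* before: SWRITE submitted the current contents, callers waited separately */
	ll_rw_block(SWRITE, nr_bhs, bhs);
	for (i = 0; i < nr_bhs; i++)
		wait_on_buffer(bhs[i]);

	/* after: write_dirty_buffer() takes the buffer lock itself and skips
	 * clean buffers, so callers submit one at a time and wait as before */
	for (i = 0; i < nr_bhs; i++)
		write_dirty_buffer(bhs[i], WRITE);
	for (i = 0; i < nr_bhs; i++)
		wait_on_buffer(bhs[i]);

Callers that previously relied on SWRITE_SYNC semantics use __sync_dirty_buffer() with an explicit rw argument instead, as the jbd commit-record hunks show.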
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9e60fd2..a7528b9 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -108,7 +108,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) Node *fmt; struct file * interp_file = NULL; char iname[BINPRM_BUF_SIZE]; - char *iname_addr = iname; + const char *iname_addr = iname; int retval; int fd_binary = -1; diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index aca9d55..396a988 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c @@ -16,7 +16,8 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) { - char *cp, *i_name, *i_arg; + const char *i_arg, *i_name; + char *cp; struct file *file; char interp[BINPRM_BUF_SIZE]; int retval; diff --git a/fs/buffer.c b/fs/buffer.c index 50efa33..3e7dca2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. */ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. Note @@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(buffer_unwritten(bh)); /* - * Mask in barrier bit for a write (could be either a WRITE or a - * WRITE_SYNC - */ - if (buffer_ordered(bh) && (rw & WRITE)) - rw |= WRITE_BARRIER; - - /* * Only clear out a write error when rewriting */ if (test_set_buffer_req(bh) && (rw & WRITE)) @@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). 
* * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. */ -int sync_dirty_buffer(struct buffer_head *bh) +int __sync_dirty_buffer(struct buffer_head *bh, int rw) { int ret = 0; @@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(rw, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(__sync_dirty_buffer); + +int sync_dirty_buffer(struct buffer_head *bh) +{ + return __sync_dirty_buffer(bh, WRITE_SYNC); +} EXPORT_SYMBOL(sync_dirty_buffer); /* diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index a53b130..1e7a330 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb, } } else { inode = iget_locked(sb, CRAMINO(cramfs_inode)); - if (inode) { + if (inode && (inode->i_state & I_NEW)) { setup_inode(inode, cramfs_inode); unlock_new_inode(inode); } diff --git a/fs/dcache.c b/fs/dcache.c index 4d13bf5..83293be 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci); * d_lookup - search for a dentry * @parent: parent dentry * @name: qstr of name we wish to find + * Returns: dentry, or NULL * - * Searches the children of the parent dentry for the name in question. If - * the dentry is found its reference count is incremented and the dentry - * is returned. The caller must use dput to free the entry when it has - * finished using it. %NULL is returned on failure. - * - * __d_lookup is dcache_lock free. The hash list is protected using RCU. - * Memory barriers are used while updating and doing lockless traversal. - * To avoid races with d_move while rename is happening, d_lock is used. - * - * Overflows in memcmp(), while d_move, are avoided by keeping the length - * and name pointer in one structure pointed by d_qstr. - * - * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while - * lookup is going on. - * - * The dentry unused LRU is not updated even if lookup finds the required dentry - * in there. 
It is updated in places such as prune_dcache, shrink_dcache_sb, - * select_parent and __dget_locked. This laziness saves lookup from dcache_lock - * acquisition. - * - * d_lookup() is protected against the concurrent renames in some unrelated - * directory using the seqlockt_t rename_lock. + * d_lookup searches the children of the parent dentry for the name in + * question. If the dentry is found its reference count is incremented and the + * dentry is returned. The caller must use dput to free the entry when it has + * finished using it. %NULL is returned if the dentry does not exist. */ - struct dentry * d_lookup(struct dentry * parent, struct qstr * name) { struct dentry * dentry = NULL; @@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name) } EXPORT_SYMBOL(d_lookup); +/* + * __d_lookup - search for a dentry (racy) + * @parent: parent dentry + * @name: qstr of name we wish to find + * Returns: dentry, or NULL + * + * __d_lookup is like d_lookup, however it may (rarely) return a + * false-negative result due to unrelated rename activity. + * + * __d_lookup is slightly faster by avoiding rename_lock read seqlock, + * however it must be used carefully, eg. with a following d_lookup in + * the case of failure. + * + * __d_lookup callers must be commented. + */ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) { unsigned int len = name->len; @@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) struct hlist_node *node; struct dentry *dentry; + /* + * The hash list is protected using RCU. + * + * Take d_lock when comparing a candidate dentry, to avoid races + * with d_move(). + * + * It is possible that concurrent renames can mess up our list + * walk here and result in missing our dentry, resulting in the + * false-negative result. d_lookup() protects against concurrent + * renames using rename_lock seqlock. + * + * See Documentation/vfs/dcache-locking.txt for more details. + */ rcu_read_lock(); hlist_for_each_entry_rcu(dentry, node, head, d_hash) { @@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) /* * Recheck the dentry after taking the lock - d_move may have - * changed things. Don't bother checking the hash because we're - * about to compare the whole name anyway. + * changed things. Don't bother checking the hash because + * we're about to compare the whole name anyway. 
*/ if (dentry->d_parent != parent) goto next; @@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root, bool slash = false; int error = 0; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); while (dentry != root->dentry || vfsmnt != root->mnt) { struct dentry * parent; @@ -1954,7 +1964,7 @@ out: if (!error && !slash) error = prepend(buffer, buflen, "/", 1); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return error; global_root: @@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2) struct vfsmount *mnt = path1->mnt; struct dentry *dentry = path1->dentry; int res; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); if (mnt != path2->mnt) { for (;;) { if (mnt->mnt_parent == mnt) { - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } if (mnt->mnt_parent == path2->mnt) @@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2) dentry = mnt->mnt_mountpoint; } res = is_subdir(dentry, path2->dentry); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } EXPORT_SYMBOL(path_is_under); @@ -361,13 +361,13 @@ err: /* * count() counts the number of strings in array ARGV. */ -static int count(char __user * __user * argv, int max) +static int count(const char __user * const __user * argv, int max) { int i = 0; if (argv != NULL) { for (;;) { - char __user * p; + const char __user * p; if (get_user(p, argv)) return -EFAULT; @@ -387,7 +387,7 @@ static int count(char __user * __user * argv, int max) * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ -static int copy_strings(int argc, char __user * __user * argv, +static int copy_strings(int argc, const char __user *const __user *argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; @@ -396,7 +396,7 @@ static int copy_strings(int argc, char __user * __user * argv, int ret; while (argc-- > 0) { - char __user *str; + const char __user *str; int len; unsigned long pos; @@ -470,12 +470,13 @@ out: /* * Like copy_strings, but get argv and its values from kernel memory. */ -int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm) +int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) { int r; mm_segment_t oldfs = get_fs(); set_fs(KERNEL_DS); - r = copy_strings(argc, (char __user * __user *)argv, bprm); + r = copy_strings(argc, (const char __user *const __user *)argv, bprm); set_fs(oldfs); return r; } @@ -997,7 +998,7 @@ EXPORT_SYMBOL(flush_old_exec); void setup_new_exec(struct linux_binprm * bprm) { int i, ch; - char * name; + const char *name; char tcomm[sizeof(current->comm)]; arch_pick_mmap_layout(current->mm); @@ -1117,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; - write_lock(&p->fs->lock); + spin_lock(&p->fs->lock); rcu_read_lock(); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1134,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm) res = 1; } } - write_unlock(&p->fs->lock); + spin_unlock(&p->fs->lock); return res; } @@ -1316,9 +1317,9 @@ EXPORT_SYMBOL(search_binary_handler); /* * sys_execve() executes a new program. 
*/ -int do_execve(char * filename, - char __user *__user *argv, - char __user *__user *envp, +int do_execve(const char * filename, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs * regs) { struct linux_binprm *bprm; diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6..1736f23 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/file_table.c b/fs/file_table.c index edecd36..a04bdd8 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -20,7 +20,9 @@ #include <linux/cdev.h> #include <linux/fsnotify.h> #include <linux/sysctl.h> +#include <linux/lglock.h> #include <linux/percpu_counter.h> +#include <linux/percpu.h> #include <linux/ima.h> #include <asm/atomic.h> @@ -32,8 +34,8 @@ struct files_stat_struct files_stat = { .max_files = NR_FILE }; -/* public. Not pretty! */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock); +DECLARE_LGLOCK(files_lglock); +DEFINE_LGLOCK(files_lglock); /* SLAB cache for file structures */ static struct kmem_cache *filp_cachep __read_mostly; @@ -249,7 +251,7 @@ static void __fput(struct file *file) cdev_put(inode->i_cdev); fops_put(file->f_op); put_pid(file->f_owner.pid); - file_kill(file); + file_sb_list_del(file); if (file->f_mode & FMODE_WRITE) drop_file_write_access(file); file->f_path.dentry = NULL; @@ -328,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed) return file; } - void put_filp(struct file *file) { if (atomic_long_dec_and_test(&file->f_count)) { security_file_free(file); - file_kill(file); + file_sb_list_del(file); file_free(file); } } -void file_move(struct file *file, struct list_head *list) +static inline int file_list_cpu(struct file *file) { - if (!list) - return; - file_list_lock(); - list_move(&file->f_u.fu_list, list); - file_list_unlock(); +#ifdef CONFIG_SMP + return file->f_sb_list_cpu; +#else + return smp_processor_id(); +#endif +} + +/* helper for file_sb_list_add to reduce ifdefs */ +static inline void __file_sb_list_add(struct file *file, struct super_block *sb) +{ + struct list_head *list; +#ifdef CONFIG_SMP + int cpu; + cpu = smp_processor_id(); + file->f_sb_list_cpu = cpu; + list = per_cpu_ptr(sb->s_files, cpu); +#else + list = &sb->s_files; +#endif + list_add(&file->f_u.fu_list, list); } -void file_kill(struct file *file) +/** + * file_sb_list_add - add a file to the sb's file list + * @file: file to add + * @sb: sb to add it to + * + * Use this function to associate a file with the superblock of the inode it + * refers to. + */ +void file_sb_list_add(struct file *file, struct super_block *sb) +{ + lg_local_lock(files_lglock); + __file_sb_list_add(file, sb); + lg_local_unlock(files_lglock); +} + +/** + * file_sb_list_del - remove a file from the sb's file list + * @file: file to remove + * @sb: sb to remove it from + * + * Use this function to remove a file from its superblock. + */ +void file_sb_list_del(struct file *file) { if (!list_empty(&file->f_u.fu_list)) { - file_list_lock(); + lg_local_lock_cpu(files_lglock, file_list_cpu(file)); list_del_init(&file->f_u.fu_list); - file_list_unlock(); + lg_local_unlock_cpu(files_lglock, file_list_cpu(file)); } } +#ifdef CONFIG_SMP + +/* + * These macros iterate all files on all CPUs for a given superblock. 
+ * files_lglock must be held globally. + */ +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + int i; \ + for_each_possible_cpu(i) { \ + struct list_head *list; \ + list = per_cpu_ptr((__sb)->s_files, i); \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ + } \ +} + +#else + +#define do_file_list_for_each_entry(__sb, __file) \ +{ \ + struct list_head *list; \ + list = &(sb)->s_files; \ + list_for_each_entry((__file), list, f_u.fu_list) + +#define while_file_list_for_each_entry \ +} + +#endif + int fs_may_remount_ro(struct super_block *sb) { struct file *file; - /* Check that no files are currently opened for writing. */ - file_list_lock(); - list_for_each_entry(file, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, file) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ @@ -372,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb) /* Writeable file? */ if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE)) goto too_bad; - } - file_list_unlock(); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); return 1; /* Tis' cool bro. */ too_bad: - file_list_unlock(); + lg_global_unlock(files_lglock); return 0; } @@ -392,8 +460,8 @@ void mark_files_ro(struct super_block *sb) struct file *f; retry: - file_list_lock(); - list_for_each_entry(f, &sb->s_files, f_u.fu_list) { + lg_global_lock(files_lglock); + do_file_list_for_each_entry(sb, f) { struct vfsmount *mnt; if (!S_ISREG(f->f_path.dentry->d_inode->i_mode)) continue; @@ -408,16 +476,13 @@ retry: continue; file_release_write(f); mnt = mntget(f->f_path.mnt); - file_list_unlock(); - /* - * This can sleep, so we can't hold - * the file_list_lock() spinlock. - */ + /* This can sleep, so we can't hold the spinlock. 
*/ + lg_global_unlock(files_lglock); mnt_drop_write(mnt); mntput(mnt); goto retry; - } - file_list_unlock(); + } while_file_list_for_each_entry; + lg_global_unlock(files_lglock); } void __init files_init(unsigned long mempages) @@ -437,5 +502,6 @@ void __init files_init(unsigned long mempages) if (files_stat.max_files < NR_FILE) files_stat.max_files = NR_FILE; files_defer_init(); + lg_lock_init(files_lglock); percpu_counter_init(&nr_files, 0); } diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 1ee40eb..ed45a9c 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path) { struct path old_root; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_root = fs->root; fs->root = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_root.dentry) path_put(&old_root); } @@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path) { struct path old_pwd; - write_lock(&fs->lock); + spin_lock(&fs->lock); old_pwd = fs->pwd; fs->pwd = *path; path_get(path); - write_unlock(&fs->lock); + spin_unlock(&fs->lock); if (old_pwd.dentry) path_put(&old_pwd); @@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) task_lock(p); fs = p->fs; if (fs) { - write_lock(&fs->lock); + spin_lock(&fs->lock); if (fs->root.dentry == old_root->dentry && fs->root.mnt == old_root->mnt) { path_get(new_root); @@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) fs->pwd = *new_root; count++; } - write_unlock(&fs->lock); + spin_unlock(&fs->lock); } task_unlock(p); } while_each_thread(g, p); @@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk) if (fs) { int kill; task_lock(tsk); - write_lock(&fs->lock); + spin_lock(&fs->lock); tsk->fs = NULL; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(tsk); if (kill) free_fs_struct(fs); @@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) if (fs) { fs->users = 1; fs->in_exec = 0; - rwlock_init(&fs->lock); + spin_lock_init(&fs->lock); fs->umask = old->umask; get_fs_root_and_pwd(old, &fs->root, &fs->pwd); } @@ -121,10 +121,10 @@ int unshare_fs_struct(void) return -ENOMEM; task_lock(current); - write_lock(&fs->lock); + spin_lock(&fs->lock); kill = !--fs->users; current->fs = new_fs; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) @@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { .users = 1, - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; @@ -156,14 +156,14 @@ void daemonize_fs_struct(void) task_lock(current); - write_lock(&init_fs.lock); + spin_lock(&init_fs.lock); init_fs.users++; - write_unlock(&init_fs.lock); + spin_unlock(&init_fs.lock); - write_lock(&fs->lock); + spin_lock(&fs->lock); current->fs = &init_fs; kill = !--fs->users; - write_unlock(&fs->lock); + spin_unlock(&fs->lock); task_unlock(current); if (kill) diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 99800e5..6bc9e3a 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value, if (error < 0) goto failed; inode->i_mode = mode; + inode->i_ctime = CURRENT_TIME; if (error == 0) { posix_acl_release(acl); acl = NULL; diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index dd1e555..f7dc9b5 100644 --- a/fs/hostfs/hostfs_kern.c +++ 
b/fs/hostfs/hostfs_kern.c @@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name) __putname(name); return NULL; } - strncpy(name, root, PATH_MAX); + strlcpy(name, root, PATH_MAX); if (len > p - name) { __putname(name); return NULL; @@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) char *path = dentry_name(dentry); int err = -ENOMEM; if (path) { - int err = hostfs_do_readlink(path, link, PATH_MAX); + err = hostfs_do_readlink(path, link, PATH_MAX); if (err == PATH_MAX) err = -E2BIG; __putname(path); diff --git a/fs/internal.h b/fs/internal.h index 6b706bc..a6910e9 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -9,6 +9,8 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/lglock.h> + struct super_block; struct linux_binprm; struct path; @@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); -extern spinlock_t vfsmount_lock; +DECLARE_BRLOCK(vfsmount_lock); + /* * fs_struct.c @@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *); /* * file_table.c */ +extern void file_sb_list_add(struct file *f, struct super_block *sb); +extern void file_sb_list_del(struct file *f); extern void mark_files_ro(struct super_block *); extern struct file *get_empty_filp(void); diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd..05a38b9 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9dda..95d8c11 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, struct buffer_head *bh; journal_header_t *header; int ret; - int barrier_done = 0; if (is_journal_aborted(journal)) return 0; @@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); + if (journal->j_flags & JFS_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - if (barrier_done) - clear_buffer_ordered(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); + /* + * Is it possible for another commit to fail at roughly + * the same time as this one? 
If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP) { + char b[BDEVNAME_SIZE]; - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + } else { ret = sync_dirty_buffer(bh); } + put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94..2c4b1f1 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad71732..d290183 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f..5247e7f 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8..7c068c1 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - int barrier_done = 0; struct timespec now = current_kernel_time(); if (is_journal_aborted(journal)) @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (barrier_done) - clear_buffer_ordered(bh); - - /* is it possible for another commit to fail at roughly - * the same time as this one? 
If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk(KERN_WARNING + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + write_unlock(&journal->j_state_lock); - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + /* And try again, without the barrier */ + lock_buffer(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + ret = submit_bh(WRITE_SYNC_PLUG, bh); + } + } else { ret = submit_bh(WRITE_SYNC_PLUG, bh); } *cbh = bh; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866a..0e8014e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06..9ad321f 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/mbcache.c b/fs/mbcache.c index cf4e6cd..9344474 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c @@ -80,6 +80,7 @@ struct mb_cache { struct list_head c_cache_list; const char *c_name; atomic_t c_entry_count; + int c_max_entries; int c_bucket_bits; struct kmem_cache *c_entry_cache; struct list_head *c_block_hash; @@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits) if (!cache->c_entry_cache) goto fail2; + /* + * Set an upper limit on the number of cache entries so that the hash + * chains won't grow too long. 
+ */ + cache->c_max_entries = bucket_count << 4; + spin_lock(&mb_cache_spinlock); list_add(&cache->c_cache_list, &mb_cache_list); spin_unlock(&mb_cache_spinlock); @@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache) kfree(cache); } - /* * mb_cache_entry_alloc() * @@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache) struct mb_cache_entry * mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) { - struct mb_cache_entry *ce; - - ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); - if (ce) { + struct mb_cache_entry *ce = NULL; + + if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { + spin_lock(&mb_cache_spinlock); + if (!list_empty(&mb_cache_lru_list)) { + ce = list_entry(mb_cache_lru_list.next, + struct mb_cache_entry, e_lru_list); + list_del_init(&ce->e_lru_list); + __mb_cache_entry_unhash(ce); + } + spin_unlock(&mb_cache_spinlock); + } + if (!ce) { + ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); + if (!ce) + return NULL; atomic_inc(&cache->c_entry_count); INIT_LIST_HEAD(&ce->e_lru_list); INIT_LIST_HEAD(&ce->e_block_list); ce->e_cache = cache; - ce->e_used = 1 + MB_CACHE_WRITER; ce->e_queued = 0; } + ce->e_used = 1 + MB_CACHE_WRITER; return ce; } @@ -595,15 +595,16 @@ int follow_up(struct path *path) { struct vfsmount *parent; struct dentry *mountpoint; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); parent = path->mnt->mnt_parent; if (parent == path->mnt) { - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return 0; } mntget(parent); mountpoint = dget(path->mnt->mnt_mountpoint); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); dput(path->dentry); path->dentry = mountpoint; mntput(path->mnt); @@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd) } /* + * Allocate a dentry with name and parent, and perform a parent + * directory ->lookup on it. Returns the new dentry, or ERR_PTR + * on error. parent->d_inode->i_mutex must be held. d_lookup must + * have verified that no child exists while under i_mutex. + */ +static struct dentry *d_alloc_and_lookup(struct dentry *parent, + struct qstr *name, struct nameidata *nd) +{ + struct inode *inode = parent->d_inode; + struct dentry *dentry; + struct dentry *old; + + /* Don't create child dentry for a dead directory. */ + if (unlikely(IS_DEADDIR(inode))) + return ERR_PTR(-ENOENT); + + dentry = d_alloc(parent, name); + if (unlikely(!dentry)) + return ERR_PTR(-ENOMEM); + + old = inode->i_op->lookup(inode, dentry, nd); + if (unlikely(old)) { + dput(dentry); + dentry = old; + } + return dentry; +} + +/* * It's more convoluted than I'd like it to be, but... it's still fairly * small and for now I'd prefer to have fast path as straight as possible. * It _is_ time-critical. @@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name, return err; } + /* + * Rename seqlock is not required here because in the off chance + * of a false negative due to a concurrent rename, we're going to + * do the non-racy lookup, below. + */ dentry = __d_lookup(nd->path.dentry, name); if (!dentry) goto need_lookup; +found: if (dentry->d_op && dentry->d_op->d_revalidate) goto need_revalidate; done: @@ -724,56 +760,28 @@ need_lookup: mutex_lock(&dir->i_mutex); /* * First re-do the cached lookup just in case it was created - * while we waited for the directory semaphore.. + * while we waited for the directory semaphore, or the first + * lookup failed due to an unrelated rename. * - * FIXME! 
This could use version numbering or similar to - * avoid unnecessary cache lookups. - * - * The "dcache_lock" is purely to protect the RCU list walker - * from concurrent renames at this point (we mustn't get false - * negatives from the RCU list walk here, unlike the optimistic - * fast walk). - * - * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup + * This could use version numbering or similar to avoid unnecessary + * cache lookups, but then we'd have to do the first lookup in the + * non-racy way. However in the common case here, everything should + * be hot in cache, so would it be a big win? */ dentry = d_lookup(parent, name); - if (!dentry) { - struct dentry *new; - - /* Don't create child dentry for a dead directory. */ - dentry = ERR_PTR(-ENOENT); - if (IS_DEADDIR(dir)) - goto out_unlock; - - new = d_alloc(parent, name); - dentry = ERR_PTR(-ENOMEM); - if (new) { - dentry = dir->i_op->lookup(dir, new, nd); - if (dentry) - dput(new); - else - dentry = new; - } -out_unlock: + if (likely(!dentry)) { + dentry = d_alloc_and_lookup(parent, name, nd); mutex_unlock(&dir->i_mutex); if (IS_ERR(dentry)) goto fail; goto done; } - /* * Uhhuh! Nasty case: the cache was re-populated while * we waited on the semaphore. Need to revalidate. */ mutex_unlock(&dir->i_mutex); - if (dentry->d_op && dentry->d_op->d_revalidate) { - dentry = do_revalidate(dentry, nd); - if (!dentry) - dentry = ERR_PTR(-ENOENT); - } - if (IS_ERR(dentry)) - goto fail; - goto done; + goto found; need_revalidate: dentry = do_revalidate(dentry, nd); @@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name, goto out; } - dentry = __d_lookup(base, name); - - /* lockess __d_lookup may fail due to concurrent d_move() - * in some unrelated directory, so try with d_lookup + /* + * Don't bother with __d_lookup: callers are for creat as + * well as unlink, so a lot of the time it would cost + * a double lookup. */ - if (!dentry) - dentry = d_lookup(base, name); + dentry = d_lookup(base, name); if (dentry && dentry->d_op && dentry->d_op->d_revalidate) dentry = do_revalidate(dentry, nd); - if (!dentry) { - struct dentry *new; - - /* Don't create child dentry for a dead directory. */ - dentry = ERR_PTR(-ENOENT); - if (IS_DEADDIR(inode)) - goto out; - - new = d_alloc(base, name); - dentry = ERR_PTR(-ENOMEM); - if (!new) - goto out; - dentry = inode->i_op->lookup(inode, new, nd); - if (!dentry) - dentry = new; - else - dput(new); - } + if (!dentry) + dentry = d_alloc_and_lookup(base, name, nd); out: return dentry; } diff --git a/fs/namespace.c b/fs/namespace.c index 2e10cb1..de402eb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -11,6 +11,8 @@ #include <linux/syscalls.h> #include <linux/slab.h> #include <linux/sched.h> +#include <linux/spinlock.h> +#include <linux/percpu.h> #include <linux/smp_lock.h> #include <linux/init.h> #include <linux/kernel.h> @@ -38,12 +40,10 @@ #define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head)) #define HASH_SIZE (1UL << HASH_SHIFT) -/* spinlock for vfsmount related operations, inplace of dcache_lock */ -__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock); - static int event; static DEFINE_IDA(mnt_id_ida); static DEFINE_IDA(mnt_group_ida); +static DEFINE_SPINLOCK(mnt_id_lock); static int mnt_id_start = 0; static int mnt_group_start = 1; @@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem; struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); +/* + * vfsmount lock may be taken for read to prevent changes to the + * vfsmount hash, ie. 
during mountpoint lookups or walking back + * up the tree. + * + * It should be taken for write in all cases where the vfsmount + * tree or hash is modified or when a vfsmount structure is modified. + */ +DEFINE_BRLOCK(vfsmount_lock); + static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) { unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES); @@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry) #define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16) -/* allocation is serialized by namespace_sem */ +/* + * allocation is serialized by namespace_sem, but we need the spinlock to + * serialize with freeing. + */ static int mnt_alloc_id(struct vfsmount *mnt) { int res; retry: ida_pre_get(&mnt_id_ida, GFP_KERNEL); - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id); if (!res) mnt_id_start = mnt->mnt_id + 1; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); if (res == -EAGAIN) goto retry; @@ -86,11 +99,11 @@ retry: static void mnt_free_id(struct vfsmount *mnt) { int id = mnt->mnt_id; - spin_lock(&vfsmount_lock); + spin_lock(&mnt_id_lock); ida_remove(&mnt_id_ida, id); if (mnt_id_start > id) mnt_id_start = id; - spin_unlock(&vfsmount_lock); + spin_unlock(&mnt_id_lock); } /* @@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt) { int ret = 0; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_flags |= MNT_WRITE_HOLD; /* * After storing MNT_WRITE_HOLD, we'll read the counters. This store @@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt) */ smp_wmb(); mnt->mnt_flags &= ~MNT_WRITE_HOLD; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); return ret; } static void __mnt_unmake_readonly(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_flags &= ~MNT_READONLY; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) @@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt) /* * find the first or last mount at @dentry on vfsmount @mnt depending on * @dir. If @dir is set return the first mount else return the last mount. + * vfsmount_lock must be held for read or write. 
*/ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, int dir) @@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *lookup_mnt(struct path *path) { struct vfsmount *child_mnt; - spin_lock(&vfsmount_lock); + + br_read_lock(vfsmount_lock); if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1))) mntget(child_mnt); - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return child_mnt; } @@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt) return mnt->mnt_ns == current->nsproxy->mnt_ns; } +/* + * vfsmount lock must be held for write + */ static void touch_mnt_namespace(struct mnt_namespace *ns) { if (ns) { @@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * vfsmount lock must be held for write + */ static void __touch_mnt_namespace(struct mnt_namespace *ns) { if (ns && ns->event != event) { @@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns) } } +/* + * vfsmount lock must be held for write + */ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) { old_path->dentry = mnt->mnt_mountpoint; @@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path) old_path->dentry->d_mounted--; } +/* + * vfsmount lock must be held for write + */ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, struct vfsmount *child_mnt) { @@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry, dentry->d_mounted++; } +/* + * vfsmount lock must be held for write + */ static void attach_mnt(struct vfsmount *mnt, struct path *path) { mnt_set_mountpoint(path->mnt, path->dentry, mnt); @@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path) } /* - * the caller must hold vfsmount_lock + * vfsmount lock must be held for write */ static void commit_tree(struct vfsmount *mnt) { @@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt) void mntput_no_expire(struct vfsmount *mnt) { repeat: - if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) { - if (likely(!mnt->mnt_pinned)) { - spin_unlock(&vfsmount_lock); - __mntput(mnt); - return; - } - atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); - mnt->mnt_pinned = 0; - spin_unlock(&vfsmount_lock); - acct_auto_close_mnt(mnt); - goto repeat; + if (atomic_add_unless(&mnt->mnt_count, -1, 1)) + return; + br_write_lock(vfsmount_lock); + if (!atomic_dec_and_test(&mnt->mnt_count)) { + br_write_unlock(vfsmount_lock); + return; + } + if (likely(!mnt->mnt_pinned)) { + br_write_unlock(vfsmount_lock); + __mntput(mnt); + return; } + atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count); + mnt->mnt_pinned = 0; + br_write_unlock(vfsmount_lock); + acct_auto_close_mnt(mnt); + goto repeat; } - EXPORT_SYMBOL(mntput_no_expire); void mnt_pin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt->mnt_pinned++; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_pin); void mnt_unpin(struct vfsmount *mnt) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (mnt->mnt_pinned) { atomic_inc(&mnt->mnt_count); mnt->mnt_pinned--; } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } EXPORT_SYMBOL(mnt_unpin); @@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p) struct mnt_namespace *ns = p->ns; int res = 0; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (p->event != ns->event) { p->event = 
ns->event; res = 1; } - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); return res; } @@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt) int minimum_refs = 0; struct vfsmount *p; - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); for (p = mnt; p; p = next_mnt(p, mnt)) { actual_refs += atomic_read(&p->mnt_count); minimum_refs += 2; } - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); if (actual_refs > minimum_refs) return 0; @@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt) { int ret = 1; down_read(&namespace_sem); - spin_lock(&vfsmount_lock); + br_read_lock(vfsmount_lock); if (propagate_mount_busy(mnt, 2)) ret = 0; - spin_unlock(&vfsmount_lock); + br_read_unlock(vfsmount_lock); up_read(&namespace_sem); return ret; } @@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head) if (mnt->mnt_parent != mnt) { struct dentry *dentry; struct vfsmount *m; - spin_lock(&vfsmount_lock); + + br_write_lock(vfsmount_lock); dentry = mnt->mnt_mountpoint; m = mnt->mnt_parent; mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; m->mnt_ghosts--; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); dput(dentry); mntput(m); } @@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head) } } +/* + * vfsmount lock must be held for write + * namespace_sem must be held for write + */ void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill) { struct vfsmount *p; @@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags) } down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); event++; if (!(flags & MNT_DETACH)) @@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags) umount_tree(mnt, 1, &umount_list); retval = 0; } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); return retval; @@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry, q = clone_mnt(p, p->mnt_root, flag); if (!q) goto Enomem; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&q->mnt_list, &res->mnt_list); attach_mnt(q, &path); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } } return res; Enomem: if (res) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(res, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } return NULL; @@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt) { LIST_HEAD(umount_list); down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); } @@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, if (err) goto out_cleanup_ids; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (IS_MNT_SHARED(dest_mnt)) { for (p = source_mnt; p; p = next_mnt(p, source_mnt)) @@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt, list_del_init(&child->mnt_hash); commit_tree(child); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); + return 0; out_cleanup_ids: @@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag) goto out_unlock; } - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); for (m = mnt; 
m; m = (recurse ? next_mnt(m, mnt) : NULL)) change_mnt_propagation(m, type); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); out_unlock: up_write(&namespace_sem); @@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name, err = graft_tree(mnt, path); if (err) { LIST_HEAD(umount_list); - spin_lock(&vfsmount_lock); + + br_write_lock(vfsmount_lock); umount_tree(mnt, 0, &umount_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); } @@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags, else err = do_remount_sb(sb, flags, data, 0); if (!err) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK; path->mnt->mnt_flags = mnt_flags; - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } up_write(&sb->s_umount); if (!err) { - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); touch_mnt_namespace(path->mnt->mnt_ns); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); } return err; } @@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); /* extract from the expiration list every vfsmount that matches the * following criteria: @@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts) touch_mnt_namespace(mnt->mnt_ns); umount_tree(mnt, 1, &umounts); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umounts); @@ -1830,6 +1871,8 @@ resume: /* * process a list of expirable mountpoints with the intent of discarding any * submounts of a specific parent mountpoint + * + * vfsmount_lock must be held for write */ static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts) { @@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, kfree(new_ns); return ERR_PTR(-ENOMEM); } - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); list_add_tail(&new_ns->list, &new_ns->root->mnt_list); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); /* * Second pass: switch the tsk->fs->* elements and mark new vfsmounts @@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, goto out2; /* not attached */ /* make sure we can reach put_old from new_root */ tmp = old.mnt; - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); if (tmp != new.mnt) { for (;;) { if (tmp->mnt_parent == tmp) @@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* mount new_root on / */ attach_mnt(new.mnt, &root_parent); touch_mnt_namespace(current->nsproxy->mnt_ns); - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); chroot_fs_refs(&root, &new); error = 0; path_put(&root_parent); @@ -2279,7 +2322,7 @@ out1: out0: return error; out3: - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); goto out2; } @@ -2326,6 +2369,8 @@ void __init mnt_init(void) for (u = 0; u < HASH_SIZE; u++) INIT_LIST_HEAD(&mount_hashtable[u]); + br_lock_init(vfsmount_lock); + err = sysfs_init(); if (err) printk(KERN_WARNING "%s: sysfs_init error: %d\n", @@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns) if (!atomic_dec_and_test(&ns->count)) return; down_write(&namespace_sem); - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); umount_tree(ns->root, 0, &umount_list); - spin_unlock(&vfsmount_lock); + 
br_write_unlock(vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); kfree(ns); diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 26a510a..6c2aad4 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -63,7 +63,6 @@ config NFS_V3_ACL config NFS_V4 bool "NFS client support for NFS version 4" depends on NFS_FS - select RPCSEC_GSS_KRB5 help This option enables support for version 4 of the NFS protocol (RFC 3530) in the kernel's NFS client. diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 29539ce..e257172 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -140,6 +140,13 @@ nfs_opendir(struct inode *inode, struct file *filp) /* Call generic open code in order to cache credentials */ res = nfs_open(inode, filp); + if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { + /* This is a mountpoint, so d_revalidate will never + * have been called, so we need to refresh the + * inode (for close-open consistency) ourselves. + */ + __nfs_revalidate_inode(NFS_SERVER(inode), inode); + } return res; } @@ -1103,7 +1110,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) goto no_open_dput; /* We can't create new files, or truncate existing ones here */ - openflags &= ~(O_CREAT|O_TRUNC); + openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); /* * Note: we're not holding inode->i_mutex and so may be racing with diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2d141a7..eb51bd6 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -323,7 +323,7 @@ nfs_file_fsync(struct file *file, int datasync) have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); if (have_error) ret = xchg(&ctx->error, 0); - if (!ret) + if (!ret && status < 0) ret = status; return ret; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ffbb98..089da5b 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2036,7 +2036,8 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) struct rpc_cred *cred; struct nfs4_state *state; struct dentry *res; - fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); + int open_flags = nd->intent.open.flags; + fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); if (nd->flags & LOOKUP_CREATE) { attr.ia_mode = nd->intent.open.create_mode; @@ -2044,8 +2045,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) if (!IS_POSIXACL(dir)) attr.ia_mode &= ~current_umask(); } else { + open_flags &= ~O_EXCL; attr.ia_valid = 0; - BUG_ON(nd->intent.open.flags & O_CREAT); + BUG_ON(open_flags & O_CREAT); } cred = rpc_lookup_cred(); @@ -2054,7 +2056,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) parent = dentry->d_parent; /* Protect against concurrent sillydeletes */ nfs_block_sillyrename(parent); - state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred); + state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); put_rpccred(cred); if (IS_ERR(state)) { if (PTR_ERR(state) == -ENOENT) { @@ -2273,8 +2275,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct out: if (page) __free_page(page); - if (locations) - kfree(locations); + kfree(locations); return status; } diff --git a/fs/nfs/super.c b/fs/nfs/super.c index ee26316..ec3966e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -655,6 +655,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, if (nfss->options & NFS_OPTION_FSCACHE) seq_printf(m, 
",fsc"); + + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { + if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) + seq_printf(m, ",lookupcache=none"); + else + seq_printf(m, ",lookupcache=pos"); + } } /* diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 503b9da..95932f5 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -69,7 +69,6 @@ config NFSD_V4 depends on NFSD && PROC_FS && EXPERIMENTAL select NFSD_V3 select FS_POSIX_ACL - select RPCSEC_GSS_KRB5 help This option enables support in your system's NFS server for version 4 of the NFS protocol (RFC 3530). diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9..9222633 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. " - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_BARRIER); + if (err == -EOPNOTSUPP) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + goto retry; + } + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); @@ -400,9 +400,10 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) list_add(&sbi->s_list, &nilfs->ns_supers); up_write(&nilfs->ns_super_sem); + err = -ENOMEM; sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); if (!sbi->s_ifile) - return -ENOMEM; + goto delist; down_read(&nilfs->ns_segctor_sem); err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, @@ -433,6 +434,7 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) nilfs_mdt_destroy(sbi->s_ifile); sbi->s_ifile = NULL; + delist: down_write(&nilfs->ns_super_sem); list_del_init(&sbi->s_list); up_write(&nilfs->ns_super_sem); diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 37de1f0..4317f17 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c @@ -608,11 +608,11 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, return -EINVAL; } - if (swp) { + if (!valid[!swp]) printk(KERN_WARNING "NILFS warning: broken superblock. 
" "using spare superblock.\n"); + if (swp) nilfs_swap_super_block(nilfs); - } nilfs->ns_sbwcount = 0; nilfs->ns_sbwtime = le64_to_cpu(sbp[0]->s_wtime); @@ -775,6 +775,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, start * sects_per_block, nblocks * sects_per_block, GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); if (ret < 0) return ret; @@ -785,7 +786,8 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, ret = blkdev_issue_discard(nilfs->ns_bdev, start * sects_per_block, nblocks * sects_per_block, - GFP_NOFS, BLKDEV_IFL_BARRIER); + GFP_NOFS, + BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); return ret; } @@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt, f->f_path.mnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); - file_move(f, &inode->i_sb->s_files); + file_sb_list_add(f, inode->i_sb); error = security_dentry_open(f, cred); if (error) @@ -721,7 +721,7 @@ cleanup_all: mnt_drop_write(mnt); } } - file_kill(f); + file_sb_list_del(f); f->f_path.dentry = NULL; f->f_path.mnt = NULL; cleanup_file: @@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt) return 0; } +/* + * vfsmount lock must be held for write + */ void change_mnt_propagation(struct vfsmount *mnt, int type) { if (type == MS_SHARED) { @@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry, prev_src_mnt = child; } out: - spin_lock(&vfsmount_lock); + br_write_lock(vfsmount_lock); while (!list_empty(&tmp_list)) { child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash); umount_tree(child, 0, &umount_list); } - spin_unlock(&vfsmount_lock); + br_write_unlock(vfsmount_lock); release_mounts(&umount_list); return ret; } @@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count) * other mounts its parent propagates to. * Check if any of these mounts that **do not have submounts** * have more references than 'refcnt'. If so return busy. + * + * vfsmount lock must be held for read or write */ int propagate_mount_busy(struct vfsmount *mnt, int refcnt) { @@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt) * collect all mounts that receive propagation from the mount in @list, * and return these additional mounts in the same list. * @list: the list of mounts to be unmounted. 
+ * + * vfsmount lock must be held for write */ int propagate_umount(struct list_head *list) { diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ae35413..caa7583 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode) dquot_drop(inode); inode->i_blocks = 0; reiserfs_write_unlock_once(inode->i_sb, depth); + return; no_delete: end_writeback(inode); diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b..812e2c0 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); @@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type) s = NULL; goto out; } +#ifdef CONFIG_SMP + s->s_files = alloc_percpu(struct list_head); + if (!s->s_files) { + security_sb_free(s); + kfree(s); + s = NULL; + goto out; + } else { + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i)); + } +#else INIT_LIST_HEAD(&s->s_files); +#endif INIT_LIST_HEAD(&s->s_instances); INIT_HLIST_HEAD(&s->s_anon); INIT_LIST_HEAD(&s->s_inodes); @@ -108,6 +123,9 @@ out: */ static inline void destroy_super(struct super_block *s) { +#ifdef CONFIG_SMP + free_percpu(s->s_files); +#endif security_sb_free(s); kfree(s->s_subtype); kfree(s->s_options); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484f..46f7a80 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e..2eabf04 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty 
(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb1..a58f915 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9..d2c36d5 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036..9f8775c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block(int, struct 
ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size)
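
Several of the hunks above (reiserfs journal_read_transaction(), the ufs ubh_sync_block() helper and its callers) replace SWRITE-based ll_rw_block() calls with write_dirty_buffer() plus an explicit wait. A minimal sketch of that caller-side pattern for an array of buffer heads, assuming the 2.6.36-era buffer API used in this series; flush_bh_array() is a hypothetical helper name, not a kernel symbol:

#include <linux/fs.h>
#include <linux/buffer_head.h>

/*
 * Hypothetical helper: queue the *current* contents of every dirty buffer,
 * then wait for all submitted I/O - the same two-pass shape as the new
 * ubh_sync_block() above.
 */
static void flush_bh_array(struct buffer_head **bhs, unsigned int nr)
{
	unsigned int i;

	for (i = 0; i < nr; i++)
		write_dirty_buffer(bhs[i], WRITE);	/* locks the bh, skips clean ones */

	for (i = 0; i < nr; i++)
		wait_on_buffer(bhs[i]);
}

The reiserfs recovery path above only needs the submit half (it already waits in a separate loop), while the ufs callers want both passes, which is exactly what ubh_sync_block() provides.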
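
The nilfs_sync_super() hunk above stops setting a buffer-level "ordered" flag and instead passes WRITE_BARRIER explicitly through __sync_dirty_buffer(), falling back to a plain synchronous write when the device rejects barriers. A condensed sketch of that retry logic, assuming the same error convention; sync_sb_bh() and use_barrier are illustrative names only:

#include <linux/fs.h>
#include <linux/buffer_head.h>

static int sync_sb_bh(struct buffer_head *sbh, bool *use_barrier)
{
	int err;

retry:
	set_buffer_dirty(sbh);

	if (*use_barrier) {
		err = __sync_dirty_buffer(sbh, WRITE_SYNC | WRITE_BARRIER);
		if (err == -EOPNOTSUPP) {
			/* Device cannot do barriers: disable them and retry once. */
			*use_barrier = false;
			goto retry;
		}
	} else {
		err = sync_dirty_buffer(sbh);
	}
	return err;
}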
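
The nilfs_attach_checkpoint() change above is an error-path fix: once sbi has been linked into ns_supers, a failed ifile allocation must jump to the new delist label instead of returning -ENOMEM directly, so the registration is undone. The same shape in isolation, with hypothetical names (attach_example(), register_ctx(), alloc_obj()) standing in for the nilfs specifics:

#include <linux/errno.h>

struct obj;
struct ctx { struct obj *obj; };

void register_ctx(struct ctx *c);	/* hypothetical: first visible side effect */
void unregister_ctx(struct ctx *c);	/* hypothetical: undoes register_ctx() */
struct obj *alloc_obj(void);		/* hypothetical: may return NULL */

static int attach_example(struct ctx *c)
{
	int err;

	register_ctx(c);

	err = -ENOMEM;			/* preset for the failure branch below */
	c->obj = alloc_obj();
	if (!c->obj)
		goto unregister;	/* unwind the registration, not just return */

	return 0;

unregister:
	unregister_ctx(c);
	return err;
}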
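
nilfs_discard_segments() above now passes BLKDEV_IFL_WAIT in addition to BLKDEV_IFL_BARRIER, so blkdev_issue_discard() does not return until the discard has completed. A one-call sketch of the same synchronous discard, matching the signature used in the hunk; discard_range() is an illustrative wrapper, not a kernel function:

#include <linux/blkdev.h>

static int discard_range(struct block_device *bdev, sector_t start,
			 sector_t nr_sects)
{
	/* GFP_NOFS: may be called from filesystem writeback context. */
	return blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
				    BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
}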
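
alloc_super()/destroy_super() above give the superblock a per-CPU s_files list on SMP, and __dentry_open() switches from file_move() to file_sb_list_add(). The real list helpers live in fs/file_table.c and take the appropriate file-list locking; the sketch below only illustrates how a per-CPU list head like s_files is selected and used, with that locking deliberately omitted and sb_list_add_this_cpu() a hypothetical name:

#include <linux/fs.h>
#include <linux/list.h>
#include <linux/percpu.h>

#ifdef CONFIG_SMP
static void sb_list_add_this_cpu(struct super_block *sb, struct file *file)
{
	struct list_head *head;

	head = per_cpu_ptr(sb->s_files, get_cpu());	/* this CPU's list head */
	list_add(&file->f_u.fu_list, head);		/* unlocked: illustration only */
	put_cpu();
}
#else
static void sb_list_add_this_cpu(struct super_block *sb, struct file *file)
{
	list_add(&file->f_u.fu_list, &sb->s_files);	/* single shared list on UP */
}
#endif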
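
The pnode.c hunks above replace spin_lock(&vfsmount_lock) with br_write_lock(vfsmount_lock) and document which propagation helpers require the lock held for write. A sketch of a caller honouring that rule, assuming the fs-internal declarations that fs/pnode.c itself sees; set_mount_shared_locked() is a hypothetical wrapper, not kernel code:

#include <linux/fs.h>
#include <linux/mount.h>
/* Also assumes the fs-internal declarations of vfsmount_lock and
 * change_mnt_propagation(); this is not buildable stand-alone. */

static void set_mount_shared_locked(struct vfsmount *mnt)
{
	br_write_lock(vfsmount_lock);	/* write side: we mutate propagation state */
	change_mnt_propagation(mnt, MS_SHARED);
	br_write_unlock(vfsmount_lock);
}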