diff options
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- | fs/btrfs/inode.c | 1058 |
1 files changed, 548 insertions, 510 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d42e6bf..cb10cb9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -38,6 +38,7 @@ #include <linux/falloc.h> #include <linux/slab.h> #include <linux/ratelimit.h> +#include <linux/mount.h> #include "compat.h" #include "ctree.h" #include "disk-io.h" @@ -45,17 +46,17 @@ #include "btrfs_inode.h" #include "ioctl.h" #include "print-tree.h" -#include "volumes.h" #include "ordered-data.h" #include "xattr.h" #include "tree-log.h" +#include "volumes.h" #include "compression.h" #include "locking.h" #include "free-space-cache.h" #include "inode-map.h" struct btrfs_iget_args { - u64 ino; + struct btrfs_key *location; struct btrfs_root *root; }; @@ -93,6 +94,8 @@ static noinline int cow_file_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); +static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode); static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir, @@ -340,6 +343,7 @@ static noinline int compress_file_range(struct inode *inode, int i; int will_compress; int compress_type = root->fs_info->compress_type; + int redirty = 0; /* if this is a small write inside eof, kick off a defragbot */ if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024) @@ -393,11 +397,25 @@ again: (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) { WARN_ON(pages); pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); - BUG_ON(!pages); + if (!pages) { + /* just bail out to the uncompressed code */ + goto cont; + } if (BTRFS_I(inode)->force_compress) compress_type = BTRFS_I(inode)->force_compress; + /* + * we need to call clear_page_dirty_for_io on each + * page in the range. Otherwise applications with the file + * mmap'd can wander in and change the page contents while + * we are compressing them. + * + * If the compression fails for any reason, we set the pages + * dirty again later on. + */ + extent_range_clear_dirty_for_io(inode, start, end); + redirty = 1; ret = btrfs_compress_pages(compress_type, inode->i_mapping, start, total_compressed, pages, @@ -424,6 +442,7 @@ again: will_compress = 1; } } +cont: if (start == 0) { trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); @@ -534,6 +553,8 @@ cleanup_and_bail_uncompressed: __set_page_dirty_nobuffers(locked_page); /* unlocked later on in the async handlers */ } + if (redirty) + extent_range_redirty_for_io(inode, start, end); add_async_extent(async_cow, start, end - start + 1, 0, NULL, 0, BTRFS_COMPRESS_NONE); *num_added += 1; @@ -750,15 +771,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, return alloc_hint; } -static inline bool is_free_space_inode(struct btrfs_root *root, - struct inode *inode) -{ - if (root == root->fs_info->tree_root || - BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) - return true; - return false; -} - /* * when extent_io.c finds a delayed allocation range in the file, * the call backs end up in this code. The basic idea is to @@ -791,7 +803,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(is_free_space_inode(root, inode)); + BUG_ON(btrfs_is_free_space_inode(root, inode)); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -829,7 +841,7 @@ static noinline int cow_file_range(struct inode *inode, } BUG_ON(disk_num_bytes > - btrfs_super_total_bytes(&root->fs_info->super_copy)); + btrfs_super_total_bytes(root->fs_info->super_copy)); alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); @@ -1070,9 +1082,10 @@ static noinline int run_delalloc_nocow(struct inode *inode, u64 ino = btrfs_ino(inode); path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; - nolock = is_free_space_inode(root, inode); + nolock = btrfs_is_free_space_inode(root, inode); if (nolock) trans = btrfs_join_transaction_nolock(root); @@ -1291,15 +1304,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, return ret; } -static int btrfs_split_extent_hook(struct inode *inode, - struct extent_state *orig, u64 split) +static void btrfs_split_extent_hook(struct inode *inode, + struct extent_state *orig, u64 split) { /* not delalloc, ignore it */ if (!(orig->state & EXTENT_DELALLOC)) - return 0; + return; - atomic_inc(&BTRFS_I(inode)->outstanding_extents); - return 0; + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); } /* @@ -1308,16 +1322,17 @@ static int btrfs_split_extent_hook(struct inode *inode, * extents, such as when we are doing sequential writes, so we can properly * account for the metadata space we'll need. */ -static int btrfs_merge_extent_hook(struct inode *inode, - struct extent_state *new, - struct extent_state *other) +static void btrfs_merge_extent_hook(struct inode *inode, + struct extent_state *new, + struct extent_state *other) { /* not delalloc, ignore it */ if (!(other->state & EXTENT_DELALLOC)) - return 0; + return; - atomic_dec(&BTRFS_I(inode)->outstanding_extents); - return 0; + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->lock); } /* @@ -1325,8 +1340,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, * bytes in this file, and to maintain the list of inodes that * have pending delalloc work to be done. */ -static int btrfs_set_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static void btrfs_set_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* @@ -1337,12 +1352,15 @@ static int btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !is_free_space_inode(root, inode); + bool do_list = !btrfs_is_free_space_inode(root, inode); - if (*bits & EXTENT_FIRST_DELALLOC) + if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; - else - atomic_inc(&BTRFS_I(inode)->outstanding_extents); + } else { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); + } spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += len; @@ -1353,14 +1371,13 @@ static int btrfs_set_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } - return 0; } /* * extent_io.c clear_bit_hook, see set_bit_hook for why */ -static int btrfs_clear_bit_hook(struct inode *inode, - struct extent_state *state, int *bits) +static void btrfs_clear_bit_hook(struct inode *inode, + struct extent_state *state, int *bits) { /* * set_bit and clear bit hooks normally require _irqsave/restore @@ -1370,12 +1387,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !is_free_space_inode(root, inode); + bool do_list = !btrfs_is_free_space_inode(root, inode); - if (*bits & EXTENT_FIRST_DELALLOC) + if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; - else if (!(*bits & EXTENT_DO_ACCOUNTING)) - atomic_dec(&BTRFS_I(inode)->outstanding_extents); + } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->lock); + } if (*bits & EXTENT_DO_ACCOUNTING) btrfs_delalloc_release_metadata(inode, len); @@ -1394,7 +1414,6 @@ static int btrfs_clear_bit_hook(struct inode *inode, } spin_unlock(&root->fs_info->delalloc_lock); } - return 0; } /* @@ -1477,7 +1496,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if (is_free_space_inode(root, inode)) + if (btrfs_is_free_space_inode(root, inode)) ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); else ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); @@ -1644,7 +1663,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, int ret; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; path->leave_spinning = 1; @@ -1726,7 +1746,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) return 0; BUG_ON(!ordered_extent); - nolock = is_free_space_inode(root, inode); + nolock = btrfs_is_free_space_inode(root, inode); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); @@ -1738,7 +1758,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; - ret = btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode_fallback(trans, root, inode); BUG_ON(ret); } goto out; @@ -1787,18 +1807,18 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) &ordered_extent->list); ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); - if (!ret) { - ret = btrfs_update_inode(trans, root, inode); + if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { + ret = btrfs_update_inode_fallback(trans, root, inode); BUG_ON(ret); } ret = 0; out: - if (nolock) { - if (trans) - btrfs_end_transaction_nolock(trans, root); - } else { + if (root != root->fs_info->tree_root) btrfs_delalloc_release_metadata(inode, ordered_extent->len); - if (trans) + if (trans) { + if (nolock) + btrfs_end_transaction_nolock(trans, root); + else btrfs_end_transaction(trans, root); } @@ -1820,153 +1840,9 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, } /* - * When IO fails, either with EIO or csum verification fails, we - * try other mirrors that might have a good copy of the data. This - * io_failure_record is used to record state as we go through all the - * mirrors. If another mirror has good data, the page is set up to date - * and things continue. If a good mirror can't be found, the original - * bio end_io callback is called to indicate things have failed. - */ -struct io_failure_record { - struct page *page; - u64 start; - u64 len; - u64 logical; - unsigned long bio_flags; - int last_mirror; -}; - -static int btrfs_io_failed_hook(struct bio *failed_bio, - struct page *page, u64 start, u64 end, - struct extent_state *state) -{ - struct io_failure_record *failrec = NULL; - u64 private; - struct extent_map *em; - struct inode *inode = page->mapping->host; - struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree; - struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; - struct bio *bio; - int num_copies; - int ret; - int rw; - u64 logical; - - ret = get_state_private(failure_tree, start, &private); - if (ret) { - failrec = kmalloc(sizeof(*failrec), GFP_NOFS); - if (!failrec) - return -ENOMEM; - failrec->start = start; - failrec->len = end - start + 1; - failrec->last_mirror = 0; - failrec->bio_flags = 0; - - read_lock(&em_tree->lock); - em = lookup_extent_mapping(em_tree, start, failrec->len); - if (em->start > start || em->start + em->len < start) { - free_extent_map(em); - em = NULL; - } - read_unlock(&em_tree->lock); - - if (IS_ERR_OR_NULL(em)) { - kfree(failrec); - return -EIO; - } - logical = start - em->start; - logical = em->block_start + logical; - if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) { - logical = em->block_start; - failrec->bio_flags = EXTENT_BIO_COMPRESSED; - extent_set_compress_type(&failrec->bio_flags, - em->compress_type); - } - failrec->logical = logical; - free_extent_map(em); - set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | - EXTENT_DIRTY, GFP_NOFS); - set_state_private(failure_tree, start, - (u64)(unsigned long)failrec); - } else { - failrec = (struct io_failure_record *)(unsigned long)private; - } - num_copies = btrfs_num_copies( - &BTRFS_I(inode)->root->fs_info->mapping_tree, - failrec->logical, failrec->len); - failrec->last_mirror++; - if (!state) { - spin_lock(&BTRFS_I(inode)->io_tree.lock); - state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree, - failrec->start, - EXTENT_LOCKED); - if (state && state->start != failrec->start) - state = NULL; - spin_unlock(&BTRFS_I(inode)->io_tree.lock); - } - if (!state || failrec->last_mirror > num_copies) { - set_state_private(failure_tree, failrec->start, 0); - clear_extent_bits(failure_tree, failrec->start, - failrec->start + failrec->len - 1, - EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS); - kfree(failrec); - return -EIO; - } - bio = bio_alloc(GFP_NOFS, 1); - bio->bi_private = state; - bio->bi_end_io = failed_bio->bi_end_io; - bio->bi_sector = failrec->logical >> 9; - bio->bi_bdev = failed_bio->bi_bdev; - bio->bi_size = 0; - - bio_add_page(bio, page, failrec->len, start - page_offset(page)); - if (failed_bio->bi_rw & REQ_WRITE) - rw = WRITE; - else - rw = READ; - - ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, - failrec->last_mirror, - failrec->bio_flags, 0); - return ret; -} - -/* - * each time an IO finishes, we do a fast check in the IO failure tree - * to see if we need to process or clean up an io_failure_record - */ -static int btrfs_clean_io_failures(struct inode *inode, u64 start) -{ - u64 private; - u64 private_failure; - struct io_failure_record *failure; - int ret; - - private = 0; - if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private, - (u64)-1, 1, EXTENT_DIRTY, 0)) { - ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, - start, &private_failure); - if (ret == 0) { - failure = (struct io_failure_record *)(unsigned long) - private_failure; - set_state_private(&BTRFS_I(inode)->io_failure_tree, - failure->start, 0); - clear_extent_bits(&BTRFS_I(inode)->io_failure_tree, - failure->start, - failure->start + failure->len - 1, - EXTENT_DIRTY | EXTENT_LOCKED, - GFP_NOFS); - kfree(failure); - } - } - return 0; -} - -/* * when reads are done, we need to check csums to verify the data is correct - * if there's a match, we allow the bio to finish. If not, we go through - * the io_failure_record routines to find good copies + * if there's a match, we allow the bio to finish. If not, the code in + * extent_io.c will try to find good copies for us. */ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) @@ -2012,10 +1888,6 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, kunmap_atomic(kaddr, KM_USER0); good: - /* if the io failure tree for this inode is non-empty, - * check to see if we've recovered from a failed IO - */ - btrfs_clean_io_failures(inode, start); return 0; zeroit: @@ -2080,89 +1952,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) up_read(&root->fs_info->cleanup_work_sem); } -/* - * calculate extra metadata reservation when snapshotting a subvolume - * contains orphan files. - */ -void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending, - u64 *bytes_to_reserve) -{ - struct btrfs_root *root; - struct btrfs_block_rsv *block_rsv; - u64 num_bytes; - int index; - - root = pending->root; - if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) - return; - - block_rsv = root->orphan_block_rsv; - - /* orphan block reservation for the snapshot */ - num_bytes = block_rsv->size; - - /* - * after the snapshot is created, COWing tree blocks may use more - * space than it frees. So we should make sure there is enough - * reserved space. - */ - index = trans->transid & 0x1; - if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { - num_bytes += block_rsv->size - - (block_rsv->reserved + block_rsv->freed[index]); - } - - *bytes_to_reserve += num_bytes; -} - -void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending) -{ - struct btrfs_root *root = pending->root; - struct btrfs_root *snap = pending->snap; - struct btrfs_block_rsv *block_rsv; - u64 num_bytes; - int index; - int ret; - - if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) - return; - - /* refill source subvolume's orphan block reservation */ - block_rsv = root->orphan_block_rsv; - index = trans->transid & 0x1; - if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { - num_bytes = block_rsv->size - - (block_rsv->reserved + block_rsv->freed[index]); - ret = btrfs_block_rsv_migrate(&pending->block_rsv, - root->orphan_block_rsv, - num_bytes); - BUG_ON(ret); - } - - /* setup orphan block reservation for the snapshot */ - block_rsv = btrfs_alloc_block_rsv(snap); - BUG_ON(!block_rsv); - - btrfs_add_durable_block_rsv(root->fs_info, block_rsv); - snap->orphan_block_rsv = block_rsv; - - num_bytes = root->orphan_block_rsv->size; - ret = btrfs_block_rsv_migrate(&pending->block_rsv, - block_rsv, num_bytes); - BUG_ON(ret); - -#if 0 - /* insert orphan item for the snapshot */ - WARN_ON(!root->orphan_item_inserted); - ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, - snap->root_key.objectid); - BUG_ON(ret); - snap->orphan_item_inserted = 1; -#endif -} - enum btrfs_orphan_cleanup_state { ORPHAN_CLEANUP_STARTED = 1, ORPHAN_CLEANUP_DONE = 2, @@ -2214,7 +2003,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) if (!root->orphan_block_rsv) { block_rsv = btrfs_alloc_block_rsv(root); - BUG_ON(!block_rsv); + if (!block_rsv) + return -ENOMEM; } spin_lock(&root->orphan_lock); @@ -2247,9 +2037,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) } spin_unlock(&root->orphan_lock); - if (block_rsv) - btrfs_add_durable_block_rsv(root->fs_info, block_rsv); - /* grab metadata reservation from transaction handle */ if (reserve) { ret = btrfs_orphan_reserve_metadata(trans, inode); @@ -2259,7 +2046,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) /* insert an orphan item to track this unlinked/truncated file */ if (insert >= 1) { ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode)); - BUG_ON(ret); + BUG_ON(ret && ret != -EEXIST); } /* insert an orphan item to track subvolume contains orphan files */ @@ -2316,6 +2103,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) struct btrfs_key key, found_key; struct btrfs_trans_handle *trans; struct inode *inode; + u64 last_objectid = 0; int ret = 0, nr_unlink = 0, nr_truncate = 0; if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) @@ -2367,41 +2155,81 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) * crossing root thing. we store the inode number in the * offset of the orphan item. */ + + if (found_key.offset == last_objectid) { + printk(KERN_ERR "btrfs: Error removing orphan entry, " + "stopping orphan cleanup\n"); + ret = -EINVAL; + goto out; + } + + last_objectid = found_key.offset; + found_key.objectid = found_key.offset; found_key.type = BTRFS_INODE_ITEM_KEY; found_key.offset = 0; inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); - if (IS_ERR(inode)) { - ret = PTR_ERR(inode); + ret = PTR_RET(inode); + if (ret && ret != -ESTALE) goto out; - } - /* - * add this inode to the orphan list so btrfs_orphan_del does - * the proper thing when we hit it - */ - spin_lock(&root->orphan_lock); - list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); - spin_unlock(&root->orphan_lock); + if (ret == -ESTALE && root == root->fs_info->tree_root) { + struct btrfs_root *dead_root; + struct btrfs_fs_info *fs_info = root->fs_info; + int is_dead_root = 0; + /* + * this is an orphan in the tree root. Currently these + * could come from 2 sources: + * a) a snapshot deletion in progress + * b) a free space cache inode + * We need to distinguish those two, as the snapshot + * orphan must not get deleted. + * find_dead_roots already ran before us, so if this + * is a snapshot deletion, we should find the root + * in the dead_roots list + */ + spin_lock(&fs_info->trans_lock); + list_for_each_entry(dead_root, &fs_info->dead_roots, + root_list) { + if (dead_root->root_key.objectid == + found_key.objectid) { + is_dead_root = 1; + break; + } + } + spin_unlock(&fs_info->trans_lock); + if (is_dead_root) { + /* prevent this orphan from being found again */ + key.offset = found_key.objectid - 1; + continue; + } + } /* - * if this is a bad inode, means we actually succeeded in - * removing the inode, but not the orphan record, which means - * we need to manually delete the orphan since iput will just - * do a destroy_inode + * Inode is already gone but the orphan item is still there, + * kill the orphan item. */ - if (is_bad_inode(inode)) { - trans = btrfs_start_transaction(root, 0); + if (ret == -ESTALE) { + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { ret = PTR_ERR(trans); goto out; } - btrfs_orphan_del(trans, inode); + ret = btrfs_del_orphan_item(trans, root, + found_key.objectid); + BUG_ON(ret); btrfs_end_transaction(trans, root); - iput(inode); continue; } + /* + * add this inode to the orphan list so btrfs_orphan_del does + * the proper thing when we hit it + */ + spin_lock(&root->orphan_lock); + list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); + spin_unlock(&root->orphan_lock); + /* if we have links, this was a truncate, lets do that */ if (inode->i_nlink) { if (!S_ISREG(inode->i_mode)) { @@ -2410,7 +2238,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) continue; } nr_truncate++; + /* + * Need to hold the imutex for reservation purposes, not + * a huge deal here but I have a WARN_ON in + * btrfs_delalloc_reserve_space to catch offenders. + */ + mutex_lock(&inode->i_mutex); ret = btrfs_truncate(inode); + mutex_unlock(&inode->i_mutex); } else { nr_unlink++; } @@ -2420,6 +2255,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) if (ret) goto out; } + /* release the path since we're done with it */ + btrfs_release_path(path); + root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; if (root->orphan_block_rsv) @@ -2516,7 +2354,9 @@ static void btrfs_read_locked_inode(struct inode *inode) filled = true; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + goto make_bad; + path->leave_spinning = 1; memcpy(&location, &BTRFS_I(inode)->location, sizeof(location)); @@ -2531,15 +2371,8 @@ static void btrfs_read_locked_inode(struct inode *inode) inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - if (!leaf->map_token) - map_private_extent_buffer(leaf, (unsigned long)inode_item, - sizeof(struct btrfs_inode_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - inode->i_mode = btrfs_inode_mode(leaf, inode_item); - inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); + set_nlink(inode, btrfs_inode_nlink(leaf, inode_item)); inode->i_uid = btrfs_inode_uid(leaf, inode_item); inode->i_gid = btrfs_inode_gid(leaf, inode_item); btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item)); @@ -2575,11 +2408,6 @@ cache_acl: if (!maybe_acls) cache_no_acl(inode); - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - btrfs_free_path(path); switch (inode->i_mode & S_IFMT) { @@ -2624,13 +2452,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *item, struct inode *inode) { - if (!leaf->map_token) - map_private_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_inode_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - btrfs_set_inode_uid(leaf, item, inode->i_uid); btrfs_set_inode_gid(leaf, item, inode->i_gid); btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); @@ -2659,17 +2480,12 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, 0); - - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } } /* * copy everything in the in-memory inode into the btree. */ -noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, +static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct inode *inode) { struct btrfs_inode_item *inode_item; @@ -2677,21 +2493,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; int ret; - /* - * If the inode is a free space inode, we can deadlock during commit - * if we put it into the delayed code. - * - * The data relocation inode should also be directly updated - * without delay - */ - if (!is_free_space_inode(root, inode) - && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { - ret = btrfs_delayed_update_inode(trans, root, inode); - if (!ret) - btrfs_set_inode_last_trans(trans, inode); - return ret; - } - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2720,6 +2521,43 @@ failed: } /* + * copy everything in the in-memory inode into the btree. + */ +noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode) +{ + int ret; + + /* + * If the inode is a free space inode, we can deadlock during commit + * if we put it into the delayed code. + * + * The data relocation inode should also be directly updated + * without delay + */ + if (!btrfs_is_free_space_inode(root, inode) + && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { + ret = btrfs_delayed_update_inode(trans, root, inode); + if (!ret) + btrfs_set_inode_last_trans(trans, inode); + return ret; + } + + return btrfs_update_inode_item(trans, root, inode); +} + +static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct inode *inode) +{ + int ret; + + ret = btrfs_update_inode(trans, root, inode); + if (ret == -ENOSPC) + return btrfs_update_inode_item(trans, root, inode); + return ret; +} + +/* * unlink helper that gets used here in inode.c and in the tree logging * recovery code. It remove a link in a directory with a given name, and * also drops the back refs in the inode to the directory @@ -2857,7 +2695,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, u64 ino = btrfs_ino(inode); u64 dir_ino = btrfs_ino(dir); - trans = btrfs_start_transaction(root, 10); + /* + * 1 for the possible orphan item + * 1 for the dir item + * 1 for the dir index + * 1 for the inode ref + * 1 for the inode ref in the tree log + * 2 for the dir entries in the log + * 1 for the inode + */ + trans = btrfs_start_transaction(root, 8); if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) return trans; @@ -2880,7 +2727,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, return ERR_PTR(-ENOMEM); } - trans = btrfs_start_transaction(root, 0); + /* 1 for the orphan item */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { btrfs_free_path(path); root->fs_info->enospc_unlink = 0; @@ -2985,6 +2833,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, err = 0; out: btrfs_free_path(path); + /* Migrate the orphan reservation over */ + if (!err) + err = btrfs_block_rsv_migrate(trans->block_rsv, + &root->fs_info->global_block_rsv, + trans->bytes_reserved); + if (err) { btrfs_end_transaction(trans, root); root->fs_info->enospc_unlink = 0; @@ -2999,6 +2853,9 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans, struct btrfs_root *root) { if (trans->block_rsv == &root->fs_info->global_block_rsv) { + btrfs_block_rsv_release(root, trans->block_rsv, + trans->bytes_reserved); + trans->block_rsv = &root->fs_info->trans_block_rsv; BUG_ON(!root->fs_info->enospc_unlink); root->fs_info->enospc_unlink = 0; } @@ -3021,13 +2878,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, dentry->d_name.name, dentry->d_name.len); - BUG_ON(ret); + if (ret) + goto out; if (inode->i_nlink == 0) { ret = btrfs_orphan_add(trans, inode); - BUG_ON(ret); + if (ret) + goto out; } +out: nr = trans->blocks_used; __unlink_end_trans(trans, root); btrfs_btree_balance_dirty(root, nr); @@ -3170,6 +3030,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = -1; + if (root->ref_cows || root == root->fs_info->tree_root) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); @@ -3182,10 +3047,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, if (min_type == 0 && root == BTRFS_I(inode)->root) btrfs_kill_delayed_inode_items(inode); - path = btrfs_alloc_path(); - BUG_ON(!path); - path->reada = -1; - key.objectid = ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -3386,6 +3247,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) pgoff_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); struct page *page; + gfp_t mask = btrfs_alloc_write_mask(mapping); int ret = 0; u64 page_start; u64 page_end; @@ -3398,7 +3260,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) ret = -ENOMEM; again: - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, mask); if (!page) { btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); goto out; @@ -3519,7 +3381,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) u64 hint_byte = 0; hole_size = last_byte - cur_offset; - trans = btrfs_start_transaction(root, 2); + trans = btrfs_start_transaction(root, 3); if (IS_ERR(trans)) { err = PTR_ERR(trans); break; @@ -3528,19 +3390,26 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, &hint_byte, 1); - if (err) + if (err) { + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); break; + } err = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); - if (err) + if (err) { + btrfs_update_inode(trans, root, inode); + btrfs_end_transaction(trans, root); break; + } btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); + btrfs_update_inode(trans, root, inode); btrfs_end_transaction(trans, root); } free_extent_map(em); @@ -3558,6 +3427,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) static int btrfs_setsize(struct inode *inode, loff_t newsize) { + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; loff_t oldsize = i_size_read(inode); int ret; @@ -3565,16 +3436,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) return 0; if (newsize > oldsize) { - i_size_write(inode, newsize); - btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); truncate_pagecache(inode, oldsize, newsize); ret = btrfs_cont_expand(inode, oldsize, newsize); - if (ret) { - btrfs_setsize(inode, oldsize); + if (ret) return ret; - } - mark_inode_dirty(inode); + trans = btrfs_start_transaction(root, 1); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + i_size_write(inode, newsize); + btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL); + ret = btrfs_update_inode(trans, root, inode); + btrfs_end_transaction_throttle(trans, root); } else { /* @@ -3614,9 +3488,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_valid) { setattr_copy(inode, attr); - mark_inode_dirty(inode); + err = btrfs_dirty_inode(inode); - if (attr->ia_valid & ATTR_MODE) + if (!err && attr->ia_valid & ATTR_MODE) err = btrfs_acl_chmod(inode); } @@ -3627,6 +3501,8 @@ void btrfs_evict_inode(struct inode *inode) { struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_block_rsv *rsv, *global_rsv; + u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); unsigned long nr; int ret; @@ -3634,7 +3510,7 @@ void btrfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || - is_free_space_inode(root, inode))) + btrfs_is_free_space_inode(root, inode))) goto no_delete; if (is_bad_inode(inode)) { @@ -3642,7 +3518,8 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } /* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */ - btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (!special_file(inode->i_mode)) + btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); @@ -3654,22 +3531,55 @@ void btrfs_evict_inode(struct inode *inode) goto no_delete; } + rsv = btrfs_alloc_block_rsv(root); + if (!rsv) { + btrfs_orphan_del(NULL, inode); + goto no_delete; + } + rsv->size = min_size; + global_rsv = &root->fs_info->global_block_rsv; + btrfs_i_size_write(inode, 0); + /* + * This is a bit simpler than btrfs_truncate since + * + * 1) We've already reserved our space for our orphan item in the + * unlink. + * 2) We're going to delete the inode item, so we don't need to update + * it at all. + * + * So we just need to reserve some slack space in case we add bytes when + * doing the truncate. + */ while (1) { - trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); - trans->block_rsv = root->orphan_block_rsv; + ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); + + /* + * Try and steal from the global reserve since we will + * likely not use this space anyway, we want to try as + * hard as possible to get this to work. + */ + if (ret) + ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size); - ret = btrfs_block_rsv_check(trans, root, - root->orphan_block_rsv, 0, 5); if (ret) { - BUG_ON(ret != -EAGAIN); - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - continue; + printk(KERN_WARNING "Could not get space for a " + "delete, will truncate on mount %d\n", ret); + btrfs_orphan_del(NULL, inode); + btrfs_free_block_rsv(root, rsv); + goto no_delete; + } + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + btrfs_orphan_del(NULL, inode); + btrfs_free_block_rsv(root, rsv); + goto no_delete; } + trans->block_rsv = rsv; + ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); if (ret != -EAGAIN) break; @@ -3678,14 +3588,17 @@ void btrfs_evict_inode(struct inode *inode) btrfs_end_transaction(trans, root); trans = NULL; btrfs_btree_balance_dirty(root, nr); - } + btrfs_free_block_rsv(root, rsv); + if (ret == 0) { + trans->block_rsv = root->orphan_block_rsv; ret = btrfs_orphan_del(trans, inode); BUG_ON(ret); } + trans->block_rsv = &root->fs_info->trans_block_rsv; if (!(root == root->fs_info->tree_root || root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) btrfs_return_ino(root, btrfs_ino(inode)); @@ -3713,7 +3626,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry, int ret = 0; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return -ENOMEM; di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name, namelen, 0); @@ -3934,7 +3848,9 @@ again: static int btrfs_init_locked_inode(struct inode *inode, void *p) { struct btrfs_iget_args *args = p; - inode->i_ino = args->ino; + inode->i_ino = args->location->objectid; + memcpy(&BTRFS_I(inode)->location, args->location, + sizeof(*args->location)); BTRFS_I(inode)->root = args->root; btrfs_set_inode_space_info(args->root, inode); return 0; @@ -3943,20 +3859,20 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p) static int btrfs_find_actor(struct inode *inode, void *opaque) { struct btrfs_iget_args *args = opaque; - return args->ino == btrfs_ino(inode) && + return args->location->objectid == BTRFS_I(inode)->location.objectid && args->root == BTRFS_I(inode)->root; } static struct inode *btrfs_iget_locked(struct super_block *s, - u64 objectid, + struct btrfs_key *location, struct btrfs_root *root) { struct inode *inode; struct btrfs_iget_args args; - args.ino = objectid; + args.location = location; args.root = root; - inode = iget5_locked(s, objectid, btrfs_find_actor, + inode = iget5_locked(s, location->objectid, btrfs_find_actor, btrfs_init_locked_inode, (void *)&args); return inode; @@ -3970,18 +3886,22 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, { struct inode *inode; - inode = btrfs_iget_locked(s, location->objectid, root); + inode = btrfs_iget_locked(s, location, root); if (!inode) return ERR_PTR(-ENOMEM); if (inode->i_state & I_NEW) { - BTRFS_I(inode)->root = root; - memcpy(&BTRFS_I(inode)->location, location, sizeof(*location)); btrfs_read_locked_inode(inode); - inode_tree_add(inode); - unlock_new_inode(inode); - if (new) - *new = 1; + if (!is_bad_inode(inode)) { + inode_tree_add(inode); + unlock_new_inode(inode); + if (new) + *new = 1; + } else { + unlock_new_inode(inode); + iput(inode); + inode = ERR_PTR(-ESTALE); + } } return inode; @@ -4016,12 +3936,20 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct btrfs_root *sub_root = root; struct btrfs_key location; int index; - int ret; + int ret = 0; if (dentry->d_name.len > BTRFS_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - ret = btrfs_inode_by_name(dir, dentry, &location); + if (unlikely(d_need_lookup(dentry))) { + memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); + kfree(dentry->d_fsdata); + dentry->d_fsdata = NULL; + /* This thing is hashed, drop it for now */ + d_drop(dentry); + } else { + ret = btrfs_inode_by_name(dir, dentry, &location); + } if (ret < 0) return ERR_PTR(ret); @@ -4076,16 +4004,24 @@ static int btrfs_dentry_delete(const struct dentry *dentry) return 0; } +static void btrfs_dentry_release(struct dentry *dentry) +{ + if (dentry->d_fsdata) + kfree(dentry->d_fsdata); +} + static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { - struct inode *inode; - - inode = btrfs_lookup_dentry(dir, dentry); - if (IS_ERR(inode)) - return ERR_CAST(inode); + struct dentry *ret; - return d_splice_alias(inode, dentry); + ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); + if (unlikely(d_need_lookup(dentry))) { + spin_lock(&dentry->d_lock); + dentry->d_flags &= ~DCACHE_NEED_LOOKUP; + spin_unlock(&dentry->d_lock); + } + return ret; } unsigned char btrfs_filetype_table[] = { @@ -4104,6 +4040,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_path *path; struct list_head ins_list; struct list_head del_list; + struct qstr q; int ret; struct extent_buffer *leaf; int slot; @@ -4194,6 +4131,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; + struct dentry *tmp; if (verify_dir_item(root, leaf, di)) break; @@ -4214,6 +4152,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); + q.name = name_ptr; + q.len = name_len; + q.hash = full_name_hash(q.name, q.len); + tmp = d_lookup(filp->f_dentry, &q); + if (!tmp) { + struct btrfs_key *newkey; + + newkey = kzalloc(sizeof(struct btrfs_key), + GFP_NOFS); + if (!newkey) + goto no_dentry; + tmp = d_alloc(filp->f_dentry, &q); + if (!tmp) { + kfree(newkey); + dput(tmp); + goto no_dentry; + } + memcpy(newkey, &location, + sizeof(struct btrfs_key)); + tmp->d_fsdata = newkey; + tmp->d_flags |= DCACHE_NEED_LOOKUP; + d_rehash(tmp); + dput(tmp); + } else { + dput(tmp); + } +no_dentry: /* is this a reference to our own snapshot? If so * skip it */ @@ -4278,7 +4243,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (BTRFS_I(inode)->dummy_inode) return 0; - if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) + if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { @@ -4302,42 +4267,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) * FIXME, needs more benchmarking...there are no reasons other than performance * to keep or drop this code. */ -void btrfs_dirty_inode(struct inode *inode, int flags) +int btrfs_dirty_inode(struct inode *inode) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; int ret; if (BTRFS_I(inode)->dummy_inode) - return; + return 0; trans = btrfs_join_transaction(root); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_update_inode(trans, root, inode); if (ret && ret == -ENOSPC) { /* whoops, lets try again with the full transaction */ btrfs_end_transaction(trans, root); trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - printk_ratelimited(KERN_ERR "btrfs: fail to " - "dirty inode %llu error %ld\n", - (unsigned long long)btrfs_ino(inode), - PTR_ERR(trans)); - return; - } + if (IS_ERR(trans)) + return PTR_ERR(trans); ret = btrfs_update_inode(trans, root, inode); - if (ret) { - printk_ratelimited(KERN_ERR "btrfs: fail to " - "dirty inode %llu error %d\n", - (unsigned long long)btrfs_ino(inode), - ret); - } } btrfs_end_transaction(trans, root); if (BTRFS_I(inode)->delayed_node) btrfs_balance_delayed_items(root); + + return ret; +} + +/* + * This is a copy of file_update_time. We need this so we can return error on + * ENOSPC for updating the inode in the case of file write and mmap writes. + */ +int btrfs_update_time(struct file *file) +{ + struct inode *inode = file->f_path.dentry->d_inode; + struct timespec now; + int ret; + enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0; + + /* First try to exhaust all avenues to not sync */ + if (IS_NOCMTIME(inode)) + return 0; + + now = current_fs_time(inode->i_sb); + if (!timespec_equal(&inode->i_mtime, &now)) + sync_it = S_MTIME; + + if (!timespec_equal(&inode->i_ctime, &now)) + sync_it |= S_CTIME; + + if (IS_I_VERSION(inode)) + sync_it |= S_VERSION; + + if (!sync_it) + return 0; + + /* Finally allowed to write? Takes lock. */ + if (mnt_want_write_file(file)) + return 0; + + /* Only change inode inside the lock region */ + if (sync_it & S_VERSION) + inode_inc_iversion(inode); + if (sync_it & S_CTIME) + inode->i_ctime = now; + if (sync_it & S_MTIME) + inode->i_mtime = now; + ret = btrfs_dirty_inode(inode); + if (!ret) + mark_inode_dirty_sync(inode); + mnt_drop_write(file->f_path.mnt); + return ret; } /* @@ -4439,7 +4442,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, int owner; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) + return ERR_PTR(-ENOMEM); inode = new_inode(root->fs_info->sb); if (!inode) { @@ -4474,7 +4478,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, inode->i_generation = BTRFS_I(inode)->generation; btrfs_set_inode_space_info(root, inode); - if (mode & S_IFDIR) + if (S_ISDIR(mode)) owner = 0; else owner = 1; @@ -4519,7 +4523,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, btrfs_inherit_iflags(inode, dir); - if ((mode & S_IFREG)) { + if (S_ISREG(mode)) { if (btrfs_test_opt(root, NODATASUM)) BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; if (btrfs_test_opt(root, NODATACOW) || @@ -4601,10 +4605,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans, int err = btrfs_add_link(trans, dir, inode, dentry->d_name.name, dentry->d_name.len, backref, index); - if (!err) { - d_instantiate(dentry, inode); - return 0; - } if (err > 0) err = -EEXIST; return err; @@ -4652,13 +4652,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, goto out_unlock; } + /* + * If the active LSM wants to access the inode during + * d_instantiate it needs these. Smack checks to see + * if the filesystem supports xattrs by looking at the + * ops vector. + */ + + inode->i_op = &btrfs_special_inode_operations; err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { - inode->i_op = &btrfs_special_inode_operations; init_special_inode(inode, inode->i_mode, rdev); btrfs_update_inode(trans, root, inode); + d_instantiate(dentry, inode); } out_unlock: nr = trans->blocks_used; @@ -4710,15 +4718,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, goto out_unlock; } + /* + * If the active LSM wants to access the inode during + * d_instantiate it needs these. Smack checks to see + * if the filesystem supports xattrs by looking at the + * ops vector. + */ + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { inode->i_mapping->a_ops = &btrfs_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + d_instantiate(dentry, inode); } out_unlock: nr = trans->blocks_used; @@ -4773,11 +4789,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (err) { drop_inode = 1; } else { - struct dentry *parent = dget_parent(dentry); + struct dentry *parent = dentry->d_parent; err = btrfs_update_inode(trans, root, inode); BUG_ON(err); + d_instantiate(dentry, inode); btrfs_log_new_name(trans, inode, NULL, parent); - dput(parent); } nr = trans->blocks_used; @@ -5757,8 +5773,7 @@ again: if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { ret = btrfs_ordered_update_i_size(inode, 0, ordered); if (!ret) - ret = btrfs_update_inode(trans, root, inode); - err = ret; + err = btrfs_update_inode_fallback(trans, root, inode); goto out; } @@ -5795,8 +5810,8 @@ again: add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); ret = btrfs_ordered_update_i_size(inode, 0, ordered); - if (!ret) - btrfs_update_inode(trans, root, inode); + if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) + btrfs_update_inode_fallback(trans, root, inode); ret = 0; out_unlock: unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, @@ -6251,7 +6266,7 @@ int btrfs_readpage(struct file *file, struct page *page) { struct extent_io_tree *tree; tree = &BTRFS_I(page->mapping->host)->io_tree; - return extent_read_full_page(tree, page, btrfs_get_extent); + return extent_read_full_page(tree, page, btrfs_get_extent, 0); } static int btrfs_writepage(struct page *page, struct writeback_control *wbc) @@ -6402,7 +6417,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) u64 page_start; u64 page_end; + /* Need this to keep space reservations serialized */ + mutex_lock(&inode->i_mutex); ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); + mutex_unlock(&inode->i_mutex); + if (!ret) + ret = btrfs_update_time(vma->vm_file); if (ret) { if (ret == -ENOMEM) ret = VM_FAULT_OOM; @@ -6503,6 +6523,7 @@ static int btrfs_truncate(struct inode *inode) struct btrfs_trans_handle *trans; unsigned long nr; u64 mask = root->sectorsize - 1; + u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (ret) @@ -6550,19 +6571,23 @@ static int btrfs_truncate(struct inode *inode) rsv = btrfs_alloc_block_rsv(root); if (!rsv) return -ENOMEM; - btrfs_add_durable_block_rsv(root->fs_info, rsv); + rsv->size = min_size; + /* + * 1 for the truncate slack space + * 1 for the orphan item we're going to add + * 1 for the orphan item deletion + * 1 for updating the inode. + */ trans = btrfs_start_transaction(root, 4); if (IS_ERR(trans)) { err = PTR_ERR(trans); goto out; } - /* - * Reserve space for the truncate process. Truncate should be adding - * space, but if there are snapshots it may end up using space. - */ - ret = btrfs_truncate_reserve_metadata(trans, root, rsv); + /* Migrate the slack space for the truncate to our reserve */ + ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv, + min_size); BUG_ON(ret); ret = btrfs_orphan_add(trans, inode); @@ -6571,21 +6596,6 @@ static int btrfs_truncate(struct inode *inode) goto out; } - nr = trans->blocks_used; - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); - - /* - * Ok so we've already migrated our bytes over for the truncate, so here - * just reserve the one slot we need for updating the inode. - */ - trans = btrfs_start_transaction(root, 1); - if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out; - } - trans->block_rsv = rsv; - /* * setattr is responsible for setting the ordered_data_close flag, * but that is only tested during the last file release. That @@ -6607,20 +6617,31 @@ static int btrfs_truncate(struct inode *inode) btrfs_add_ordered_operation(trans, root, inode); while (1) { + ret = btrfs_block_rsv_refill(root, rsv, min_size); + if (ret) { + /* + * This can only happen with the original transaction we + * started above, every other time we shouldn't have a + * transaction started yet. + */ + if (ret == -EAGAIN) + goto end_trans; + err = ret; + break; + } + if (!trans) { - trans = btrfs_start_transaction(root, 3); + /* Just need the 1 for updating the inode */ + trans = btrfs_start_transaction(root, 1); if (IS_ERR(trans)) { - err = PTR_ERR(trans); - goto out; + ret = err = PTR_ERR(trans); + trans = NULL; + break; } - - ret = btrfs_truncate_reserve_metadata(trans, root, - rsv); - BUG_ON(ret); - - trans->block_rsv = rsv; } + trans->block_rsv = rsv; + ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, BTRFS_EXTENT_DATA_KEY); @@ -6635,7 +6656,7 @@ static int btrfs_truncate(struct inode *inode) err = ret; break; } - +end_trans: nr = trans->blocks_used; btrfs_end_transaction(trans, root); trans = NULL; @@ -6655,14 +6676,16 @@ static int btrfs_truncate(struct inode *inode) ret = btrfs_orphan_del(NULL, inode); } - trans->block_rsv = &root->fs_info->trans_block_rsv; - ret = btrfs_update_inode(trans, root, inode); - if (ret && !err) - err = ret; + if (trans) { + trans->block_rsv = &root->fs_info->trans_block_rsv; + ret = btrfs_update_inode(trans, root, inode); + if (ret && !err) + err = ret; - nr = trans->blocks_used; - ret = btrfs_end_transaction_throttle(trans, root); - btrfs_btree_balance_dirty(root, nr); + nr = trans->blocks_used; + ret = btrfs_end_transaction_throttle(trans, root); + btrfs_btree_balance_dirty(root, nr); + } out: btrfs_free_block_rsv(root, rsv); @@ -6690,7 +6713,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, inode->i_op = &btrfs_dir_inode_operations; inode->i_fop = &btrfs_dir_file_operations; - inode->i_nlink = 1; + set_nlink(inode, 1); btrfs_i_size_write(inode, 0); err = btrfs_update_inode(trans, new_root, inode); @@ -6700,19 +6723,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, return 0; } -/* helper function for file defrag and space balancing. This - * forces readahead on a given range of bytes in an inode - */ -unsigned long btrfs_force_ra(struct address_space *mapping, - struct file_ra_state *ra, struct file *file, - pgoff_t offset, pgoff_t last_index) -{ - pgoff_t req_size = last_index - offset + 1; - - page_cache_sync_readahead(mapping, ra, file, offset, req_size); - return offset + req_size; -} - struct inode *btrfs_alloc_inode(struct super_block *sb) { struct btrfs_inode *ei; @@ -6730,19 +6740,21 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->last_sub_trans = 0; ei->logged_trans = 0; ei->delalloc_bytes = 0; - ei->reserved_bytes = 0; ei->disk_i_size = 0; ei->flags = 0; + ei->csum_bytes = 0; ei->index_cnt = (u64)-1; ei->last_unlink_trans = 0; - atomic_set(&ei->outstanding_extents, 0); - atomic_set(&ei->reserved_extents, 0); + spin_lock_init(&ei->lock); + ei->outstanding_extents = 0; + ei->reserved_extents = 0; ei->ordered_data_close = 0; ei->orphan_meta_reserved = 0; ei->dummy_inode = 0; ei->in_defrag = 0; + ei->delalloc_meta_reserved = 0; ei->force_compress = BTRFS_COMPRESS_NONE; ei->delayed_node = NULL; @@ -6775,8 +6787,10 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); - WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); - WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); + WARN_ON(BTRFS_I(inode)->outstanding_extents); + WARN_ON(BTRFS_I(inode)->reserved_extents); + WARN_ON(BTRFS_I(inode)->delalloc_bytes); + WARN_ON(BTRFS_I(inode)->csum_bytes); /* * This can happen where we create an inode, but somebody else also @@ -6831,7 +6845,7 @@ int btrfs_drop_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; if (btrfs_root_refs(&root->root_item) == 0 && - !is_free_space_inode(root, inode)) + !btrfs_is_free_space_inode(root, inode)) return 1; else return generic_drop_inode(inode); @@ -6900,11 +6914,13 @@ static int btrfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; + u32 blocksize = inode->i_sb->s_blocksize; + generic_fillattr(inode, stat); - stat->dev = BTRFS_I(inode)->root->anon_super.s_dev; + stat->dev = BTRFS_I(inode)->root->anon_dev; stat->blksize = PAGE_CACHE_SIZE; - stat->blocks = (inode_get_bytes(inode) + - BTRFS_I(inode)->delalloc_bytes) >> 9; + stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + + ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9; return 0; } @@ -7069,9 +7085,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, BUG_ON(ret); if (old_ino != BTRFS_FIRST_FREE_OBJECTID) { - struct dentry *parent = dget_parent(new_dentry); + struct dentry *parent = new_dentry->d_parent; btrfs_log_new_name(trans, old_inode, old_dir, parent); - dput(parent); btrfs_end_log_trans(root); } out_fail: @@ -7181,21 +7196,32 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, goto out_unlock; } + /* + * If the active LSM wants to access the inode during + * d_instantiate it needs these. Smack checks to see + * if the filesystem supports xattrs by looking at the + * ops vector. + */ + inode->i_fop = &btrfs_file_operations; + inode->i_op = &btrfs_file_inode_operations; + err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); if (err) drop_inode = 1; else { inode->i_mapping->a_ops = &btrfs_aops; inode->i_mapping->backing_dev_info = &root->fs_info->bdi; - inode->i_fop = &btrfs_file_operations; - inode->i_op = &btrfs_file_inode_operations; BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; } if (drop_inode) goto out_unlock; path = btrfs_alloc_path(); - BUG_ON(!path); + if (!path) { + err = -ENOMEM; + drop_inode = 1; + goto out_unlock; + } key.objectid = btrfs_ino(inode); key.offset = 0; btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); @@ -7233,6 +7259,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, drop_inode = 1; out_unlock: + if (!err) + d_instantiate(dentry, inode); nr = trans->blocks_used; btrfs_end_transaction_throttle(trans, root); if (drop_inode) { @@ -7332,15 +7360,19 @@ static int btrfs_set_page_dirty(struct page *page) return __set_page_dirty_nobuffers(page); } -static int btrfs_permission(struct inode *inode, int mask, unsigned int flags) +static int btrfs_permission(struct inode *inode, int mask) { struct btrfs_root *root = BTRFS_I(inode)->root; + umode_t mode = inode->i_mode; - if (btrfs_root_readonly(root) && (mask & MAY_WRITE)) - return -EROFS; - if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE)) - return -EACCES; - return generic_permission(inode, mask, flags, btrfs_check_acl); + if (mask & MAY_WRITE && + (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) { + if (btrfs_root_readonly(root)) + return -EROFS; + if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) + return -EACCES; + } + return generic_permission(inode, mask); } static const struct inode_operations btrfs_dir_inode_operations = { @@ -7360,10 +7392,12 @@ static const struct inode_operations btrfs_dir_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .permission = btrfs_permission, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, + .get_acl = btrfs_get_acl, }; static const struct file_operations btrfs_dir_file_operations = { @@ -7385,7 +7419,6 @@ static struct extent_io_ops btrfs_extent_io_ops = { .readpage_end_io_hook = btrfs_readpage_end_io_hook, .writepage_end_io_hook = btrfs_writepage_end_io_hook, .writepage_start_hook = btrfs_writepage_start_hook, - .readpage_io_failed_hook = btrfs_io_failed_hook, .set_bit_hook = btrfs_set_bit_hook, .clear_bit_hook = btrfs_clear_bit_hook, .merge_extent_hook = btrfs_merge_extent_hook, @@ -7432,6 +7465,7 @@ static const struct inode_operations btrfs_file_inode_operations = { .removexattr = btrfs_removexattr, .permission = btrfs_permission, .fiemap = btrfs_fiemap, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -7441,19 +7475,23 @@ static const struct inode_operations btrfs_special_inode_operations = { .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, .getattr = btrfs_getattr, + .setattr = btrfs_setattr, .permission = btrfs_permission, .setxattr = btrfs_setxattr, .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, + .get_acl = btrfs_get_acl, }; const struct dentry_operations btrfs_dentry_operations = { .d_delete = btrfs_dentry_delete, + .d_release = btrfs_dentry_release, }; |