aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c1058
1 files changed, 548 insertions, 510 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d42e6bf..cb10cb9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -38,6 +38,7 @@
#include <linux/falloc.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+#include <linux/mount.h>
#include "compat.h"
#include "ctree.h"
#include "disk-io.h"
@@ -45,17 +46,17 @@
#include "btrfs_inode.h"
#include "ioctl.h"
#include "print-tree.h"
-#include "volumes.h"
#include "ordered-data.h"
#include "xattr.h"
#include "tree-log.h"
+#include "volumes.h"
#include "compression.h"
#include "locking.h"
#include "free-space-cache.h"
#include "inode-map.h"
struct btrfs_iget_args {
- u64 ino;
+ struct btrfs_key *location;
struct btrfs_root *root;
};
@@ -93,6 +94,8 @@ static noinline int cow_file_range(struct inode *inode,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
+static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode);
static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *dir,
@@ -340,6 +343,7 @@ static noinline int compress_file_range(struct inode *inode,
int i;
int will_compress;
int compress_type = root->fs_info->compress_type;
+ int redirty = 0;
/* if this is a small write inside eof, kick off a defragbot */
if (end <= BTRFS_I(inode)->disk_i_size && (end - start + 1) < 16 * 1024)
@@ -393,11 +397,25 @@ again:
(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS))) {
WARN_ON(pages);
pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
- BUG_ON(!pages);
+ if (!pages) {
+ /* just bail out to the uncompressed code */
+ goto cont;
+ }
if (BTRFS_I(inode)->force_compress)
compress_type = BTRFS_I(inode)->force_compress;
+ /*
+ * we need to call clear_page_dirty_for_io on each
+ * page in the range. Otherwise applications with the file
+ * mmap'd can wander in and change the page contents while
+ * we are compressing them.
+ *
+ * If the compression fails for any reason, we set the pages
+ * dirty again later on.
+ */
+ extent_range_clear_dirty_for_io(inode, start, end);
+ redirty = 1;
ret = btrfs_compress_pages(compress_type,
inode->i_mapping, start,
total_compressed, pages,
@@ -424,6 +442,7 @@ again:
will_compress = 1;
}
}
+cont:
if (start == 0) {
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
@@ -534,6 +553,8 @@ cleanup_and_bail_uncompressed:
__set_page_dirty_nobuffers(locked_page);
/* unlocked later on in the async handlers */
}
+ if (redirty)
+ extent_range_redirty_for_io(inode, start, end);
add_async_extent(async_cow, start, end - start + 1,
0, NULL, 0, BTRFS_COMPRESS_NONE);
*num_added += 1;
@@ -750,15 +771,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
return alloc_hint;
}
-static inline bool is_free_space_inode(struct btrfs_root *root,
- struct inode *inode)
-{
- if (root == root->fs_info->tree_root ||
- BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
- return true;
- return false;
-}
-
/*
* when extent_io.c finds a delayed allocation range in the file,
* the call backs end up in this code. The basic idea is to
@@ -791,7 +803,7 @@ static noinline int cow_file_range(struct inode *inode,
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
int ret = 0;
- BUG_ON(is_free_space_inode(root, inode));
+ BUG_ON(btrfs_is_free_space_inode(root, inode));
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
@@ -829,7 +841,7 @@ static noinline int cow_file_range(struct inode *inode,
}
BUG_ON(disk_num_bytes >
- btrfs_super_total_bytes(&root->fs_info->super_copy));
+ btrfs_super_total_bytes(root->fs_info->super_copy));
alloc_hint = get_extent_allocation_hint(inode, start, num_bytes);
btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);
@@ -1070,9 +1082,10 @@ static noinline int run_delalloc_nocow(struct inode *inode,
u64 ino = btrfs_ino(inode);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
- nolock = is_free_space_inode(root, inode);
+ nolock = btrfs_is_free_space_inode(root, inode);
if (nolock)
trans = btrfs_join_transaction_nolock(root);
@@ -1291,15 +1304,16 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
return ret;
}
-static int btrfs_split_extent_hook(struct inode *inode,
- struct extent_state *orig, u64 split)
+static void btrfs_split_extent_hook(struct inode *inode,
+ struct extent_state *orig, u64 split)
{
/* not delalloc, ignore it */
if (!(orig->state & EXTENT_DELALLOC))
- return 0;
+ return;
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
- return 0;
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
}
/*
@@ -1308,16 +1322,17 @@ static int btrfs_split_extent_hook(struct inode *inode,
* extents, such as when we are doing sequential writes, so we can properly
* account for the metadata space we'll need.
*/
-static int btrfs_merge_extent_hook(struct inode *inode,
- struct extent_state *new,
- struct extent_state *other)
+static void btrfs_merge_extent_hook(struct inode *inode,
+ struct extent_state *new,
+ struct extent_state *other)
{
/* not delalloc, ignore it */
if (!(other->state & EXTENT_DELALLOC))
- return 0;
+ return;
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
- return 0;
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents--;
+ spin_unlock(&BTRFS_I(inode)->lock);
}
/*
@@ -1325,8 +1340,8 @@ static int btrfs_merge_extent_hook(struct inode *inode,
* bytes in this file, and to maintain the list of inodes that
* have pending delalloc work to be done.
*/
-static int btrfs_set_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
+static void btrfs_set_bit_hook(struct inode *inode,
+ struct extent_state *state, int *bits)
{
/*
@@ -1337,12 +1352,15 @@ static int btrfs_set_bit_hook(struct inode *inode,
if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !is_free_space_inode(root, inode);
+ bool do_list = !btrfs_is_free_space_inode(root, inode);
- if (*bits & EXTENT_FIRST_DELALLOC)
+ if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
- else
- atomic_inc(&BTRFS_I(inode)->outstanding_extents);
+ } else {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
spin_lock(&root->fs_info->delalloc_lock);
BTRFS_I(inode)->delalloc_bytes += len;
@@ -1353,14 +1371,13 @@ static int btrfs_set_bit_hook(struct inode *inode,
}
spin_unlock(&root->fs_info->delalloc_lock);
}
- return 0;
}
/*
* extent_io.c clear_bit_hook, see set_bit_hook for why
*/
-static int btrfs_clear_bit_hook(struct inode *inode,
- struct extent_state *state, int *bits)
+static void btrfs_clear_bit_hook(struct inode *inode,
+ struct extent_state *state, int *bits)
{
/*
* set_bit and clear bit hooks normally require _irqsave/restore
@@ -1370,12 +1387,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
struct btrfs_root *root = BTRFS_I(inode)->root;
u64 len = state->end + 1 - state->start;
- bool do_list = !is_free_space_inode(root, inode);
+ bool do_list = !btrfs_is_free_space_inode(root, inode);
- if (*bits & EXTENT_FIRST_DELALLOC)
+ if (*bits & EXTENT_FIRST_DELALLOC) {
*bits &= ~EXTENT_FIRST_DELALLOC;
- else if (!(*bits & EXTENT_DO_ACCOUNTING))
- atomic_dec(&BTRFS_I(inode)->outstanding_extents);
+ } else if (!(*bits & EXTENT_DO_ACCOUNTING)) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents--;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
if (*bits & EXTENT_DO_ACCOUNTING)
btrfs_delalloc_release_metadata(inode, len);
@@ -1394,7 +1414,6 @@ static int btrfs_clear_bit_hook(struct inode *inode,
}
spin_unlock(&root->fs_info->delalloc_lock);
}
- return 0;
}
/*
@@ -1477,7 +1496,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- if (is_free_space_inode(root, inode))
+ if (btrfs_is_free_space_inode(root, inode))
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
else
ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
@@ -1644,7 +1663,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
int ret;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
path->leave_spinning = 1;
@@ -1726,7 +1746,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
return 0;
BUG_ON(!ordered_extent);
- nolock = is_free_space_inode(root, inode);
+ nolock = btrfs_is_free_space_inode(root, inode);
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
BUG_ON(!list_empty(&ordered_extent->list));
@@ -1738,7 +1758,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
trans = btrfs_join_transaction(root);
BUG_ON(IS_ERR(trans));
trans->block_rsv = &root->fs_info->delalloc_block_rsv;
- ret = btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode_fallback(trans, root, inode);
BUG_ON(ret);
}
goto out;
@@ -1787,18 +1807,18 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
&ordered_extent->list);
ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
- if (!ret) {
- ret = btrfs_update_inode(trans, root, inode);
+ if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
+ ret = btrfs_update_inode_fallback(trans, root, inode);
BUG_ON(ret);
}
ret = 0;
out:
- if (nolock) {
- if (trans)
- btrfs_end_transaction_nolock(trans, root);
- } else {
+ if (root != root->fs_info->tree_root)
btrfs_delalloc_release_metadata(inode, ordered_extent->len);
- if (trans)
+ if (trans) {
+ if (nolock)
+ btrfs_end_transaction_nolock(trans, root);
+ else
btrfs_end_transaction(trans, root);
}
@@ -1820,153 +1840,9 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
}
/*
- * When IO fails, either with EIO or csum verification fails, we
- * try other mirrors that might have a good copy of the data. This
- * io_failure_record is used to record state as we go through all the
- * mirrors. If another mirror has good data, the page is set up to date
- * and things continue. If a good mirror can't be found, the original
- * bio end_io callback is called to indicate things have failed.
- */
-struct io_failure_record {
- struct page *page;
- u64 start;
- u64 len;
- u64 logical;
- unsigned long bio_flags;
- int last_mirror;
-};
-
-static int btrfs_io_failed_hook(struct bio *failed_bio,
- struct page *page, u64 start, u64 end,
- struct extent_state *state)
-{
- struct io_failure_record *failrec = NULL;
- u64 private;
- struct extent_map *em;
- struct inode *inode = page->mapping->host;
- struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
- struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
- struct bio *bio;
- int num_copies;
- int ret;
- int rw;
- u64 logical;
-
- ret = get_state_private(failure_tree, start, &private);
- if (ret) {
- failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
- if (!failrec)
- return -ENOMEM;
- failrec->start = start;
- failrec->len = end - start + 1;
- failrec->last_mirror = 0;
- failrec->bio_flags = 0;
-
- read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, start, failrec->len);
- if (em->start > start || em->start + em->len < start) {
- free_extent_map(em);
- em = NULL;
- }
- read_unlock(&em_tree->lock);
-
- if (IS_ERR_OR_NULL(em)) {
- kfree(failrec);
- return -EIO;
- }
- logical = start - em->start;
- logical = em->block_start + logical;
- if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
- logical = em->block_start;
- failrec->bio_flags = EXTENT_BIO_COMPRESSED;
- extent_set_compress_type(&failrec->bio_flags,
- em->compress_type);
- }
- failrec->logical = logical;
- free_extent_map(em);
- set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
- EXTENT_DIRTY, GFP_NOFS);
- set_state_private(failure_tree, start,
- (u64)(unsigned long)failrec);
- } else {
- failrec = (struct io_failure_record *)(unsigned long)private;
- }
- num_copies = btrfs_num_copies(
- &BTRFS_I(inode)->root->fs_info->mapping_tree,
- failrec->logical, failrec->len);
- failrec->last_mirror++;
- if (!state) {
- spin_lock(&BTRFS_I(inode)->io_tree.lock);
- state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
- failrec->start,
- EXTENT_LOCKED);
- if (state && state->start != failrec->start)
- state = NULL;
- spin_unlock(&BTRFS_I(inode)->io_tree.lock);
- }
- if (!state || failrec->last_mirror > num_copies) {
- set_state_private(failure_tree, failrec->start, 0);
- clear_extent_bits(failure_tree, failrec->start,
- failrec->start + failrec->len - 1,
- EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
- kfree(failrec);
- return -EIO;
- }
- bio = bio_alloc(GFP_NOFS, 1);
- bio->bi_private = state;
- bio->bi_end_io = failed_bio->bi_end_io;
- bio->bi_sector = failrec->logical >> 9;
- bio->bi_bdev = failed_bio->bi_bdev;
- bio->bi_size = 0;
-
- bio_add_page(bio, page, failrec->len, start - page_offset(page));
- if (failed_bio->bi_rw & REQ_WRITE)
- rw = WRITE;
- else
- rw = READ;
-
- ret = BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
- failrec->last_mirror,
- failrec->bio_flags, 0);
- return ret;
-}
-
-/*
- * each time an IO finishes, we do a fast check in the IO failure tree
- * to see if we need to process or clean up an io_failure_record
- */
-static int btrfs_clean_io_failures(struct inode *inode, u64 start)
-{
- u64 private;
- u64 private_failure;
- struct io_failure_record *failure;
- int ret;
-
- private = 0;
- if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
- (u64)-1, 1, EXTENT_DIRTY, 0)) {
- ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
- start, &private_failure);
- if (ret == 0) {
- failure = (struct io_failure_record *)(unsigned long)
- private_failure;
- set_state_private(&BTRFS_I(inode)->io_failure_tree,
- failure->start, 0);
- clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
- failure->start,
- failure->start + failure->len - 1,
- EXTENT_DIRTY | EXTENT_LOCKED,
- GFP_NOFS);
- kfree(failure);
- }
- }
- return 0;
-}
-
-/*
* when reads are done, we need to check csums to verify the data is correct
- * if there's a match, we allow the bio to finish. If not, we go through
- * the io_failure_record routines to find good copies
+ * if there's a match, we allow the bio to finish. If not, the code in
+ * extent_io.c will try to find good copies for us.
*/
static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state)
@@ -2012,10 +1888,6 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
kunmap_atomic(kaddr, KM_USER0);
good:
- /* if the io failure tree for this inode is non-empty,
- * check to see if we've recovered from a failed IO
- */
- btrfs_clean_io_failures(inode, start);
return 0;
zeroit:
@@ -2080,89 +1952,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
up_read(&root->fs_info->cleanup_work_sem);
}
-/*
- * calculate extra metadata reservation when snapshotting a subvolume
- * contains orphan files.
- */
-void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending,
- u64 *bytes_to_reserve)
-{
- struct btrfs_root *root;
- struct btrfs_block_rsv *block_rsv;
- u64 num_bytes;
- int index;
-
- root = pending->root;
- if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
- return;
-
- block_rsv = root->orphan_block_rsv;
-
- /* orphan block reservation for the snapshot */
- num_bytes = block_rsv->size;
-
- /*
- * after the snapshot is created, COWing tree blocks may use more
- * space than it frees. So we should make sure there is enough
- * reserved space.
- */
- index = trans->transid & 0x1;
- if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
- num_bytes += block_rsv->size -
- (block_rsv->reserved + block_rsv->freed[index]);
- }
-
- *bytes_to_reserve += num_bytes;
-}
-
-void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans,
- struct btrfs_pending_snapshot *pending)
-{
- struct btrfs_root *root = pending->root;
- struct btrfs_root *snap = pending->snap;
- struct btrfs_block_rsv *block_rsv;
- u64 num_bytes;
- int index;
- int ret;
-
- if (!root->orphan_block_rsv || list_empty(&root->orphan_list))
- return;
-
- /* refill source subvolume's orphan block reservation */
- block_rsv = root->orphan_block_rsv;
- index = trans->transid & 0x1;
- if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) {
- num_bytes = block_rsv->size -
- (block_rsv->reserved + block_rsv->freed[index]);
- ret = btrfs_block_rsv_migrate(&pending->block_rsv,
- root->orphan_block_rsv,
- num_bytes);
- BUG_ON(ret);
- }
-
- /* setup orphan block reservation for the snapshot */
- block_rsv = btrfs_alloc_block_rsv(snap);
- BUG_ON(!block_rsv);
-
- btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
- snap->orphan_block_rsv = block_rsv;
-
- num_bytes = root->orphan_block_rsv->size;
- ret = btrfs_block_rsv_migrate(&pending->block_rsv,
- block_rsv, num_bytes);
- BUG_ON(ret);
-
-#if 0
- /* insert orphan item for the snapshot */
- WARN_ON(!root->orphan_item_inserted);
- ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root,
- snap->root_key.objectid);
- BUG_ON(ret);
- snap->orphan_item_inserted = 1;
-#endif
-}
-
enum btrfs_orphan_cleanup_state {
ORPHAN_CLEANUP_STARTED = 1,
ORPHAN_CLEANUP_DONE = 2,
@@ -2214,7 +2003,8 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
if (!root->orphan_block_rsv) {
block_rsv = btrfs_alloc_block_rsv(root);
- BUG_ON(!block_rsv);
+ if (!block_rsv)
+ return -ENOMEM;
}
spin_lock(&root->orphan_lock);
@@ -2247,9 +2037,6 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
}
spin_unlock(&root->orphan_lock);
- if (block_rsv)
- btrfs_add_durable_block_rsv(root->fs_info, block_rsv);
-
/* grab metadata reservation from transaction handle */
if (reserve) {
ret = btrfs_orphan_reserve_metadata(trans, inode);
@@ -2259,7 +2046,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
/* insert an orphan item to track this unlinked/truncated file */
if (insert >= 1) {
ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
- BUG_ON(ret);
+ BUG_ON(ret && ret != -EEXIST);
}
/* insert an orphan item to track subvolume contains orphan files */
@@ -2316,6 +2103,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
struct btrfs_key key, found_key;
struct btrfs_trans_handle *trans;
struct inode *inode;
+ u64 last_objectid = 0;
int ret = 0, nr_unlink = 0, nr_truncate = 0;
if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED))
@@ -2367,41 +2155,81 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
* crossing root thing. we store the inode number in the
* offset of the orphan item.
*/
+
+ if (found_key.offset == last_objectid) {
+ printk(KERN_ERR "btrfs: Error removing orphan entry, "
+ "stopping orphan cleanup\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ last_objectid = found_key.offset;
+
found_key.objectid = found_key.offset;
found_key.type = BTRFS_INODE_ITEM_KEY;
found_key.offset = 0;
inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
- if (IS_ERR(inode)) {
- ret = PTR_ERR(inode);
+ ret = PTR_RET(inode);
+ if (ret && ret != -ESTALE)
goto out;
- }
- /*
- * add this inode to the orphan list so btrfs_orphan_del does
- * the proper thing when we hit it
- */
- spin_lock(&root->orphan_lock);
- list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
- spin_unlock(&root->orphan_lock);
+ if (ret == -ESTALE && root == root->fs_info->tree_root) {
+ struct btrfs_root *dead_root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ int is_dead_root = 0;
+ /*
+ * this is an orphan in the tree root. Currently these
+ * could come from 2 sources:
+ * a) a snapshot deletion in progress
+ * b) a free space cache inode
+ * We need to distinguish those two, as the snapshot
+ * orphan must not get deleted.
+ * find_dead_roots already ran before us, so if this
+ * is a snapshot deletion, we should find the root
+ * in the dead_roots list
+ */
+ spin_lock(&fs_info->trans_lock);
+ list_for_each_entry(dead_root, &fs_info->dead_roots,
+ root_list) {
+ if (dead_root->root_key.objectid ==
+ found_key.objectid) {
+ is_dead_root = 1;
+ break;
+ }
+ }
+ spin_unlock(&fs_info->trans_lock);
+ if (is_dead_root) {
+ /* prevent this orphan from being found again */
+ key.offset = found_key.objectid - 1;
+ continue;
+ }
+ }
/*
- * if this is a bad inode, means we actually succeeded in
- * removing the inode, but not the orphan record, which means
- * we need to manually delete the orphan since iput will just
- * do a destroy_inode
+ * Inode is already gone but the orphan item is still there,
+ * kill the orphan item.
*/
- if (is_bad_inode(inode)) {
- trans = btrfs_start_transaction(root, 0);
+ if (ret == -ESTALE) {
+ trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
goto out;
}
- btrfs_orphan_del(trans, inode);
+ ret = btrfs_del_orphan_item(trans, root,
+ found_key.objectid);
+ BUG_ON(ret);
btrfs_end_transaction(trans, root);
- iput(inode);
continue;
}
+ /*
+ * add this inode to the orphan list so btrfs_orphan_del does
+ * the proper thing when we hit it
+ */
+ spin_lock(&root->orphan_lock);
+ list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+ spin_unlock(&root->orphan_lock);
+
/* if we have links, this was a truncate, lets do that */
if (inode->i_nlink) {
if (!S_ISREG(inode->i_mode)) {
@@ -2410,7 +2238,14 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
continue;
}
nr_truncate++;
+ /*
+ * Need to hold the imutex for reservation purposes, not
+ * a huge deal here but I have a WARN_ON in
+ * btrfs_delalloc_reserve_space to catch offenders.
+ */
+ mutex_lock(&inode->i_mutex);
ret = btrfs_truncate(inode);
+ mutex_unlock(&inode->i_mutex);
} else {
nr_unlink++;
}
@@ -2420,6 +2255,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
if (ret)
goto out;
}
+ /* release the path since we're done with it */
+ btrfs_release_path(path);
+
root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE;
if (root->orphan_block_rsv)
@@ -2516,7 +2354,9 @@ static void btrfs_read_locked_inode(struct inode *inode)
filled = true;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ goto make_bad;
+
path->leave_spinning = 1;
memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
@@ -2531,15 +2371,8 @@ static void btrfs_read_locked_inode(struct inode *inode)
inode_item = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_inode_item);
- if (!leaf->map_token)
- map_private_extent_buffer(leaf, (unsigned long)inode_item,
- sizeof(struct btrfs_inode_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
-
inode->i_mode = btrfs_inode_mode(leaf, inode_item);
- inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
+ set_nlink(inode, btrfs_inode_nlink(leaf, inode_item));
inode->i_uid = btrfs_inode_uid(leaf, inode_item);
inode->i_gid = btrfs_inode_gid(leaf, inode_item);
btrfs_i_size_write(inode, btrfs_inode_size(leaf, inode_item));
@@ -2575,11 +2408,6 @@ cache_acl:
if (!maybe_acls)
cache_no_acl(inode);
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
-
btrfs_free_path(path);
switch (inode->i_mode & S_IFMT) {
@@ -2624,13 +2452,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_inode_item *item,
struct inode *inode)
{
- if (!leaf->map_token)
- map_private_extent_buffer(leaf, (unsigned long)item,
- sizeof(struct btrfs_inode_item),
- &leaf->map_token, &leaf->kaddr,
- &leaf->map_start, &leaf->map_len,
- KM_USER1);
-
btrfs_set_inode_uid(leaf, item, inode->i_uid);
btrfs_set_inode_gid(leaf, item, inode->i_gid);
btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size);
@@ -2659,17 +2480,12 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
btrfs_set_inode_block_group(leaf, item, 0);
-
- if (leaf->map_token) {
- unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
- leaf->map_token = NULL;
- }
}
/*
* copy everything in the in-memory inode into the btree.
*/
-noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
+static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct inode *inode)
{
struct btrfs_inode_item *inode_item;
@@ -2677,21 +2493,6 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
int ret;
- /*
- * If the inode is a free space inode, we can deadlock during commit
- * if we put it into the delayed code.
- *
- * The data relocation inode should also be directly updated
- * without delay
- */
- if (!is_free_space_inode(root, inode)
- && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
- ret = btrfs_delayed_update_inode(trans, root, inode);
- if (!ret)
- btrfs_set_inode_last_trans(trans, inode);
- return ret;
- }
-
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
@@ -2720,6 +2521,43 @@ failed:
}
/*
+ * copy everything in the in-memory inode into the btree.
+ */
+noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode)
+{
+ int ret;
+
+ /*
+ * If the inode is a free space inode, we can deadlock during commit
+ * if we put it into the delayed code.
+ *
+ * The data relocation inode should also be directly updated
+ * without delay
+ */
+ if (!btrfs_is_free_space_inode(root, inode)
+ && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ ret = btrfs_delayed_update_inode(trans, root, inode);
+ if (!ret)
+ btrfs_set_inode_last_trans(trans, inode);
+ return ret;
+ }
+
+ return btrfs_update_inode_item(trans, root, inode);
+}
+
+static noinline int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct inode *inode)
+{
+ int ret;
+
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret == -ENOSPC)
+ return btrfs_update_inode_item(trans, root, inode);
+ return ret;
+}
+
+/*
* unlink helper that gets used here in inode.c and in the tree logging
* recovery code. It remove a link in a directory with a given name, and
* also drops the back refs in the inode to the directory
@@ -2857,7 +2695,16 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
u64 ino = btrfs_ino(inode);
u64 dir_ino = btrfs_ino(dir);
- trans = btrfs_start_transaction(root, 10);
+ /*
+ * 1 for the possible orphan item
+ * 1 for the dir item
+ * 1 for the dir index
+ * 1 for the inode ref
+ * 1 for the inode ref in the tree log
+ * 2 for the dir entries in the log
+ * 1 for the inode
+ */
+ trans = btrfs_start_transaction(root, 8);
if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC)
return trans;
@@ -2880,7 +2727,8 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
return ERR_PTR(-ENOMEM);
}
- trans = btrfs_start_transaction(root, 0);
+ /* 1 for the orphan item */
+ trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
btrfs_free_path(path);
root->fs_info->enospc_unlink = 0;
@@ -2985,6 +2833,12 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir,
err = 0;
out:
btrfs_free_path(path);
+ /* Migrate the orphan reservation over */
+ if (!err)
+ err = btrfs_block_rsv_migrate(trans->block_rsv,
+ &root->fs_info->global_block_rsv,
+ trans->bytes_reserved);
+
if (err) {
btrfs_end_transaction(trans, root);
root->fs_info->enospc_unlink = 0;
@@ -2999,6 +2853,9 @@ static void __unlink_end_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (trans->block_rsv == &root->fs_info->global_block_rsv) {
+ btrfs_block_rsv_release(root, trans->block_rsv,
+ trans->bytes_reserved);
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
BUG_ON(!root->fs_info->enospc_unlink);
root->fs_info->enospc_unlink = 0;
}
@@ -3021,13 +2878,16 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
dentry->d_name.name, dentry->d_name.len);
- BUG_ON(ret);
+ if (ret)
+ goto out;
if (inode->i_nlink == 0) {
ret = btrfs_orphan_add(trans, inode);
- BUG_ON(ret);
+ if (ret)
+ goto out;
}
+out:
nr = trans->blocks_used;
__unlink_end_trans(trans, root);
btrfs_btree_balance_dirty(root, nr);
@@ -3170,6 +3030,11 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ path->reada = -1;
+
if (root->ref_cows || root == root->fs_info->tree_root)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
@@ -3182,10 +3047,6 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
if (min_type == 0 && root == BTRFS_I(inode)->root)
btrfs_kill_delayed_inode_items(inode);
- path = btrfs_alloc_path();
- BUG_ON(!path);
- path->reada = -1;
-
key.objectid = ino;
key.offset = (u64)-1;
key.type = (u8)-1;
@@ -3386,6 +3247,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
pgoff_t index = from >> PAGE_CACHE_SHIFT;
unsigned offset = from & (PAGE_CACHE_SIZE-1);
struct page *page;
+ gfp_t mask = btrfs_alloc_write_mask(mapping);
int ret = 0;
u64 page_start;
u64 page_end;
@@ -3398,7 +3260,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
ret = -ENOMEM;
again:
- page = grab_cache_page(mapping, index);
+ page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
goto out;
@@ -3519,7 +3381,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
u64 hint_byte = 0;
hole_size = last_byte - cur_offset;
- trans = btrfs_start_transaction(root, 2);
+ trans = btrfs_start_transaction(root, 3);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
break;
@@ -3528,19 +3390,26 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
err = btrfs_drop_extents(trans, inode, cur_offset,
cur_offset + hole_size,
&hint_byte, 1);
- if (err)
+ if (err) {
+ btrfs_update_inode(trans, root, inode);
+ btrfs_end_transaction(trans, root);
break;
+ }
err = btrfs_insert_file_extent(trans, root,
btrfs_ino(inode), cur_offset, 0,
0, hole_size, 0, hole_size,
0, 0, 0);
- if (err)
+ if (err) {
+ btrfs_update_inode(trans, root, inode);
+ btrfs_end_transaction(trans, root);
break;
+ }
btrfs_drop_extent_cache(inode, hole_start,
last_byte - 1, 0);
+ btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
}
free_extent_map(em);
@@ -3558,6 +3427,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
static int btrfs_setsize(struct inode *inode, loff_t newsize)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
loff_t oldsize = i_size_read(inode);
int ret;
@@ -3565,16 +3436,19 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
return 0;
if (newsize > oldsize) {
- i_size_write(inode, newsize);
- btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
truncate_pagecache(inode, oldsize, newsize);
ret = btrfs_cont_expand(inode, oldsize, newsize);
- if (ret) {
- btrfs_setsize(inode, oldsize);
+ if (ret)
return ret;
- }
- mark_inode_dirty(inode);
+ trans = btrfs_start_transaction(root, 1);
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
+
+ i_size_write(inode, newsize);
+ btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+ ret = btrfs_update_inode(trans, root, inode);
+ btrfs_end_transaction_throttle(trans, root);
} else {
/*
@@ -3614,9 +3488,9 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid) {
setattr_copy(inode, attr);
- mark_inode_dirty(inode);
+ err = btrfs_dirty_inode(inode);
- if (attr->ia_valid & ATTR_MODE)
+ if (!err && attr->ia_valid & ATTR_MODE)
err = btrfs_acl_chmod(inode);
}
@@ -3627,6 +3501,8 @@ void btrfs_evict_inode(struct inode *inode)
{
struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_block_rsv *rsv, *global_rsv;
+ u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
unsigned long nr;
int ret;
@@ -3634,7 +3510,7 @@ void btrfs_evict_inode(struct inode *inode)
truncate_inode_pages(&inode->i_data, 0);
if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
- is_free_space_inode(root, inode)))
+ btrfs_is_free_space_inode(root, inode)))
goto no_delete;
if (is_bad_inode(inode)) {
@@ -3642,7 +3518,8 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (!special_file(inode->i_mode))
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
if (root->fs_info->log_root_recovering) {
BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
@@ -3654,22 +3531,55 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
+ rsv = btrfs_alloc_block_rsv(root);
+ if (!rsv) {
+ btrfs_orphan_del(NULL, inode);
+ goto no_delete;
+ }
+ rsv->size = min_size;
+ global_rsv = &root->fs_info->global_block_rsv;
+
btrfs_i_size_write(inode, 0);
+ /*
+ * This is a bit simpler than btrfs_truncate since
+ *
+ * 1) We've already reserved our space for our orphan item in the
+ * unlink.
+ * 2) We're going to delete the inode item, so we don't need to update
+ * it at all.
+ *
+ * So we just need to reserve some slack space in case we add bytes when
+ * doing the truncate.
+ */
while (1) {
- trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
- trans->block_rsv = root->orphan_block_rsv;
+ ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size);
+
+ /*
+ * Try and steal from the global reserve since we will
+ * likely not use this space anyway, we want to try as
+ * hard as possible to get this to work.
+ */
+ if (ret)
+ ret = btrfs_block_rsv_migrate(global_rsv, rsv, min_size);
- ret = btrfs_block_rsv_check(trans, root,
- root->orphan_block_rsv, 0, 5);
if (ret) {
- BUG_ON(ret != -EAGAIN);
- ret = btrfs_commit_transaction(trans, root);
- BUG_ON(ret);
- continue;
+ printk(KERN_WARNING "Could not get space for a "
+ "delete, will truncate on mount %d\n", ret);
+ btrfs_orphan_del(NULL, inode);
+ btrfs_free_block_rsv(root, rsv);
+ goto no_delete;
+ }
+
+ trans = btrfs_start_transaction(root, 0);
+ if (IS_ERR(trans)) {
+ btrfs_orphan_del(NULL, inode);
+ btrfs_free_block_rsv(root, rsv);
+ goto no_delete;
}
+ trans->block_rsv = rsv;
+
ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
if (ret != -EAGAIN)
break;
@@ -3678,14 +3588,17 @@ void btrfs_evict_inode(struct inode *inode)
btrfs_end_transaction(trans, root);
trans = NULL;
btrfs_btree_balance_dirty(root, nr);
-
}
+ btrfs_free_block_rsv(root, rsv);
+
if (ret == 0) {
+ trans->block_rsv = root->orphan_block_rsv;
ret = btrfs_orphan_del(trans, inode);
BUG_ON(ret);
}
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
if (!(root == root->fs_info->tree_root ||
root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID))
btrfs_return_ino(root, btrfs_ino(inode));
@@ -3713,7 +3626,8 @@ static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
int ret = 0;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
di = btrfs_lookup_dir_item(NULL, root, path, btrfs_ino(dir), name,
namelen, 0);
@@ -3934,7 +3848,9 @@ again:
static int btrfs_init_locked_inode(struct inode *inode, void *p)
{
struct btrfs_iget_args *args = p;
- inode->i_ino = args->ino;
+ inode->i_ino = args->location->objectid;
+ memcpy(&BTRFS_I(inode)->location, args->location,
+ sizeof(*args->location));
BTRFS_I(inode)->root = args->root;
btrfs_set_inode_space_info(args->root, inode);
return 0;
@@ -3943,20 +3859,20 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)
static int btrfs_find_actor(struct inode *inode, void *opaque)
{
struct btrfs_iget_args *args = opaque;
- return args->ino == btrfs_ino(inode) &&
+ return args->location->objectid == BTRFS_I(inode)->location.objectid &&
args->root == BTRFS_I(inode)->root;
}
static struct inode *btrfs_iget_locked(struct super_block *s,
- u64 objectid,
+ struct btrfs_key *location,
struct btrfs_root *root)
{
struct inode *inode;
struct btrfs_iget_args args;
- args.ino = objectid;
+ args.location = location;
args.root = root;
- inode = iget5_locked(s, objectid, btrfs_find_actor,
+ inode = iget5_locked(s, location->objectid, btrfs_find_actor,
btrfs_init_locked_inode,
(void *)&args);
return inode;
@@ -3970,18 +3886,22 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
{
struct inode *inode;
- inode = btrfs_iget_locked(s, location->objectid, root);
+ inode = btrfs_iget_locked(s, location, root);
if (!inode)
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- BTRFS_I(inode)->root = root;
- memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));
btrfs_read_locked_inode(inode);
- inode_tree_add(inode);
- unlock_new_inode(inode);
- if (new)
- *new = 1;
+ if (!is_bad_inode(inode)) {
+ inode_tree_add(inode);
+ unlock_new_inode(inode);
+ if (new)
+ *new = 1;
+ } else {
+ unlock_new_inode(inode);
+ iput(inode);
+ inode = ERR_PTR(-ESTALE);
+ }
}
return inode;
@@ -4016,12 +3936,20 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
struct btrfs_root *sub_root = root;
struct btrfs_key location;
int index;
- int ret;
+ int ret = 0;
if (dentry->d_name.len > BTRFS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
- ret = btrfs_inode_by_name(dir, dentry, &location);
+ if (unlikely(d_need_lookup(dentry))) {
+ memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key));
+ kfree(dentry->d_fsdata);
+ dentry->d_fsdata = NULL;
+ /* This thing is hashed, drop it for now */
+ d_drop(dentry);
+ } else {
+ ret = btrfs_inode_by_name(dir, dentry, &location);
+ }
if (ret < 0)
return ERR_PTR(ret);
@@ -4076,16 +4004,24 @@ static int btrfs_dentry_delete(const struct dentry *dentry)
return 0;
}
+static void btrfs_dentry_release(struct dentry *dentry)
+{
+ if (dentry->d_fsdata)
+ kfree(dentry->d_fsdata);
+}
+
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
- struct inode *inode;
-
- inode = btrfs_lookup_dentry(dir, dentry);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
+ struct dentry *ret;
- return d_splice_alias(inode, dentry);
+ ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry);
+ if (unlikely(d_need_lookup(dentry))) {
+ spin_lock(&dentry->d_lock);
+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
+ spin_unlock(&dentry->d_lock);
+ }
+ return ret;
}
unsigned char btrfs_filetype_table[] = {
@@ -4104,6 +4040,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
struct btrfs_path *path;
struct list_head ins_list;
struct list_head del_list;
+ struct qstr q;
int ret;
struct extent_buffer *leaf;
int slot;
@@ -4194,6 +4131,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
while (di_cur < di_total) {
struct btrfs_key location;
+ struct dentry *tmp;
if (verify_dir_item(root, leaf, di))
break;
@@ -4214,6 +4152,33 @@ static int btrfs_real_readdir(struct file *filp, void *dirent,
d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
btrfs_dir_item_key_to_cpu(leaf, di, &location);
+ q.name = name_ptr;
+ q.len = name_len;
+ q.hash = full_name_hash(q.name, q.len);
+ tmp = d_lookup(filp->f_dentry, &q);
+ if (!tmp) {
+ struct btrfs_key *newkey;
+
+ newkey = kzalloc(sizeof(struct btrfs_key),
+ GFP_NOFS);
+ if (!newkey)
+ goto no_dentry;
+ tmp = d_alloc(filp->f_dentry, &q);
+ if (!tmp) {
+ kfree(newkey);
+ dput(tmp);
+ goto no_dentry;
+ }
+ memcpy(newkey, &location,
+ sizeof(struct btrfs_key));
+ tmp->d_fsdata = newkey;
+ tmp->d_flags |= DCACHE_NEED_LOOKUP;
+ d_rehash(tmp);
+ dput(tmp);
+ } else {
+ dput(tmp);
+ }
+no_dentry:
/* is this a reference to our own snapshot? If so
* skip it
*/
@@ -4278,7 +4243,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (BTRFS_I(inode)->dummy_inode)
return 0;
- if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode))
+ if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode))
nolock = true;
if (wbc->sync_mode == WB_SYNC_ALL) {
@@ -4302,42 +4267,80 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
* FIXME, needs more benchmarking...there are no reasons other than performance
* to keep or drop this code.
*/
-void btrfs_dirty_inode(struct inode *inode, int flags)
+int btrfs_dirty_inode(struct inode *inode)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
int ret;
if (BTRFS_I(inode)->dummy_inode)
- return;
+ return 0;
trans = btrfs_join_transaction(root);
- BUG_ON(IS_ERR(trans));
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, inode);
if (ret && ret == -ENOSPC) {
/* whoops, lets try again with the full transaction */
btrfs_end_transaction(trans, root);
trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- printk_ratelimited(KERN_ERR "btrfs: fail to "
- "dirty inode %llu error %ld\n",
- (unsigned long long)btrfs_ino(inode),
- PTR_ERR(trans));
- return;
- }
+ if (IS_ERR(trans))
+ return PTR_ERR(trans);
ret = btrfs_update_inode(trans, root, inode);
- if (ret) {
- printk_ratelimited(KERN_ERR "btrfs: fail to "
- "dirty inode %llu error %d\n",
- (unsigned long long)btrfs_ino(inode),
- ret);
- }
}
btrfs_end_transaction(trans, root);
if (BTRFS_I(inode)->delayed_node)
btrfs_balance_delayed_items(root);
+
+ return ret;
+}
+
+/*
+ * This is a copy of file_update_time. We need this so we can return error on
+ * ENOSPC for updating the inode in the case of file write and mmap writes.
+ */
+int btrfs_update_time(struct file *file)
+{
+ struct inode *inode = file->f_path.dentry->d_inode;
+ struct timespec now;
+ int ret;
+ enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
+
+ /* First try to exhaust all avenues to not sync */
+ if (IS_NOCMTIME(inode))
+ return 0;
+
+ now = current_fs_time(inode->i_sb);
+ if (!timespec_equal(&inode->i_mtime, &now))
+ sync_it = S_MTIME;
+
+ if (!timespec_equal(&inode->i_ctime, &now))
+ sync_it |= S_CTIME;
+
+ if (IS_I_VERSION(inode))
+ sync_it |= S_VERSION;
+
+ if (!sync_it)
+ return 0;
+
+ /* Finally allowed to write? Takes lock. */
+ if (mnt_want_write_file(file))
+ return 0;
+
+ /* Only change inode inside the lock region */
+ if (sync_it & S_VERSION)
+ inode_inc_iversion(inode);
+ if (sync_it & S_CTIME)
+ inode->i_ctime = now;
+ if (sync_it & S_MTIME)
+ inode->i_mtime = now;
+ ret = btrfs_dirty_inode(inode);
+ if (!ret)
+ mark_inode_dirty_sync(inode);
+ mnt_drop_write(file->f_path.mnt);
+ return ret;
}
/*
@@ -4439,7 +4442,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
int owner;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return ERR_PTR(-ENOMEM);
inode = new_inode(root->fs_info->sb);
if (!inode) {
@@ -4474,7 +4478,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode->i_generation = BTRFS_I(inode)->generation;
btrfs_set_inode_space_info(root, inode);
- if (mode & S_IFDIR)
+ if (S_ISDIR(mode))
owner = 0;
else
owner = 1;
@@ -4519,7 +4523,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
btrfs_inherit_iflags(inode, dir);
- if ((mode & S_IFREG)) {
+ if (S_ISREG(mode)) {
if (btrfs_test_opt(root, NODATASUM))
BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
if (btrfs_test_opt(root, NODATACOW) ||
@@ -4601,10 +4605,6 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
int err = btrfs_add_link(trans, dir, inode,
dentry->d_name.name, dentry->d_name.len,
backref, index);
- if (!err) {
- d_instantiate(dentry, inode);
- return 0;
- }
if (err > 0)
err = -EEXIST;
return err;
@@ -4652,13 +4652,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
+ /*
+ * If the active LSM wants to access the inode during
+ * d_instantiate it needs these. Smack checks to see
+ * if the filesystem supports xattrs by looking at the
+ * ops vector.
+ */
+
+ inode->i_op = &btrfs_special_inode_operations;
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
- inode->i_op = &btrfs_special_inode_operations;
init_special_inode(inode, inode->i_mode, rdev);
btrfs_update_inode(trans, root, inode);
+ d_instantiate(dentry, inode);
}
out_unlock:
nr = trans->blocks_used;
@@ -4710,15 +4718,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
+ /*
+ * If the active LSM wants to access the inode during
+ * d_instantiate it needs these. Smack checks to see
+ * if the filesystem supports xattrs by looking at the
+ * ops vector.
+ */
+ inode->i_fop = &btrfs_file_operations;
+ inode->i_op = &btrfs_file_inode_operations;
+
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
inode->i_mapping->a_ops = &btrfs_aops;
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- inode->i_fop = &btrfs_file_operations;
- inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+ d_instantiate(dentry, inode);
}
out_unlock:
nr = trans->blocks_used;
@@ -4773,11 +4789,11 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (err) {
drop_inode = 1;
} else {
- struct dentry *parent = dget_parent(dentry);
+ struct dentry *parent = dentry->d_parent;
err = btrfs_update_inode(trans, root, inode);
BUG_ON(err);
+ d_instantiate(dentry, inode);
btrfs_log_new_name(trans, inode, NULL, parent);
- dput(parent);
}
nr = trans->blocks_used;
@@ -5757,8 +5773,7 @@ again:
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
ret = btrfs_ordered_update_i_size(inode, 0, ordered);
if (!ret)
- ret = btrfs_update_inode(trans, root, inode);
- err = ret;
+ err = btrfs_update_inode_fallback(trans, root, inode);
goto out;
}
@@ -5795,8 +5810,8 @@ again:
add_pending_csums(trans, inode, ordered->file_offset, &ordered->list);
ret = btrfs_ordered_update_i_size(inode, 0, ordered);
- if (!ret)
- btrfs_update_inode(trans, root, inode);
+ if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
+ btrfs_update_inode_fallback(trans, root, inode);
ret = 0;
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset,
@@ -6251,7 +6266,7 @@ int btrfs_readpage(struct file *file, struct page *page)
{
struct extent_io_tree *tree;
tree = &BTRFS_I(page->mapping->host)->io_tree;
- return extent_read_full_page(tree, page, btrfs_get_extent);
+ return extent_read_full_page(tree, page, btrfs_get_extent, 0);
}
static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
@@ -6402,7 +6417,12 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
u64 page_start;
u64 page_end;
+ /* Need this to keep space reservations serialized */
+ mutex_lock(&inode->i_mutex);
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
+ mutex_unlock(&inode->i_mutex);
+ if (!ret)
+ ret = btrfs_update_time(vma->vm_file);
if (ret) {
if (ret == -ENOMEM)
ret = VM_FAULT_OOM;
@@ -6503,6 +6523,7 @@ static int btrfs_truncate(struct inode *inode)
struct btrfs_trans_handle *trans;
unsigned long nr;
u64 mask = root->sectorsize - 1;
+ u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (ret)
@@ -6550,19 +6571,23 @@ static int btrfs_truncate(struct inode *inode)
rsv = btrfs_alloc_block_rsv(root);
if (!rsv)
return -ENOMEM;
- btrfs_add_durable_block_rsv(root->fs_info, rsv);
+ rsv->size = min_size;
+ /*
+ * 1 for the truncate slack space
+ * 1 for the orphan item we're going to add
+ * 1 for the orphan item deletion
+ * 1 for updating the inode.
+ */
trans = btrfs_start_transaction(root, 4);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
goto out;
}
- /*
- * Reserve space for the truncate process. Truncate should be adding
- * space, but if there are snapshots it may end up using space.
- */
- ret = btrfs_truncate_reserve_metadata(trans, root, rsv);
+ /* Migrate the slack space for the truncate to our reserve */
+ ret = btrfs_block_rsv_migrate(&root->fs_info->trans_block_rsv, rsv,
+ min_size);
BUG_ON(ret);
ret = btrfs_orphan_add(trans, inode);
@@ -6571,21 +6596,6 @@ static int btrfs_truncate(struct inode *inode)
goto out;
}
- nr = trans->blocks_used;
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
-
- /*
- * Ok so we've already migrated our bytes over for the truncate, so here
- * just reserve the one slot we need for updating the inode.
- */
- trans = btrfs_start_transaction(root, 1);
- if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out;
- }
- trans->block_rsv = rsv;
-
/*
* setattr is responsible for setting the ordered_data_close flag,
* but that is only tested during the last file release. That
@@ -6607,20 +6617,31 @@ static int btrfs_truncate(struct inode *inode)
btrfs_add_ordered_operation(trans, root, inode);
while (1) {
+ ret = btrfs_block_rsv_refill(root, rsv, min_size);
+ if (ret) {
+ /*
+ * This can only happen with the original transaction we
+ * started above, every other time we shouldn't have a
+ * transaction started yet.
+ */
+ if (ret == -EAGAIN)
+ goto end_trans;
+ err = ret;
+ break;
+ }
+
if (!trans) {
- trans = btrfs_start_transaction(root, 3);
+ /* Just need the 1 for updating the inode */
+ trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
- err = PTR_ERR(trans);
- goto out;
+ ret = err = PTR_ERR(trans);
+ trans = NULL;
+ break;
}
-
- ret = btrfs_truncate_reserve_metadata(trans, root,
- rsv);
- BUG_ON(ret);
-
- trans->block_rsv = rsv;
}
+ trans->block_rsv = rsv;
+
ret = btrfs_truncate_inode_items(trans, root, inode,
inode->i_size,
BTRFS_EXTENT_DATA_KEY);
@@ -6635,7 +6656,7 @@ static int btrfs_truncate(struct inode *inode)
err = ret;
break;
}
-
+end_trans:
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
trans = NULL;
@@ -6655,14 +6676,16 @@ static int btrfs_truncate(struct inode *inode)
ret = btrfs_orphan_del(NULL, inode);
}
- trans->block_rsv = &root->fs_info->trans_block_rsv;
- ret = btrfs_update_inode(trans, root, inode);
- if (ret && !err)
- err = ret;
+ if (trans) {
+ trans->block_rsv = &root->fs_info->trans_block_rsv;
+ ret = btrfs_update_inode(trans, root, inode);
+ if (ret && !err)
+ err = ret;
- nr = trans->blocks_used;
- ret = btrfs_end_transaction_throttle(trans, root);
- btrfs_btree_balance_dirty(root, nr);
+ nr = trans->blocks_used;
+ ret = btrfs_end_transaction_throttle(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+ }
out:
btrfs_free_block_rsv(root, rsv);
@@ -6690,7 +6713,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
inode->i_op = &btrfs_dir_inode_operations;
inode->i_fop = &btrfs_dir_file_operations;
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
btrfs_i_size_write(inode, 0);
err = btrfs_update_inode(trans, new_root, inode);
@@ -6700,19 +6723,6 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
return 0;
}
-/* helper function for file defrag and space balancing. This
- * forces readahead on a given range of bytes in an inode
- */
-unsigned long btrfs_force_ra(struct address_space *mapping,
- struct file_ra_state *ra, struct file *file,
- pgoff_t offset, pgoff_t last_index)
-{
- pgoff_t req_size = last_index - offset + 1;
-
- page_cache_sync_readahead(mapping, ra, file, offset, req_size);
- return offset + req_size;
-}
-
struct inode *btrfs_alloc_inode(struct super_block *sb)
{
struct btrfs_inode *ei;
@@ -6730,19 +6740,21 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->last_sub_trans = 0;
ei->logged_trans = 0;
ei->delalloc_bytes = 0;
- ei->reserved_bytes = 0;
ei->disk_i_size = 0;
ei->flags = 0;
+ ei->csum_bytes = 0;
ei->index_cnt = (u64)-1;
ei->last_unlink_trans = 0;
- atomic_set(&ei->outstanding_extents, 0);
- atomic_set(&ei->reserved_extents, 0);
+ spin_lock_init(&ei->lock);
+ ei->outstanding_extents = 0;
+ ei->reserved_extents = 0;
ei->ordered_data_close = 0;
ei->orphan_meta_reserved = 0;
ei->dummy_inode = 0;
ei->in_defrag = 0;
+ ei->delalloc_meta_reserved = 0;
ei->force_compress = BTRFS_COMPRESS_NONE;
ei->delayed_node = NULL;
@@ -6775,8 +6787,10 @@ void btrfs_destroy_inode(struct inode *inode)
WARN_ON(!list_empty(&inode->i_dentry));
WARN_ON(inode->i_data.nrpages);
- WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents));
- WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents));
+ WARN_ON(BTRFS_I(inode)->outstanding_extents);
+ WARN_ON(BTRFS_I(inode)->reserved_extents);
+ WARN_ON(BTRFS_I(inode)->delalloc_bytes);
+ WARN_ON(BTRFS_I(inode)->csum_bytes);
/*
* This can happen where we create an inode, but somebody else also
@@ -6831,7 +6845,7 @@ int btrfs_drop_inode(struct inode *inode)
struct btrfs_root *root = BTRFS_I(inode)->root;
if (btrfs_root_refs(&root->root_item) == 0 &&
- !is_free_space_inode(root, inode))
+ !btrfs_is_free_space_inode(root, inode))
return 1;
else
return generic_drop_inode(inode);
@@ -6900,11 +6914,13 @@ static int btrfs_getattr(struct vfsmount *mnt,
struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
+ u32 blocksize = inode->i_sb->s_blocksize;
+
generic_fillattr(inode, stat);
- stat->dev = BTRFS_I(inode)->root->anon_super.s_dev;
+ stat->dev = BTRFS_I(inode)->root->anon_dev;
stat->blksize = PAGE_CACHE_SIZE;
- stat->blocks = (inode_get_bytes(inode) +
- BTRFS_I(inode)->delalloc_bytes) >> 9;
+ stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
+ ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9;
return 0;
}
@@ -7069,9 +7085,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
BUG_ON(ret);
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
- struct dentry *parent = dget_parent(new_dentry);
+ struct dentry *parent = new_dentry->d_parent;
btrfs_log_new_name(trans, old_inode, old_dir, parent);
- dput(parent);
btrfs_end_log_trans(root);
}
out_fail:
@@ -7181,21 +7196,32 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
goto out_unlock;
}
+ /*
+ * If the active LSM wants to access the inode during
+ * d_instantiate it needs these. Smack checks to see
+ * if the filesystem supports xattrs by looking at the
+ * ops vector.
+ */
+ inode->i_fop = &btrfs_file_operations;
+ inode->i_op = &btrfs_file_inode_operations;
+
err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
if (err)
drop_inode = 1;
else {
inode->i_mapping->a_ops = &btrfs_aops;
inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
- inode->i_fop = &btrfs_file_operations;
- inode->i_op = &btrfs_file_inode_operations;
BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
}
if (drop_inode)
goto out_unlock;
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path) {
+ err = -ENOMEM;
+ drop_inode = 1;
+ goto out_unlock;
+ }
key.objectid = btrfs_ino(inode);
key.offset = 0;
btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
@@ -7233,6 +7259,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
drop_inode = 1;
out_unlock:
+ if (!err)
+ d_instantiate(dentry, inode);
nr = trans->blocks_used;
btrfs_end_transaction_throttle(trans, root);
if (drop_inode) {
@@ -7332,15 +7360,19 @@ static int btrfs_set_page_dirty(struct page *page)
return __set_page_dirty_nobuffers(page);
}
-static int btrfs_permission(struct inode *inode, int mask, unsigned int flags)
+static int btrfs_permission(struct inode *inode, int mask)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
+ umode_t mode = inode->i_mode;
- if (btrfs_root_readonly(root) && (mask & MAY_WRITE))
- return -EROFS;
- if ((BTRFS_I(inode)->flags & BTRFS_INODE_READONLY) && (mask & MAY_WRITE))
- return -EACCES;
- return generic_permission(inode, mask, flags, btrfs_check_acl);
+ if (mask & MAY_WRITE &&
+ (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))) {
+ if (btrfs_root_readonly(root))
+ return -EROFS;
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
+ return -EACCES;
+ }
+ return generic_permission(inode, mask);
}
static const struct inode_operations btrfs_dir_inode_operations = {
@@ -7360,10 +7392,12 @@ static const struct inode_operations btrfs_dir_inode_operations = {
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
+ .get_acl = btrfs_get_acl,
};
static const struct inode_operations btrfs_dir_ro_inode_operations = {
.lookup = btrfs_lookup,
.permission = btrfs_permission,
+ .get_acl = btrfs_get_acl,
};
static const struct file_operations btrfs_dir_file_operations = {
@@ -7385,7 +7419,6 @@ static struct extent_io_ops btrfs_extent_io_ops = {
.readpage_end_io_hook = btrfs_readpage_end_io_hook,
.writepage_end_io_hook = btrfs_writepage_end_io_hook,
.writepage_start_hook = btrfs_writepage_start_hook,
- .readpage_io_failed_hook = btrfs_io_failed_hook,
.set_bit_hook = btrfs_set_bit_hook,
.clear_bit_hook = btrfs_clear_bit_hook,
.merge_extent_hook = btrfs_merge_extent_hook,
@@ -7432,6 +7465,7 @@ static const struct inode_operations btrfs_file_inode_operations = {
.removexattr = btrfs_removexattr,
.permission = btrfs_permission,
.fiemap = btrfs_fiemap,
+ .get_acl = btrfs_get_acl,
};
static const struct inode_operations btrfs_special_inode_operations = {
.getattr = btrfs_getattr,
@@ -7441,19 +7475,23 @@ static const struct inode_operations btrfs_special_inode_operations = {
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
+ .get_acl = btrfs_get_acl,
};
static const struct inode_operations btrfs_symlink_inode_operations = {
.readlink = generic_readlink,
.follow_link = page_follow_link_light,
.put_link = page_put_link,
.getattr = btrfs_getattr,
+ .setattr = btrfs_setattr,
.permission = btrfs_permission,
.setxattr = btrfs_setxattr,
.getxattr = btrfs_getxattr,
.listxattr = btrfs_listxattr,
.removexattr = btrfs_removexattr,
+ .get_acl = btrfs_get_acl,
};
const struct dentry_operations btrfs_dentry_operations = {
.d_delete = btrfs_dentry_delete,
+ .d_release = btrfs_dentry_release,
};