about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- fs/btrfs/file.c 293
1 files changed, 251 insertions, 42 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index fa4ef18..97fbe93 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -74,7 +74,7 @@ struct inode_defrag {
* If an existing record is found the defrag item you
* pass in is freed
*/
-static int __btrfs_add_inode_defrag(struct inode *inode,
+static void __btrfs_add_inode_defrag(struct inode *inode,
struct inode_defrag *defrag)
{
struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -106,11 +106,11 @@ static int __btrfs_add_inode_defrag(struct inode *inode,
BTRFS_I(inode)->in_defrag = 1;
rb_link_node(&defrag->rb_node, parent, p);
rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes);
- return 0;
+ return;
exists:
kfree(defrag);
- return 0;
+ return;
}
@@ -123,7 +123,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
{
struct btrfs_root *root = BTRFS_I(inode)->root;
struct inode_defrag *defrag;
- int ret = 0;
u64 transid;
if (!btrfs_test_opt(root, AUTO_DEFRAG))
@@ -150,9 +149,11 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
spin_lock(&root->fs_info->defrag_inodes_lock);
if (!BTRFS_I(inode)->in_defrag)
- ret = __btrfs_add_inode_defrag(inode, defrag);
+ __btrfs_add_inode_defrag(inode, defrag);
+ else
+ kfree(defrag);
spin_unlock(&root->fs_info->defrag_inodes_lock);
- return ret;
+ return 0;
}
/*
@@ -855,7 +856,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
- BUG_ON(!path);
+ if (!path)
+ return -ENOMEM;
again:
recow = 0;
split = start;
@@ -1034,11 +1036,13 @@ out:
* on error we return an unlocked page and the error value
* on success we return a locked page and 0
*/
-static int prepare_uptodate_page(struct page *page, u64 pos)
+static int prepare_uptodate_page(struct page *page, u64 pos,
+ bool force_uptodate)
{
int ret = 0;
- if ((pos & (PAGE_CACHE_SIZE - 1)) && !PageUptodate(page)) {
+ if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
+ !PageUptodate(page)) {
ret = btrfs_readpage(NULL, page);
if (ret)
return ret;
@@ -1059,12 +1063,13 @@ static int prepare_uptodate_page(struct page *page, u64 pos)
static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
struct page **pages, size_t num_pages,
loff_t pos, unsigned long first_index,
- unsigned long last_index, size_t write_bytes)
+ size_t write_bytes, bool force_uptodate)
{
struct extent_state *cached_state = NULL;
int i;
unsigned long index = pos >> PAGE_CACHE_SHIFT;
struct inode *inode = fdentry(file)->d_inode;
+ gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
int err = 0;
int faili = 0;
u64 start_pos;
@@ -1073,15 +1078,10 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
start_pos = pos & ~((u64)root->sectorsize - 1);
last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT;
- if (start_pos > inode->i_size) {
- err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
- if (err)
- return err;
- }
-
again:
for (i = 0; i < num_pages; i++) {
- pages[i] = grab_cache_page(inode->i_mapping, index + i);
+ pages[i] = find_or_create_page(inode->i_mapping, index + i,
+ mask);
if (!pages[i]) {
faili = i - 1;
err = -ENOMEM;
@@ -1089,10 +1089,11 @@ again:
}
if (i == 0)
- err = prepare_uptodate_page(pages[i], pos);
+ err = prepare_uptodate_page(pages[i], pos,
+ force_uptodate);
if (i == num_pages - 1)
err = prepare_uptodate_page(pages[i],
- pos + write_bytes);
+ pos + write_bytes, false);
if (err) {
page_cache_release(pages[i]);
faili = i - 1;
@@ -1158,20 +1159,21 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct page **pages = NULL;
unsigned long first_index;
- unsigned long last_index;
size_t num_written = 0;
int nrptrs;
int ret = 0;
+ bool force_page_uptodate = false;
nrptrs = min((iov_iter_count(i) + PAGE_CACHE_SIZE - 1) /
PAGE_CACHE_SIZE, PAGE_CACHE_SIZE /
(sizeof(struct page *)));
+ nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
+ nrptrs = max(nrptrs, 8);
pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
if (!pages)
return -ENOMEM;
first_index = pos >> PAGE_CACHE_SHIFT;
- last_index = (pos + iov_iter_count(i)) >> PAGE_CACHE_SHIFT;
while (iov_iter_count(i) > 0) {
size_t offset = pos & (PAGE_CACHE_SIZE - 1);
@@ -1205,8 +1207,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* contents of pages from loop to loop
*/
ret = prepare_pages(root, file, pages, num_pages,
- pos, first_index, last_index,
- write_bytes);
+ pos, first_index, write_bytes,
+ force_page_uptodate);
if (ret) {
btrfs_delalloc_release_space(inode,
num_pages << PAGE_CACHE_SHIFT);
@@ -1223,12 +1225,15 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
if (copied < write_bytes)
nrptrs = 1;
- if (copied == 0)
+ if (copied == 0) {
+ force_page_uptodate = true;
dirty_pages = 0;
- else
+ } else {
+ force_page_uptodate = false;
dirty_pages = (copied + offset +
PAGE_CACHE_SIZE - 1) >>
PAGE_CACHE_SHIFT;
+ }
/*
* If we had a short copy we need to release the excess delaloc
@@ -1238,9 +1243,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
* managed to copy.
*/
if (num_pages > dirty_pages) {
- if (copied > 0)
- atomic_inc(
- &BTRFS_I(inode)->outstanding_extents);
+ if (copied > 0) {
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ }
btrfs_delalloc_release_space(inode,
(num_pages - dirty_pages) <<
PAGE_CACHE_SHIFT);
@@ -1336,6 +1343,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
struct inode *inode = fdentry(file)->d_inode;
struct btrfs_root *root = BTRFS_I(inode)->root;
loff_t *ppos = &iocb->ki_pos;
+ u64 start_pos;
ssize_t num_written = 0;
ssize_t err = 0;
size_t count, ocount;
@@ -1381,9 +1389,22 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
goto out;
}
- file_update_time(file);
+ err = btrfs_update_time(file);
+ if (err) {
+ mutex_unlock(&inode->i_mutex);
+ goto out;
+ }
BTRFS_I(inode)->sequence++;
+ start_pos = round_down(pos, root->sectorsize);
+ if (start_pos > i_size_read(inode)) {
+ err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
+ if (err) {
+ mutex_unlock(&inode->i_mutex);
+ goto out;
+ }
+ }
+
if (unlikely(file->f_flags & O_DIRECT)) {
num_written = __btrfs_direct_write(iocb, iov, nr_segs,
pos, ppos, count, ocount);
@@ -1452,7 +1473,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
* important optimization for directories because holding the mutex prevents
* new operations on the dir while we write to disk.
*/
-int btrfs_sync_file(struct file *file, int datasync)
+int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
@@ -1462,9 +1483,13 @@ int btrfs_sync_file(struct file *file, int datasync)
trace_btrfs_sync_file(file, datasync);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
/* we wait first, since the writeback may change the inode */
root->log_batch++;
- /* the VFS called filemap_fdatawrite for us */
btrfs_wait_ordered_range(inode, 0, (u64)-1);
root->log_batch++;
@@ -1472,8 +1497,10 @@ int btrfs_sync_file(struct file *file, int datasync)
* check the transaction that last modified this inode
* and see if its already been committed
*/
- if (!BTRFS_I(inode)->last_trans)
+ if (!BTRFS_I(inode)->last_trans) {
+ mutex_unlock(&inode->i_mutex);
goto out;
+ }
/*
* if the last transaction that changed this file was before
@@ -1484,6 +1511,7 @@ int btrfs_sync_file(struct file *file, int datasync)
if (BTRFS_I(inode)->last_trans <=
root->fs_info->last_trans_committed) {
BTRFS_I(inode)->last_trans = 0;
+ mutex_unlock(&inode->i_mutex);
goto out;
}
@@ -1496,12 +1524,15 @@ int btrfs_sync_file(struct file *file, int datasync)
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
+ mutex_unlock(&inode->i_mutex);
goto out;
}
ret = btrfs_log_dentry_safe(trans, root, dentry);
- if (ret < 0)
+ if (ret < 0) {
+ mutex_unlock(&inode->i_mutex);
goto out;
+ }
/* we've logged all the items and now have a consistent
* version of the file in the log. It is possible that
@@ -1513,7 +1544,7 @@ int btrfs_sync_file(struct file *file, int datasync)
* file again, but that will end up using the synchronization
* inside btrfs_sync_log to keep things safe.
*/
- mutex_unlock(&dentry->d_inode->i_mutex);
+ mutex_unlock(&inode->i_mutex);
if (ret != BTRFS_NO_LOG_SYNC) {
if (ret > 0) {
@@ -1528,7 +1559,6 @@ int btrfs_sync_file(struct file *file, int datasync)
} else {
ret = btrfs_end_transaction(trans, root);
}
- mutex_lock(&dentry->d_inode->i_mutex);
out:
return ret > 0 ? -EIO : ret;
}
@@ -1592,10 +1622,6 @@ static long btrfs_fallocate(struct file *file, int mode,
goto out;
}
- ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
- if (ret)
- goto out;
-
locked_end = alloc_end - 1;
while (1) {
struct btrfs_ordered_extent *ordered;
@@ -1629,23 +1655,53 @@ static long btrfs_fallocate(struct file *file, int mode,
cur_offset = alloc_start;
while (1) {
+ u64 actual_end;
+
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
alloc_end - cur_offset, 0);
BUG_ON(IS_ERR_OR_NULL(em));
last_byte = min(extent_map_end(em), alloc_end);
+ actual_end = min_t(u64, extent_map_end(em), offset + len);
last_byte = (last_byte + mask) & ~mask;
+
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
!test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+
+ /*
+ * Make sure we have enough space before we do the
+ * allocation.
+ */
+ ret = btrfs_check_data_free_space(inode, last_byte -
+ cur_offset);
+ if (ret) {
+ free_extent_map(em);
+ break;
+ }
+
ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
last_byte - cur_offset,
1 << inode->i_blkbits,
offset + len,
&alloc_hint);
+
+ /* Let go of our reservation. */
+ btrfs_free_reserved_data_space(inode, last_byte -
+ cur_offset);
if (ret < 0) {
free_extent_map(em);
break;
}
+ } else if (actual_end > inode->i_size &&
+ !(mode & FALLOC_FL_KEEP_SIZE)) {
+ /*
+ * We didn't need to allocate any more space, but we
+ * still extended the size of the file so we need to
+ * update i_size.
+ */
+ inode->i_ctime = CURRENT_TIME;
+ i_size_write(inode, actual_end);
+ btrfs_ordered_update_i_size(inode, actual_end, NULL);
}
free_extent_map(em);
@@ -1657,15 +1713,168 @@ static long btrfs_fallocate(struct file *file, int mode,
}
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
&cached_state, GFP_NOFS);
-
- btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
out:
mutex_unlock(&inode->i_mutex);
return ret;
}
+/*
+* Walk the inode's extent maps starting at *offset and look for the first
+* hole (origin == SEEK_HOLE) or the first non-hole extent
+* (origin == SEEK_DATA).
+*
+* On success 0 is returned and *offset holds the position that was found,
+* clamped to inode->i_size.  -ENXIO is returned when i_size is 0, when an
+* extent lookup fails, when SEEK_DATA lands on a delalloc extent at or past
+* i_size, or when an extent flagged EXTENT_FLAG_VACANCY is reached without
+* a match.
+*
+* The range [lockstart, lockend] of the inode's io_tree is locked for the
+* duration of the walk and unlocked before returning (the i_size == 0
+* early return happens before the lock is taken).
+*
+* NOTE(review): the error carried by a failed btrfs_get_extent_fiemap() is
+* discarded and replaced by -ENXIO -- confirm that this is intended.
+*/
+static int find_desired_extent(struct inode *inode, loff_t *offset, int origin)
+{
+struct btrfs_root *root = BTRFS_I(inode)->root;
+struct extent_map *em;
+struct extent_state *cached_state = NULL;
+u64 lockstart = *offset;
+u64 lockend = i_size_read(inode);
+u64 start = *offset;
+u64 orig_start = *offset;
+u64 len = i_size_read(inode);
+u64 last_end = 0;
+int ret = 0;
+
+/*
+* Lock at least up to one sector, and if that still does not reach past
+* lockstart, lock one sector beyond lockstart.
+*/
+lockend = max_t(u64, root->sectorsize, lockend);
+if (lockend <= lockstart)
+lockend = lockstart + root->sectorsize;
+
+/* len is the lookup length used for every extent query in the loop. */
+len = lockend - lockstart + 1;
+
+len = max_t(u64, len, root->sectorsize);
+if (inode->i_size == 0)
+return -ENXIO;
+
+lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, 0,
+&cached_state, GFP_NOFS);
+
+/*
+* Delalloc is such a pain. If we have a hole and we have pending
+* delalloc for a portion of the hole we will get back a hole that
+* exists for the entire range since it hasn't been actually written
+* yet. So to take care of this case we need to look for an extent just
+* before the position we want in case there is outstanding delalloc
+* going on here.
+*/
+if (origin == SEEK_HOLE && start != 0) {
+/*
+* Query the sector just before start (or the first sector when
+* start lies within it) to seed last_end.
+*/
+if (start <= root->sectorsize)
+em = btrfs_get_extent_fiemap(inode, NULL, 0, 0,
+root->sectorsize, 0);
+else
+em = btrfs_get_extent_fiemap(inode, NULL, 0,
+start - root->sectorsize,
+root->sectorsize, 0);
+if (IS_ERR(em)) {
+ret = -ENXIO;
+goto out;
+}
+last_end = em->start + em->len;
+if (em->block_start == EXTENT_MAP_DELALLOC)
+last_end = min_t(u64, last_end, inode->i_size);
+free_extent_map(em);
+}
+
+while (1) {
+em = btrfs_get_extent_fiemap(inode, NULL, 0, start, len, 0);
+if (IS_ERR(em)) {
+ret = -ENXIO;
+break;
+}
+
+if (em->block_start == EXTENT_MAP_HOLE) {
+/*
+* A hole flagged VACANCY is rejected when no earlier extent
+* ended beyond the position the caller started from.
+*/
+if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+if (last_end <= orig_start) {
+free_extent_map(em);
+ret = -ENXIO;
+break;
+}
+}
+
+if (origin == SEEK_HOLE) {
+*offset = start;
+free_extent_map(em);
+break;
+}
+} else {
+if (origin == SEEK_DATA) {
+/* Delalloc at or past i_size does not count as data. */
+if (em->block_start == EXTENT_MAP_DELALLOC) {
+if (start >= inode->i_size) {
+free_extent_map(em);
+ret = -ENXIO;
+break;
+}
+}
+
+*offset = start;
+free_extent_map(em);
+break;
+}
+}
+
+/* No match in this extent: continue from where it ends. */
+start = em->start + em->len;
+last_end = em->start + em->len;
+
+if (em->block_start == EXTENT_MAP_DELALLOC)
+last_end = min_t(u64, last_end, inode->i_size);
+
+/* A VACANCY-flagged extent ends the walk without a match. */
+if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+free_extent_map(em);
+ret = -ENXIO;
+break;
+}
+free_extent_map(em);
+cond_resched();
+}
+/* Never report a position beyond i_size. */
+if (!ret)
+*offset = min(*offset, inode->i_size);
+out:
+unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+&cached_state, GFP_NOFS);
+return ret;
+}
+
+/*
+* llseek handler installed in btrfs_file_operations.
+*
+* SEEK_END and SEEK_CUR are handed to generic_file_llseek() and its result
+* is returned unchanged.  SEEK_DATA and SEEK_HOLE return -ENXIO when offset
+* is at or beyond i_size, otherwise find_desired_extent() resolves the
+* target position.  Any other origin has no case in the switch, so offset
+* is used as passed in.
+*
+* Except on the generic path, the resulting offset is validated (-EINVAL if
+* negative without FMODE_UNSIGNED_OFFSET, or if above s_maxbytes) and then
+* stored in file->f_pos.  Returns the new offset or a negative errno.
+* inode->i_mutex is held for the whole call and dropped on every exit path.
+*/
+static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin)
+{
+struct inode *inode = file->f_mapping->host;
+int ret;
+
+mutex_lock(&inode->i_mutex);
+switch (origin) {
+case SEEK_END:
+case SEEK_CUR:
+/* Result (position or error) goes straight back to the caller. */
+offset = generic_file_llseek(file, offset, origin);
+goto out;
+case SEEK_DATA:
+case SEEK_HOLE:
+/* Nothing to find at or beyond the end of the file. */
+if (offset >= i_size_read(inode)) {
+mutex_unlock(&inode->i_mutex);
+return -ENXIO;
+}
+
+ret = find_desired_extent(inode, &offset, origin);
+if (ret) {
+mutex_unlock(&inode->i_mutex);
+return ret;
+}
+}
+
+/* Reject offsets the file cannot represent. */
+if (offset < 0 && !(file->f_mode & FMODE_UNSIGNED_OFFSET)) {
+offset = -EINVAL;
+goto out;
+}
+if (offset > inode->i_sb->s_maxbytes) {
+offset = -EINVAL;
+goto out;
+}
+
+/* Special lock needed here? */
+if (offset != file->f_pos) {
+file->f_pos = offset;
+file->f_version = 0;
+}
+out:
+mutex_unlock(&inode->i_mutex);
+return offset;
+}
+
const struct file_operations btrfs_file_operations = {
- .llseek = generic_file_llseek,
+ .llseek = btrfs_file_llseek,
.read = do_sync_read,
.write = do_sync_write,
.aio_read = generic_file_aio_read,