aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/balloc.c49
-rw-r--r--fs/ext4/dir.c3
-rw-r--r--fs/ext4/ext4.h6
-rw-r--r--fs/ext4/ext4_extents.h1
-rw-r--r--fs/ext4/ext4_i.h7
-rw-r--r--fs/ext4/extents.c32
-rw-r--r--fs/ext4/inode.c184
-rw-r--r--fs/ext4/mballoc.c20
-rw-r--r--fs/ext4/super.c5
9 files changed, 257 insertions, 50 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6369bac..495ab21 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1701,7 +1701,12 @@ ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode,
}
sbi = EXT4_SB(sb);
- *count = ext4_has_free_blocks(sbi, *count);
+ if (!EXT4_I(inode)->i_delalloc_reserved_flag) {
+ /*
+ * With delalloc we already reserved the blocks
+ */
+ *count = ext4_has_free_blocks(sbi, *count);
+ }
if (*count == 0) {
*errp = -ENOSPC;
return 0; /*return with ENOSPC error */
@@ -1902,7 +1907,8 @@ allocated:
le16_add_cpu(&gdp->bg_free_blocks_count, -num);
gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp);
spin_unlock(sb_bgl_lock(sbi, group_no));
- percpu_counter_sub(&sbi->s_freeblocks_counter, num);
+ if (!EXT4_I(inode)->i_delalloc_reserved_flag)
+ percpu_counter_sub(&sbi->s_freeblocks_counter, num);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, group_no);
@@ -1976,40 +1982,49 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode,
}
/*
- * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
+ * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
*
* @handle: handle to this transaction
* @inode: file inode
* @goal: given target block(filesystem wide)
+ * @count: total number of blocks need
* @errp: error code
*
- * Return allocated block number on success
+ * Return 1st allocated block numberon success, *count stores total account
+ * error stores in errp pointer
*/
-ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, int *errp)
+ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t goal, unsigned long *count, int *errp)
{
- unsigned long count = 1;
- return do_blk_alloc(handle, inode, 0, goal,
- &count, errp, EXT4_META_BLOCK);
+ ext4_fsblk_t ret;
+ ret = do_blk_alloc(handle, inode, 0, goal,
+ count, errp, EXT4_META_BLOCK);
+ /*
+ * Account for the allocated meta blocks
+ */
+ if (!(*errp)) {
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ EXT4_I(inode)->i_allocated_meta_blocks += *count;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ }
+ return ret;
}
/*
- * ext4_new_meta_blocks() -- allocate block for meta data (indexing) blocks
+ * ext4_new_meta_block() -- allocate block for meta data (indexing) blocks
*
* @handle: handle to this transaction
* @inode: file inode
* @goal: given target block(filesystem wide)
- * @count: total number of blocks need
* @errp: error code
*
- * Return 1st allocated block numberon success, *count stores total account
- * error stores in errp pointer
+ * Return allocated block number on success
*/
-ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
- ext4_fsblk_t goal, unsigned long *count, int *errp)
+ext4_fsblk_t ext4_new_meta_block(handle_t *handle, struct inode *inode,
+ ext4_fsblk_t goal, int *errp)
{
- return do_blk_alloc(handle, inode, 0, goal,
- count, errp, EXT4_META_BLOCK);
+ unsigned long count = 1;
+ return ext4_new_meta_blocks(handle, inode, goal, &count, errp);
}
/*
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 5ed5108..d3d23d7 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -129,7 +129,8 @@ static int ext4_readdir(struct file * filp,
struct buffer_head *bh = NULL;
map_bh.b_state = 0;
- err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0);
+ err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh,
+ 0, 0, 0);
if (err > 0) {
pgoff_t index = map_bh.b_blocknr >>
(PAGE_CACHE_SHIFT - inode->i_blkbits);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ee9576d..0962f4e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -74,6 +74,9 @@
#define EXT4_MB_HINT_GOAL_ONLY 256
/* goal is meaningful */
#define EXT4_MB_HINT_TRY_GOAL 512
+/* blocks already pre-reserved by delayed allocation */
+#define EXT4_MB_DELALLOC_RESERVED 1024
+
struct ext4_allocation_request {
/* target inode for block we're allocating */
@@ -1041,6 +1044,7 @@ extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
/* inode.c */
+void ext4_da_release_space(struct inode *inode, int used, int to_free);
int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
struct buffer_head *bh, ext4_fsblk_t blocknr);
struct buffer_head *ext4_getblk(handle_t *, struct inode *,
@@ -1234,7 +1238,7 @@ extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
extern int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode,
sector_t block, unsigned long max_blocks,
struct buffer_head *bh, int create,
- int extend_disksize);
+ int extend_disksize, int flag);
#endif /* __KERNEL__ */
#endif /* _EXT4_H */
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 75333b5..6c166c0 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -212,6 +212,7 @@ static inline int ext4_ext_get_actual_len(struct ext4_extent *ext)
(le16_to_cpu(ext->ee_len) - EXT_INIT_MAX_LEN));
}
+extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
extern int ext4_extent_tree_init(handle_t *, struct inode *);
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index c2903ef..ef7409f 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -163,6 +163,13 @@ struct ext4_inode_info {
/* mballoc */
struct list_head i_prealloc_list;
spinlock_t i_prealloc_lock;
+
+ /* allocation reservation info for delalloc */
+ unsigned long i_reserved_data_blocks;
+ unsigned long i_reserved_meta_blocks;
+ unsigned long i_allocated_meta_blocks;
+ unsigned short i_delalloc_reserved_flag;
+ spinlock_t i_block_reservation_lock;
};
#endif /* _EXT4_I */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7844bbb..dabc3b6 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -248,6 +248,36 @@ static int ext4_ext_space_root_idx(struct inode *inode)
return size;
}
+/*
+ * Calculate the number of metadata blocks needed
+ * to allocate @blocks
+ * Worse case is one block per extent
+ */
+int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ int lcap, icap, rcap, leafs, idxs, num;
+ int newextents = blocks;
+
+ rcap = ext4_ext_space_root_idx(inode);
+ lcap = ext4_ext_space_block(inode);
+ icap = ext4_ext_space_block_idx(inode);
+
+ /* number of new leaf blocks needed */
+ num = leafs = (newextents + lcap - 1) / lcap;
+
+ /*
+ * Worse case, we need separate index block(s)
+ * to link all new leaf blocks
+ */
+ idxs = (leafs + icap - 1) / icap;
+ do {
+ num += idxs;
+ idxs = (idxs + icap - 1) / icap;
+ } while (idxs > rcap);
+
+ return num;
+}
+
static int
ext4_ext_max_entries(struct inode *inode, int depth)
{
@@ -2910,7 +2940,7 @@ retry:
}
ret = ext4_get_blocks_wrap(handle, inode, block,
max_blocks, &map_bh,
- EXT4_CREATE_UNINITIALIZED_EXT, 0);
+ EXT4_CREATE_UNINITIALIZED_EXT, 0, 0);
if (ret <= 0) {
#ifdef EXT4FS_DEBUG
WARN_ON(ret <= 0);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 2bef4f8..a6b800c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,6 +39,7 @@
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
+#include "ext4_extents.h"
static inline int ext4_begin_ordered_truncate(struct inode *inode,
loff_t new_size)
@@ -982,7 +983,7 @@ out:
*/
int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
unsigned long max_blocks, struct buffer_head *bh,
- int create, int extend_disksize)
+ int create, int extend_disksize, int flag)
{
int retval;
@@ -1023,6 +1024,15 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
* with create == 1 flag.
*/
down_write((&EXT4_I(inode)->i_data_sem));
+
+ /*
+ * if the caller is from delayed allocation writeout path
+ * we have already reserved fs blocks for allocation
+ * let the underlying get_block() function know to
+ * avoid double accounting
+ */
+ if (flag)
+ EXT4_I(inode)->i_delalloc_reserved_flag = 1;
/*
* We need to check for EXT4 here because migrate
* could have changed the inode type in between
@@ -1044,6 +1054,18 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block,
~EXT4_EXT_MIGRATE;
}
}
+
+ if (flag) {
+ EXT4_I(inode)->i_delalloc_reserved_flag = 0;
+ /*
+ * Update reserved blocks/metadata blocks
+ * after successful block allocation
+ * which were deferred till now
+ */
+ if ((retval > 0) && buffer_delay(bh))
+ ext4_da_release_space(inode, retval, 0);
+ }
+
up_write((&EXT4_I(inode)->i_data_sem));
return retval;
}
@@ -1069,7 +1091,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock,
}
ret = ext4_get_blocks_wrap(handle, inode, iblock,
- max_blocks, bh_result, create, 0);
+ max_blocks, bh_result, create, 0, 0);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
ret = 0;
@@ -1095,7 +1117,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
dummy.b_blocknr = -1000;
buffer_trace_init(&dummy.b_history);
err = ext4_get_blocks_wrap(handle, inode, block, 1,
- &dummy, create, 1);
+ &dummy, create, 1, 0);
/*
* ext4_get_blocks_handle() returns number of blocks
* mapped. 0 in case of a HOLE.
@@ -1409,6 +1431,122 @@ static int ext4_journalled_write_end(struct file *file,
return ret ? ret : copied;
}
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate @blocks for non extent file based file
+ */
+static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
+ int ind_blks, dind_blks, tind_blks;
+
+ /* number of new indirect blocks needed */
+ ind_blks = (blocks + icap - 1) / icap;
+
+ dind_blks = (ind_blks + icap - 1) / icap;
+
+ tind_blks = 1;
+
+ return ind_blks + dind_blks + tind_blks;
+}
+
+/*
+ * Calculate the number of metadata blocks need to reserve
+ * to allocate given number of blocks
+ */
+static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+{
+ if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
+ return ext4_ext_calc_metadata_amount(inode, blocks);
+
+ return ext4_indirect_calc_metadata_amount(inode, blocks);
+}
+
+static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ unsigned long md_needed, mdblocks, total = 0;
+
+ /*
+ * recalculate the amount of metadata blocks to reserve
+ * in order to allocate nrblocks
+ * worse case is one extent per block
+ */
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
+ mdblocks = ext4_calc_metadata_amount(inode, total);
+ BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
+
+ md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
+ total = md_needed + nrblocks;
+
+ if (ext4_has_free_blocks(sbi, total) < total) {
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ return -ENOSPC;
+ }
+
+ /* reduce fs free blocks counter */
+ percpu_counter_sub(&sbi->s_freeblocks_counter, total);
+
+ EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
+ EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ return 0; /* success */
+}
+
+void ext4_da_release_space(struct inode *inode, int used, int to_free)
+{
+ struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+ int total, mdb, mdb_free, release;
+
+ spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
+ /* recalculate the number of metablocks still need to be reserved */
+ total = EXT4_I(inode)->i_reserved_data_blocks - used - to_free;
+ mdb = ext4_calc_metadata_amount(inode, total);
+
+ /* figure out how many metablocks to release */
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
+
+ /* Account for allocated meta_blocks */
+ mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
+
+ release = to_free + mdb_free;
+
+ /* update fs free blocks counter for truncate case */
+ percpu_counter_add(&sbi->s_freeblocks_counter, release);
+
+ /* update per-inode reservations */
+ BUG_ON(used + to_free > EXT4_I(inode)->i_reserved_data_blocks);
+ EXT4_I(inode)->i_reserved_data_blocks -= (used + to_free);
+
+ BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
+ EXT4_I(inode)->i_reserved_meta_blocks = mdb;
+ EXT4_I(inode)->i_allocated_meta_blocks = 0;
+ spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+}
+
+static void ext4_da_page_release_reservation(struct page *page,
+ unsigned long offset)
+{
+ int to_release = 0;
+ struct buffer_head *head, *bh;
+ unsigned int curr_off = 0;
+
+ head = page_buffers(page);
+ bh = head;
+ do {
+ unsigned int next_off = curr_off + bh->b_size;
+
+ if ((offset <= curr_off) && (buffer_delay(bh))) {
+ to_release++;
+ clear_buffer_delay(bh);
+ }
+ curr_off = next_off;
+ } while ((bh = bh->b_this_page) != head);
+ ext4_da_release_space(page->mapping->host, 0, to_release);
+}
/*
* Delayed allocation stuff
@@ -1829,14 +1967,18 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* preallocated blocks are unmapped but should treated
* the same as allocated blocks.
*/
- ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0);
- if (ret == 0) {
- /* the block isn't allocated yet, let's reserve space */
- /* XXX: call reservation here */
+ ret = ext4_get_blocks_wrap(NULL, inode, iblock, 1, bh_result, 0, 0, 0);
+ if ((ret == 0) && !buffer_delay(bh_result)) {
+ /* the block isn't (pre)allocated yet, let's reserve space */
/*
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
+ ret = ext4_da_reserve_space(inode, 1);
+ if (ret)
+ /* not enough space to reserve */
+ return ret;
+
map_bh(bh_result, inode->i_sb, 0);
set_buffer_new(bh_result);
set_buffer_delay(bh_result);
@@ -1847,7 +1989,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
return ret;
}
-
+#define EXT4_DELALLOC_RSVED 1
static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
@@ -1865,7 +2007,7 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock,
}
ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks,
- bh_result, create, 0);
+ bh_result, create, 0, EXT4_DELALLOC_RSVED);
if (ret > 0) {
bh_result->b_size = (ret << inode->i_blkbits);
@@ -1952,7 +2094,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- int ret;
+ int ret, retries = 0;
struct page *page;
pgoff_t index;
unsigned from, to;
@@ -1963,6 +2105,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
+retry:
/*
* With delayed allocation, we don't log the i_disksize update
* if there is delayed block allocation. But we still need
@@ -1988,6 +2131,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
page_cache_release(page);
}
+ if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
out:
return ret;
}
@@ -2040,9 +2185,6 @@ static int ext4_da_write_end(struct file *file,
static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
{
- struct buffer_head *head, *bh;
- unsigned int curr_off = 0;
-
/*
* Drop reserved blocks
*/
@@ -2050,21 +2192,7 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset)
if (!page_has_buffers(page))
goto out;
- head = page_buffers(page);
- bh = head;
- do {
- unsigned int next_off = curr_off + bh->b_size;
-
- /*
- * is this block fully invalidated?
- */
- if (offset <= curr_off && buffer_delay(bh)) {
- clear_buffer_delay(bh);
- /* XXX: add real stuff here */
- }
- curr_off = next_off;
- bh = bh->b_this_page;
- } while (bh != head);
+ ext4_da_page_release_reservation(page, offset);
out:
ext4_invalidatepage(page, offset);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 8d254ca..8d141a2 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2964,7 +2964,15 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len);
gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
- percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len);
+
+ /*
+ * free blocks account has already be reduced/reserved
+ * at write_begin() time for delayed allocation
+ * do not double accounting
+ */
+ if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
+ percpu_counter_sub(&sbi->s_freeblocks_counter,
+ ac->ac_b_ex.fe_len);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
@@ -4169,7 +4177,12 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
&(ar->len), errp);
return block;
}
- ar->len = ext4_has_free_blocks(sbi, ar->len);
+ if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) {
+ /*
+ * With delalloc we already reserved the blocks
+ */
+ ar->len = ext4_has_free_blocks(sbi, ar->len);
+ }
if (ar->len == 0) {
*errp = -ENOSPC;
@@ -4186,6 +4199,9 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
}
inquota = ar->len;
+ if (EXT4_I(ar->inode)->i_delalloc_reserved_flag)
+ ar->flags |= EXT4_MB_DELALLOC_RESERVED;
+
ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
if (!ac) {
ar->len = 0;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index de9d3d0..25e2f24 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -574,6 +574,11 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
INIT_LIST_HEAD(&ei->i_prealloc_list);
spin_lock_init(&ei->i_prealloc_lock);
jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
+ ei->i_reserved_data_blocks = 0;
+ ei->i_reserved_meta_blocks = 0;
+ ei->i_allocated_meta_blocks = 0;
+ ei->i_delalloc_reserved_flag = 0;
+ spin_lock_init(&(ei->i_block_reservation_lock));
return &ei->vfs_inode;
}