aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/extent_io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r--fs/btrfs/extent_io.c67
1 files changed, 60 insertions, 7 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 05937a8..9a837a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2444,7 +2444,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_end_io_t end_io_func,
int mirror_num,
unsigned long prev_bio_flags,
- unsigned long bio_flags)
+ unsigned long bio_flags,
+ bool force_bio_submit)
{
int ret = 0;
struct bio *bio;
@@ -2463,6 +2464,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
sector;
if (prev_bio_flags != bio_flags || !contig ||
+ force_bio_submit ||
(tree->ops && tree->ops->merge_bio_hook &&
tree->ops->merge_bio_hook(page, offset, page_size, bio,
bio_flags)) ||
@@ -2519,7 +2521,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
struct page *page,
get_extent_t *get_extent,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags)
+ unsigned long *bio_flags,
+ u64 *prev_em_start)
{
struct inode *inode = page->mapping->host;
u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
@@ -2575,6 +2578,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
}
while (cur <= end) {
+ bool force_bio_submit = false;
+
if (cur >= last_byte) {
char *userpage;
struct extent_state *cached = NULL;
@@ -2621,6 +2626,49 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
block_start = em->block_start;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
block_start = EXTENT_MAP_HOLE;
+
+ /*
+ * If we have a file range that points to a compressed extent
+ * and it's followed by a consecutive file range that points to
+ * to the same compressed extent (possibly with a different
+ * offset and/or length, so it either points to the whole extent
+ * or only part of it), we must make sure we do not submit a
+ * single bio to populate the pages for the 2 ranges because
+ * this makes the compressed extent read zero out the pages
+ * belonging to the 2nd range. Imagine the following scenario:
+ *
+ * File layout
+ * [0 - 8K] [8K - 24K]
+ * | |
+ * | |
+ * points to extent X, points to extent X,
+ * offset 4K, length of 8K offset 0, length 16K
+ *
+ * [extent X, compressed length = 4K uncompressed length = 16K]
+ *
+ * If the bio to read the compressed extent covers both ranges,
+ * it will decompress extent X into the pages belonging to the
+ * first range and then it will stop, zeroing out the remaining
+ * pages that belong to the other range that points to extent X.
+ * So here we make sure we submit 2 bios, one for the first
+ * range and another one for the third range. Both will target
+ * the same physical extent from disk, but we can't currently
+ * make the compressed bio endio callback populate the pages
+ * for both ranges because each compressed bio is tightly
+ * coupled with a single extent map, and each range can have
+ * an extent map with a different offset value relative to the
+ * uncompressed data of our extent and different lengths. This
+ * is a corner case so we prioritize correctness over
+ * non-optimal behavior (submitting 2 bios for the same extent).
+ */
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
+ prev_em_start && *prev_em_start != (u64)-1 &&
+ *prev_em_start != em->orig_start)
+ force_bio_submit = true;
+
+ if (prev_em_start)
+ *prev_em_start = em->orig_start;
+
free_extent_map(em);
em = NULL;
@@ -2675,7 +2723,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
*bio_flags,
- this_bio_flag);
+ this_bio_flag,
+ force_bio_submit);
nr++;
*bio_flags = this_bio_flag;
}
@@ -2701,7 +2750,7 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
int ret;
ret = __extent_read_full_page(tree, page, get_extent, &bio, mirror_num,
- &bio_flags);
+ &bio_flags, NULL);
if (bio)
ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
return ret;
@@ -2960,7 +3009,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
sector, iosize, pg_offset,
bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
- 0, 0, 0);
+ 0, 0, 0, false);
if (ret)
SetPageError(page);
}
@@ -3219,6 +3268,7 @@ int extent_readpages(struct extent_io_tree *tree,
struct bio *bio = NULL;
unsigned page_idx;
unsigned long bio_flags = 0;
+ u64 prev_em_start = (u64)-1;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, lru);
@@ -3228,7 +3278,8 @@ int extent_readpages(struct extent_io_tree *tree,
if (!add_to_page_cache_lru(page, mapping,
page->index, GFP_NOFS)) {
__extent_read_full_page(tree, page, get_extent,
- &bio, 0, &bio_flags);
+ &bio, 0, &bio_flags,
+ &prev_em_start);
}
page_cache_release(page);
}
@@ -3998,6 +4049,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
unsigned long num_pages;
struct bio *bio = NULL;
unsigned long bio_flags = 0;
+ u64 prev_em_start = (u64)-1;
if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
return 0;
@@ -4053,7 +4105,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
ClearPageError(page);
err = __extent_read_full_page(tree, page,
get_extent, &bio,
- mirror_num, &bio_flags);
+ mirror_num, &bio_flags,
+ &prev_em_start);
if (err)
ret = err;
} else {