From 37a8457773c266cdb77fcddec008cd73e81786be Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Fri, 11 May 2012 16:34:10 +0200 Subject: block: don't mark buffers beyond end of disk as mapped commit 080399aaaf3531f5b8761ec0ac30ff98891e8686 upstream. Hi, We have a bug report open where a squashfs image mounted on ppc64 would exhibit errors due to trying to read beyond the end of the disk. It can easily be reproduced by doing the following: [root@ibm-p750e-02-lp3 ~]# ls -l install.img -rw-r--r-- 1 root root 142032896 Apr 30 16:46 install.img [root@ibm-p750e-02-lp3 ~]# mount -o loop ./install.img /mnt/test [root@ibm-p750e-02-lp3 ~]# dd if=/dev/loop0 of=/dev/null dd: reading `/dev/loop0': Input/output error 277376+0 records in 277376+0 records out 142016512 bytes (142 MB) copied, 0.9465 s, 150 MB/s In dmesg, you'll find the following: squashfs: version 4.0 (2009/01/31) Phillip Lougher [ 43.106012] attempt to access beyond end of device [ 43.106029] loop0: rw=0, want=277410, limit=277408 [ 43.106039] Buffer I/O error on device loop0, logical block 138704 [ 43.106053] attempt to access beyond end of device [ 43.106057] loop0: rw=0, want=277412, limit=277408 [ 43.106061] Buffer I/O error on device loop0, logical block 138705 [ 43.106066] attempt to access beyond end of device [ 43.106070] loop0: rw=0, want=277414, limit=277408 [ 43.106073] Buffer I/O error on device loop0, logical block 138706 [ 43.106078] attempt to access beyond end of device [ 43.106081] loop0: rw=0, want=277416, limit=277408 [ 43.106085] Buffer I/O error on device loop0, logical block 138707 [ 43.106089] attempt to access beyond end of device [ 43.106093] loop0: rw=0, want=277418, limit=277408 [ 43.106096] Buffer I/O error on device loop0, logical block 138708 [ 43.106101] attempt to access beyond end of device [ 43.106104] loop0: rw=0, want=277420, limit=277408 [ 43.106108] Buffer I/O error on device loop0, logical block 138709 [ 43.106112] attempt to access beyond end of device [ 43.106116] loop0: 
rw=0, want=277422, limit=277408 [ 43.106120] Buffer I/O error on device loop0, logical block 138710 [ 43.106124] attempt to access beyond end of device [ 43.106128] loop0: rw=0, want=277424, limit=277408 [ 43.106131] Buffer I/O error on device loop0, logical block 138711 [ 43.106135] attempt to access beyond end of device [ 43.106139] loop0: rw=0, want=277426, limit=277408 [ 43.106143] Buffer I/O error on device loop0, logical block 138712 [ 43.106147] attempt to access beyond end of device [ 43.106151] loop0: rw=0, want=277428, limit=277408 [ 43.106154] Buffer I/O error on device loop0, logical block 138713 [ 43.106158] attempt to access beyond end of device [ 43.106162] loop0: rw=0, want=277430, limit=277408 [ 43.106166] attempt to access beyond end of device [ 43.106169] loop0: rw=0, want=277432, limit=277408 ... [ 43.106307] attempt to access beyond end of device [ 43.106311] loop0: rw=0, want=277470, limit=2774 Squashfs manages to read in the end block(s) of the disk during the mount operation. Then, when dd reads the block device, it leads to block_read_full_page being called with buffers that are beyond end of disk, but are marked as mapped. Thus, it would end up submitting read I/O against them, resulting in the errors mentioned above. I fixed the problem by modifying init_page_buffers to only set the buffer mapped if it fell inside of i_size. Cheers, Jeff Signed-off-by: Jeff Moyer Acked-by: Nick Piggin -- Changes from v1->v2: re-used max_block, as suggested by Nick Piggin. 
Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/buffer.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 1a80b04..330cbce 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -968,6 +968,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, struct buffer_head *head = page_buffers(page); struct buffer_head *bh = head; int uptodate = PageUptodate(page); + sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode)); do { if (!buffer_mapped(bh)) { @@ -976,7 +977,8 @@ init_page_buffers(struct page *page, struct block_device *bdev, bh->b_blocknr = block; if (uptodate) set_buffer_uptodate(bh); - set_buffer_mapped(bh); + if (block < end_block) + set_buffer_mapped(bh); } block++; bh = bh->b_this_page; -- cgit v1.1 From 4ff1ddad40c57605cc33a78699e4559217a06a46 Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Thu, 12 Jul 2012 09:43:14 -0400 Subject: block: fix infinite loop in __getblk_slow commit 91f68c89d8f35fe98ea04159b9a3b42d0149478f upstream. Commit 080399aaaf35 ("block: don't mark buffers beyond end of disk as mapped") exposed a bug in __getblk_slow that causes mount to hang as it loops infinitely waiting for a buffer that lies beyond the end of the disk to become uptodate. The problem was initially reported by Torsten Hilbrich here: https://lkml.org/lkml/2012/6/18/54 and also reported independently here: http://www.sysresccd.org/forums/viewtopic.php?f=13&t=4511 and then Richard W.M. Jones and Marcos Mello noted a few separate bugzillas also associated with the same issue. 
This patch has been confirmed to fix: https://bugzilla.redhat.com/show_bug.cgi?id=835019 The main problem is here, in __getblk_slow: for (;;) { struct buffer_head * bh; int ret; bh = __find_get_block(bdev, block, size); if (bh) return bh; ret = grow_buffers(bdev, block, size); if (ret < 0) return NULL; if (ret == 0) free_more_memory(); } __find_get_block does not find the block, since it will not be marked as mapped, and so grow_buffers is called to fill in the buffers for the associated page. I believe the for (;;) loop is there primarily to retry in the case of memory pressure keeping grow_buffers from succeeding. However, we also continue to loop for other cases, like the block lying beyond the end of the disk. So, the fix I came up with is to only loop when grow_buffers fails due to memory allocation issues (return value of 0). The attached patch was tested by myself, Torsten, and Rich, and was found to resolve the problem in all cases. Signed-off-by: Jeff Moyer Reported-and-Tested-by: Torsten Hilbrich Tested-by: Richard W.M. 
Jones Reviewed-by: Josh Boyer [ Jens is on vacation, taking this directly - Linus ] Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- fs/buffer.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 330cbce..d421626 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1084,6 +1084,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { + int ret; + struct buffer_head *bh; + /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1096,20 +1099,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } - for (;;) { - struct buffer_head * bh; - int ret; +retry: + bh = __find_get_block(bdev, block, size); + if (bh) + return bh; + ret = grow_buffers(bdev, block, size); + if (ret == 0) { + free_more_memory(); + goto retry; + } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; - - ret = grow_buffers(bdev, block, size); - if (ret < 0) - return NULL; - if (ret == 0) - free_more_memory(); } + return NULL; } /* -- cgit v1.1 From 72013257f37b157958a9b3a9a102fc6d3f7dde0a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 23 Aug 2012 12:17:36 +0200 Subject: block: replace __getblk_slow misfix by grow_dev_page fix commit 676ce6d5ca3098339c028d44fe0427d1566a4d2d upstream. Commit 91f68c89d8f3 ("block: fix infinite loop in __getblk_slow") is not good: a successful call to grow_buffers() cannot guarantee that the page won't be reclaimed before the immediate next call to __find_get_block(), which is why there was always a loop there. Yesterday I got "EXT4-fs error (device loop0): __ext4_get_inode_loc:3595: inode #19278: block 664: comm cc1: unable to read itable block" on console, which pointed to this commit. 
I've been trying to bisect for weeks, why kbuild-on-ext4-on-loop-on-tmpfs sometimes fails from a missing header file, under memory pressure on ppc G5. I've never seen this on x86, and I've never seen it on 3.5-rc7 itself, despite that commit being in there: bisection pointed to an irrelevant pinctrl merge, but hard to tell when failure takes between 18 minutes and 38 hours (but so far it's happened quicker on 3.6-rc2). (I've since found such __ext4_get_inode_loc errors in /var/log/messages from previous weeks: why the message never appeared on console until yesterday morning is a mystery for another day.) Revert 91f68c89d8f3, restoring __getblk_slow() to how it was (plus a checkpatch nitfix). Simplify the interface between grow_buffers() and grow_dev_page(), and avoid the infinite loop beyond end of device by instead checking init_page_buffers()'s end_block there (I presume that's more efficient than a repeated call to blkdev_max_block()), returning -ENXIO to __getblk_slow() in that case. And remove akpm's ten-year-old "__getblk() cannot fail ... weird" comment, but that is worrying: are all users of __getblk() really now prepared for a NULL bh beyond end of device, or will some oops?? Signed-off-by: Hugh Dickins Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman --- fs/buffer.c | 66 ++++++++++++++++++++++++++++--------------------------------- 1 file changed, 30 insertions(+), 36 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index d421626..166028b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -961,7 +961,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) /* * Initialise the state of a blockdev page's buffers. 
*/ -static void +static sector_t init_page_buffers(struct page *page, struct block_device *bdev, sector_t block, int size) { @@ -983,33 +983,41 @@ init_page_buffers(struct page *page, struct block_device *bdev, block++; bh = bh->b_this_page; } while (bh != head); + + /* + * Caller needs to validate requested block against end of device. + */ + return end_block; } /* * Create the page-cache page that contains the requested block. * - * This is user purely for blockdev mappings. + * This is used purely for blockdev mappings. */ -static struct page * +static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size) + pgoff_t index, int size, int sizebits) { struct inode *inode = bdev->bd_inode; struct page *page; struct buffer_head *bh; + sector_t end_block; + int ret = 0; /* Will call free_more_memory() */ page = find_or_create_page(inode->i_mapping, index, (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE); if (!page) - return NULL; + return ret; BUG_ON(!PageLocked(page)); if (page_has_buffers(page)) { bh = page_buffers(page); if (bh->b_size == size) { - init_page_buffers(page, bdev, block, size); - return page; + end_block = init_page_buffers(page, bdev, + index << sizebits, size); + goto done; } if (!try_to_free_buffers(page)) goto failed; @@ -1029,15 +1037,15 @@ grow_dev_page(struct block_device *bdev, sector_t block, */ spin_lock(&inode->i_mapping->private_lock); link_dev_buffers(page, bh); - init_page_buffers(page, bdev, block, size); + end_block = init_page_buffers(page, bdev, index << sizebits, size); spin_unlock(&inode->i_mapping->private_lock); - return page; +done: + ret = (block < end_block) ? 
1 : -ENXIO; failed: - BUG(); unlock_page(page); page_cache_release(page); - return NULL; + return ret; } /* @@ -1047,7 +1055,6 @@ failed: static int grow_buffers(struct block_device *bdev, sector_t block, int size) { - struct page *page; pgoff_t index; int sizebits; @@ -1071,22 +1078,14 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) bdevname(bdev, b)); return -EIO; } - block = index << sizebits; + /* Create a page with the proper size buffers.. */ - page = grow_dev_page(bdev, block, index, size); - if (!page) - return 0; - unlock_page(page); - page_cache_release(page); - return 1; + return grow_dev_page(bdev, block, index, size, sizebits); } static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { - int ret; - struct buffer_head *bh; - /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1099,21 +1098,20 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } -retry: - bh = __find_get_block(bdev, block, size); - if (bh) - return bh; + for (;;) { + struct buffer_head *bh; + int ret; - ret = grow_buffers(bdev, block, size); - if (ret == 0) { - free_more_memory(); - goto retry; - } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; + + ret = grow_buffers(bdev, block, size); + if (ret < 0) + return NULL; + if (ret == 0) + free_more_memory(); } - return NULL; } /* @@ -1369,10 +1367,6 @@ EXPORT_SYMBOL(__find_get_block); * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() cannot fail - it just keeps trying. If you pass it an - * illegal block number, __getblk() will happily return a buffer_head - * which represents the non-existent block. Very weird. - * * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() * attempt is failing. FIXME, perhaps? 
*/ -- cgit v1.1