From 797c09ed348c2e22715cbfaeb3ab658436753570 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 20 Feb 2012 23:06:18 -0500
Subject: ext4: avoid deadlock on sync-mounted FS w/o journal

commit c1bb05a657fb3d8c6179a4ef7980261fae4521d7 upstream.

Processes hang forever on a sync-mounted ext2 file system that
is mounted with the ext4 module (default in Fedora 16).

I can reproduce this reliably by mounting an ext2 partition with
"-o sync" and opening a new file an that partition with vim. vim
will hang in "D" state forever.  The same happens on ext4 without
a journal.

I am attaching a small patch here that solves this issue for me.
In the sync mounted case without a journal,
ext4_handle_dirty_metadata() may call sync_dirty_buffer(), which
can't be called with buffer lock held.

Also move mb_cache_entry_release inside lock to avoid race
fixed previously by 8a2bfdcb ext[34]: EA block reference count racing fix
Note too that ext2 fixed this same problem in 2006 with
b2f49033 [PATCH] fix deadlock in ext2

Signed-off-by: Martin.Wilck@ts.fujitsu.com
[sandeen@redhat.com: move mb_cache_entry_release before unlock, edit commit msg]
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/xattr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 19fe4e3..c2865cc 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -487,18 +487,19 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
 		ext4_free_blocks(handle, inode, bh, 0, 1,
 				 EXT4_FREE_BLOCKS_METADATA |
 				 EXT4_FREE_BLOCKS_FORGET);
+		unlock_buffer(bh);
 	} else {
 		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
+		if (ce)
+			mb_cache_entry_release(ce);
+		unlock_buffer(bh);
 		error = ext4_handle_dirty_metadata(handle, inode, bh);
 		if (IS_SYNC(inode))
 			ext4_handle_sync(handle);
 		dquot_free_block(inode, 1);
 		ea_bdebug(bh, "refcount now=%d; releasing",
 			  le32_to_cpu(BHDR(bh)->h_refcount));
-		if (ce)
-			mb_cache_entry_release(ce);
 	}
-	unlock_buffer(bh);
 out:
 	ext4_std_error(inode->i_sb, error);
 	return;
-- 
cgit v1.1


From 1a28fbbebe6bd7e3f0338663302b3b3ce500e088 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sun, 18 Dec 2011 17:37:02 -0500
Subject: ext4: fix error handling on inode bitmap corruption

commit acd6ad83517639e8f09a8c5525b1dccd81cd2a10 upstream.

When insert_inode_locked() fails in ext4_new_inode() it most likely means inode
bitmap got corrupted and we allocated again inode which is already in use. Also
doing unlock_new_inode() during error recovery is wrong since the inode does
not have I_NEW set. Fix the problem by jumping to fail: (instead of fail_drop:)
which declares filesystem error and does not call unlock_new_inode().

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ialloc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 21bb2f6..412469b 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1021,8 +1021,12 @@ got:
 	if (IS_DIRSYNC(inode))
 		ext4_handle_sync(handle);
 	if (insert_inode_locked(inode) < 0) {
-		err = -EINVAL;
-		goto fail_drop;
+		/*
+		 * Likely a bitmap corruption causing inode to be allocated
+		 * twice.
+		 */
+		err = -EIO;
+		goto fail;
 	}
 	spin_lock(&sbi->s_next_gen_lock);
 	inode->i_generation = sbi->s_next_generation++;
-- 
cgit v1.1


From e36db7f818de59e3b0fdeaabda97e696a177b9a9 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 28 May 2012 14:17:25 -0400
Subject: ext4: force ro mount if ext4_setup_super() fails

commit 7e84b6216467b84cd332c8e567bf5aa113fd2f38 upstream.

If ext4_setup_super() fails i.e. due to a too-high revision,
the error is logged in dmesg but the fs is not mounted RO as
indicated.

Tested by:

# mkfs.ext4 -r 4 /dev/sdb6
# mount /dev/sdb6 /mnt/test
# dmesg | grep "too high"
[164919.759248] EXT4-fs (sdb6): revision level too high, forcing read-only mode
# grep sdb6 /proc/mounts
/dev/sdb6 /mnt/test2 ext4 rw,seclabel,relatime,data=ordered 0 0

Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index df121b2..be6c0cc 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3618,7 +3618,8 @@ no_journal:
 		goto failed_mount4;
 	}
 
-	ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
+	if (ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY))
+		sb->s_flags |= MS_RDONLY;
 
 	/* determine the minimum size of new large inodes, if present */
 	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
-- 
cgit v1.1


From 801bdd926b6229da233f6db25770c9e817f98d4e Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 30 May 2012 23:00:16 -0400
Subject: ext4: add missing save_error_info() to ext4_error()

commit f3fc0210c0fc91900766c995f089c39170e68305 upstream.

The ext4_error() function is missing a call to save_error_info().
Since this is the function which marks the file system as containing
an error, this oversight (which was introduced in 2.6.36) is quite
significant, and should be backported to older stable kernels with
high urgency.

Reported-by: Ken Sumrall <ksumrall@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: ksumrall@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index be6c0cc..113b107 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -433,6 +433,7 @@ void __ext4_error(struct super_block *sb, const char *function,
 	printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
 	       sb->s_id, function, line, current->comm, &vaf);
 	va_end(args);
+	save_error_info(sb, function, line);
 
 	ext4_handle_error(sb);
 }
-- 
cgit v1.1


From eeb7cb57cf619ae9ab8210b21b49820ed40a472f Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 31 May 2012 23:46:01 -0400
Subject: ext4: don't trash state flags in EXT4_IOC_SETFLAGS

commit 79906964a187c405db72a3abc60eb9b50d804fbc upstream.

In commit 353eb83c we removed i_state_flags with 64-bit longs, But
when handling the EXT4_IOC_SETFLAGS ioctl, we replace i_flags
directly, which trashes the state flags which are stored in the high
32-bits of i_flags on 64-bit platforms.  So use the the
ext4_{set,clear}_inode_flags() functions which use atomic bit
manipulation functions instead.

Reported-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ioctl.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 808c554..892427d 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -35,7 +35,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		handle_t *handle = NULL;
 		int err, migrate = 0;
 		struct ext4_iloc iloc;
-		unsigned int oldflags;
+		unsigned int oldflags, mask, i;
 		unsigned int jflag;
 
 		if (!inode_owner_or_capable(inode))
@@ -112,8 +112,14 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		if (err)
 			goto flags_err;
 
-		flags = flags & EXT4_FL_USER_MODIFIABLE;
-		flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE;
+		for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {
+			if (!(mask & EXT4_FL_USER_MODIFIABLE))
+				continue;
+			if (mask & flags)
+				ext4_set_inode_flag(inode, i);
+			else
+				ext4_clear_inode_flag(inode, i);
+		}
 		ei->i_flags = flags;
 
 		ext4_set_inode_flags(inode);
-- 
cgit v1.1


From 97434cf53353728708c133af183a11a158c8c26a Mon Sep 17 00:00:00 2001
From: Salman Qazi <sqazi@google.com>
Date: Thu, 31 May 2012 23:51:27 -0400
Subject: ext4: add ext4_mb_unload_buddy in the error path

commit 02b7831019ea4e7994968c84b5826fa8b248ffc8 upstream.

ext4_free_blocks fails to pair an ext4_mb_load_buddy with a matching
ext4_mb_unload_buddy when it fails a memory allocation.

Signed-off-by: Salman Qazi <sqazi@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/mballoc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 0f1be7f..e3d5575 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4583,6 +4583,7 @@ do_more:
 		 */
 		new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
 		if (!new_entry) {
+			ext4_mb_unload_buddy(&e4b);
 			err = -ENOMEM;
 			goto error_return;
 		}
-- 
cgit v1.1


From 32e090b1f4bdfe9756e1b8f0b5280acb036d1c61 Mon Sep 17 00:00:00 2001
From: Salman Qazi <sqazi@google.com>
Date: Thu, 31 May 2012 23:52:14 -0400
Subject: ext4: remove mb_groups before tearing down the buddy_cache

commit 95599968d19db175829fb580baa6b68939b320fb upstream.

We can't have references held on pages in the s_buddy_cache while we are
trying to truncate its pages and put the inode.  All the pages must be
gone before we reach clear_inode.  This can only be gauranteed if we
can prevent new users from grabbing references to s_buddy_cache's pages.

The original bug can be reproduced and the bug fix can be verified by:

while true; do mount -t ext4 /dev/ram0 /export/hda3/ram0; \
	umount /export/hda3/ram0; done &

while true; do cat /proc/fs/ext4/ram0/mb_groups; done

Signed-off-by: Salman Qazi <sqazi@google.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/mballoc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e3d5575..b6adf68 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2528,6 +2528,9 @@ int ext4_mb_release(struct super_block *sb)
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
 
+	if (sbi->s_proc)
+		remove_proc_entry("mb_groups", sbi->s_proc);
+
 	if (sbi->s_group_info) {
 		for (i = 0; i < ngroups; i++) {
 			grinfo = ext4_get_group_info(sb, i);
@@ -2575,8 +2578,6 @@ int ext4_mb_release(struct super_block *sb)
 	}
 
 	free_percpu(sbi->s_locality_groups);
-	if (sbi->s_proc)
-		remove_proc_entry("mb_groups", sbi->s_proc);
 
 	return 0;
 }
-- 
cgit v1.1


From 749c8151fdf307fb7527aded025850027d44aadc Mon Sep 17 00:00:00 2001
From: Tao Ma <boyu.mt@taobao.com>
Date: Thu, 7 Jun 2012 19:04:19 -0400
Subject: ext4: don't set i_flags in EXT4_IOC_SETFLAGS

commit b22b1f178f6799278d3178d894f37facb2085765 upstream.

Commit 7990696 uses the ext4_{set,clear}_inode_flags() functions to
change the i_flags automatically but fails to remove the error setting
of i_flags.  So we still have the problem of trashing state flags.
Fix this by removing the assignment.

Signed-off-by: Tao Ma <boyu.mt@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ioctl.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 892427d..4cbe1c2 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -120,7 +120,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 			else
 				ext4_clear_inode_flag(inode, i);
 		}
-		ei->i_flags = flags;
 
 		ext4_set_inode_flags(inode);
 		inode->i_ctime = ext4_current_time(inode);
-- 
cgit v1.1


From 6ff2c41b81bd0778aa44ffcfce0ea623fa660887 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 30 Jun 2012 19:14:57 -0400
Subject: ext4: pass a char * to ext4_count_free() instead of a buffer_head ptr

commit f6fb99cadcd44660c68e13f6eab28333653621e6 upstream.

Make it possible for ext4_count_free to operate on buffers and not
just data in buffer_heads.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/balloc.c | 3 ++-
 fs/ext4/bitmap.c | 8 +++-----
 fs/ext4/ext4.h   | 2 +-
 fs/ext4/ialloc.c | 3 ++-
 4 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 264f694..ebe95f5 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -514,7 +514,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb)
 		if (bitmap_bh == NULL)
 			continue;
 
-		x = ext4_count_free(bitmap_bh, sb->s_blocksize);
+		x = ext4_count_free(bitmap_bh->b_data,
+				    EXT4_BLOCKS_PER_GROUP(sb) / 8);
 		printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
 			i, ext4_free_blks_count(sb, gdp), x);
 		bitmap_count += x;
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index fa3af81..012faaa 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -15,15 +15,13 @@
 
 static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
 
-unsigned int ext4_count_free(struct buffer_head *map, unsigned int numchars)
+unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
 {
 	unsigned int i, sum = 0;
 
-	if (!map)
-		return 0;
 	for (i = 0; i < numchars; i++)
-		sum += nibblemap[map->b_data[i] & 0xf] +
-			nibblemap[(map->b_data[i] >> 4) & 0xf];
+		sum += nibblemap[bitmap[i] & 0xf] +
+			nibblemap[(bitmap[i] >> 4) & 0xf];
 	return sum;
 }
 
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1a34c1c..e0113aa 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1713,7 +1713,7 @@ struct mmpd_data {
 # define NORET_AND	noreturn,
 
 /* bitmap.c */
-extern unsigned int ext4_count_free(struct buffer_head *, unsigned);
+extern unsigned int ext4_count_free(char *bitmap, unsigned numchars);
 
 /* balloc.c */
 extern unsigned int ext4_block_group(struct super_block *sb,
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 412469b..29272de 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1193,7 +1193,8 @@ unsigned long ext4_count_free_inodes(struct super_block *sb)
 		if (!bitmap_bh)
 			continue;
 
-		x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
+		x = ext4_count_free(bitmap_bh->b_data,
+				    EXT4_INODES_PER_GROUP(sb) / 8);
 		printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
 			(unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
 		bitmap_count += x;
-- 
cgit v1.1


From b4cbf953e071121a4082e35c9820c3c3c10aeb55 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Sun, 22 Jul 2012 23:59:40 -0400
Subject: ext4: don't let i_reserved_meta_blocks go negative

commit 97795d2a5b8d3c8dc4365d4bd3404191840453ba upstream.

If we hit a condition where we have allocated metadata blocks that
were not appropriately reserved, we risk underflow of
ei->i_reserved_meta_blocks.  In turn, this can throw
sbi->s_dirtyclusters_counter significantly out of whack and undermine
the nondelalloc fallback logic in ext4_nonda_switch().  Warn if this
occurs and set i_allocated_meta_blocks to avoid this problem.

This condition is reproduced by xfstests 270 against ext2 with
delalloc enabled:

Mar 28 08:58:02 localhost kernel: [  171.526344] EXT4-fs (loop1): delayed block allocation failed for inode 14 at logical offset 64486 with max blocks 64 with error -28
Mar 28 08:58:02 localhost kernel: [  171.526346] EXT4-fs (loop1): This should not happen!! Data will be lost

270 ultimately fails with an inconsistent filesystem and requires an
fsck to repair.  The cause of the error is an underflow in
ext4_da_update_reserve_space() due to an unreserved meta block
allocation.

Signed-off-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inode.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c1e6a72..18fee6d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1134,6 +1134,15 @@ void ext4_da_update_reserve_space(struct inode *inode,
 		used = ei->i_reserved_data_blocks;
 	}
 
+	if (unlikely(ei->i_allocated_meta_blocks > ei->i_reserved_meta_blocks)) {
+		ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, allocated %d "
+			 "with only %d reserved metadata blocks\n", __func__,
+			 inode->i_ino, ei->i_allocated_meta_blocks,
+			 ei->i_reserved_meta_blocks);
+		WARN_ON(1);
+		ei->i_allocated_meta_blocks = ei->i_reserved_meta_blocks;
+	}
+
 	/* Update per-inode reservations */
 	ei->i_reserved_data_blocks -= used;
 	ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
-- 
cgit v1.1


From b1aa47aec9ca0de65c67d811c291153503972a08 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 5 Aug 2012 23:28:16 -0400
Subject: ext4: avoid kmemcheck complaint from reading uninitialized memory

commit 7e731bc9a12339f344cddf82166b82633d99dd86 upstream.

Commit 03179fe923 introduced a kmemcheck complaint in
ext4_da_get_block_prep() because we save and restore
ei->i_da_metadata_calc_last_lblock even though it is left
uninitialized in the case where i_da_metadata_calc_len is zero.

This doesn't hurt anything, but silencing the kmemcheck complaint
makes it easier for people to find real bugs.

Addresses https://bugzilla.kernel.org/show_bug.cgi?id=45631
(which is marked as a regression).

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 113b107..489d406 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -860,6 +860,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->i_reserved_meta_blocks = 0;
 	ei->i_allocated_meta_blocks = 0;
 	ei->i_da_metadata_calc_len = 0;
+	ei->i_da_metadata_calc_last_lblock = 0;
 	spin_lock_init(&(ei->i_block_reservation_lock));
 #ifdef CONFIG_QUOTA
 	ei->i_reserved_quota = 0;
-- 
cgit v1.1


From 48fa0772b93d6e2482d23a41a9c6474cfa2e5e35 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Wed, 26 Sep 2012 12:32:54 -0400
Subject: ext4: online defrag is not supported for journaled files

commit f066055a3449f0e5b0ae4f3ceab4445bead47638 upstream.

Proper block swap for inodes with full journaling enabled is
truly non obvious task. In order to be on a safe side let's
explicitly disable it for now.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/move_extent.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index f57455a..72f9732 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1209,7 +1209,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
-
+	/* TODO: This is non obvious task to swap blocks for inodes with full
+	   jornaling enabled */
+	if (ext4_should_journal_data(orig_inode) ||
+	    ext4_should_journal_data(donor_inode)) {
+		return -EINVAL;
+	}
 	/* Protect orig and donor inodes against a truncate */
 	ret1 = mext_inode_double_lock(orig_inode, donor_inode);
 	if (ret1 < 0)
-- 
cgit v1.1


From 985f704d74944dc66b5185aa9ccebcb936f2b8e0 Mon Sep 17 00:00:00 2001
From: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Date: Wed, 26 Sep 2012 21:24:57 -0400
Subject: ext4: always set i_op in ext4_mknod()

commit 6a08f447facb4f9e29fcc30fb68060bb5a0d21c2 upstream.

ext4_special_inode_operations have their own ifdef CONFIG_EXT4_FS_XATTR
to mask those methods. And ext4_iget also always sets it, so there is
an inconsistency.

Signed-off-by: Bernd Schubert <bernd.schubert@itwm.fraunhofer.de>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/namei.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 3d36d5a..78585fc 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1799,9 +1799,7 @@ retry:
 	err = PTR_ERR(inode);
 	if (!IS_ERR(inode)) {
 		init_special_inode(inode, inode->i_mode, rdev);
-#ifdef CONFIG_EXT4_FS_XATTR
 		inode->i_op = &ext4_special_inode_operations;
-#endif
 		err = ext4_add_nondir(handle, dentry, inode);
 	}
 	ext4_journal_stop(handle);
-- 
cgit v1.1


From a6c0070c1f5a6c7b0bba5bb5be44b1dabe88af56 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Wed, 26 Sep 2012 21:52:20 -0400
Subject: ext4: fix fdatasync() for files with only i_size changes

commit b71fc079b5d8f42b2a52743c8d2f1d35d655b1c5 upstream.

Code tracking when transaction needs to be committed on fdatasync(2) forgets
to handle a situation when only inode's i_size is changed. Thus in such
situations fdatasync(2) doesn't force transaction with new i_size to disk
and that can result in wrong i_size after a crash.

Fix the issue by updating inode's i_datasync_tid whenever its size is
updated.

Reported-by: Kristian Nielsen <knielsen@knielsen-hq.org>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inode.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 18fee6d..1dbf758 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5151,6 +5151,7 @@ static int ext4_do_update_inode(handle_t *handle,
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct buffer_head *bh = iloc->bh;
 	int err = 0, rc, block;
+	int need_datasync = 0;
 
 	/* For fields not not tracking in the in-memory inode,
 	 * initialise them to zero for new inodes. */
@@ -5199,7 +5200,10 @@ static int ext4_do_update_inode(handle_t *handle,
 		raw_inode->i_file_acl_high =
 			cpu_to_le16(ei->i_file_acl >> 32);
 	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
-	ext4_isize_set(raw_inode, ei->i_disksize);
+	if (ei->i_disksize != ext4_isize(raw_inode)) {
+		ext4_isize_set(raw_inode, ei->i_disksize);
+		need_datasync = 1;
+	}
 	if (ei->i_disksize > 0x7fffffffULL) {
 		struct super_block *sb = inode->i_sb;
 		if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -5252,7 +5256,7 @@ static int ext4_do_update_inode(handle_t *handle,
 		err = rc;
 	ext4_clear_inode_state(inode, EXT4_STATE_NEW);
 
-	ext4_update_inode_fsync_trans(handle, inode, 0);
+	ext4_update_inode_fsync_trans(handle, inode, need_datasync);
 out_brelse:
 	brelse(bh);
 	ext4_std_error(inode->i_sb, err);
-- 
cgit v1.1


From 9d4cbf806eaefbf4a8a63de492a060e06b07f5a6 Mon Sep 17 00:00:00 2001
From: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
Date: Thu, 8 Nov 2012 15:11:11 -0500
Subject: ext4: fix memory leak in ext4_xattr_set_acl()'s error path

commit 24ec19b0ae83a385ad9c55520716da671274b96c upstream.

In ext4_xattr_set_acl(), if ext4_journal_start() returns an error,
posix_acl_release() will not be called for 'acl' which may result in a
memory leak.

This patch fixes that.

Reviewed-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Eugene Shatokhin <eugene.shatokhin@rosalab.ru>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/acl.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 21eacd7..4922087 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -450,8 +450,10 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
 
 retry:
 	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		error = PTR_ERR(handle);
+		goto release_and_out;
+	}
 	error = ext4_set_acl(handle, inode, type, acl);
 	ext4_journal_stop(handle);
 	if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
-- 
cgit v1.1


From 8cbe63802ddac6b242e4a4293fc338fa7808272c Mon Sep 17 00:00:00 2001
From: Forrest Liu <forrestl@synology.com>
Date: Mon, 17 Dec 2012 09:55:39 -0500
Subject: ext4: fix extent tree corruption caused by hole punch

commit c36575e663e302dbaa4d16b9c72d2c9a913a9aef upstream.

When depth of extent tree is greater than 1, logical start value of
interior node is not correctly updated in ext4_ext_rm_idx.

Signed-off-by: Forrest Liu <forrestl@synology.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Ashish Sangwan <ashishsangwan2@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/extents.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 611647b..680df5d 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2140,13 +2140,14 @@ ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
  * last index in the block only.
  */
 static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
-			struct ext4_ext_path *path)
+			struct ext4_ext_path *path, int depth)
 {
 	int err;
 	ext4_fsblk_t leaf;
 
 	/* free index block */
-	path--;
+	depth--;
+	path = path + depth;
 	leaf = ext4_idx_pblock(path->p_idx);
 	if (unlikely(path->p_hdr->eh_entries == 0)) {
 		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
@@ -2162,6 +2163,19 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
 	ext4_free_blocks(handle, inode, NULL, leaf, 1,
 			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+
+	while (--depth >= 0) {
+		if (path->p_idx != EXT_FIRST_INDEX(path->p_hdr))
+			break;
+		path--;
+		err = ext4_ext_get_access(handle, inode, path);
+		if (err)
+			break;
+		path->p_idx->ei_block = (path+1)->p_idx->ei_block;
+		err = ext4_ext_dirty(handle, inode, path);
+		if (err)
+			break;
+	}
 	return err;
 }
 
@@ -2509,7 +2523,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	/* if this leaf is free, then we should
 	 * remove it from index block above */
 	if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL)
-		err = ext4_ext_rm_idx(handle, inode, path + depth);
+		err = ext4_ext_rm_idx(handle, inode, path, depth);
 
 out:
 	return err;
@@ -2639,7 +2653,7 @@ again:
 				/* index is empty, remove it;
 				 * handle must be already prepared by the
 				 * truncatei_leaf() */
-				err = ext4_ext_rm_idx(handle, inode, path + i);
+				err = ext4_ext_rm_idx(handle, inode, path, i);
 			}
 			/* root level has p_bh == NULL, brelse() eats this */
 			brelse(path[i].p_bh);
-- 
cgit v1.1


From 1ba9a27ec24b133799bc1b8a77d733c4015d0abd Mon Sep 17 00:00:00 2001
From: Michael Tokarev <mjt@tls.msk.ru>
Date: Tue, 25 Dec 2012 14:08:16 -0500
Subject: ext4: do not try to write superblock on ro remount w/o journal

commit d096ad0f79a782935d2e06ae8fb235e8c5397775 upstream.

When a journal-less ext4 filesystem is mounted on a read-only block
device (blockdev --setro will do), each remount (for other, unrelated,
flags, like suid=>nosuid etc) results in a series of scary messages
from kernel telling about I/O errors on the device.

This is becauese of the following code ext4_remount():

       if (sbi->s_journal == NULL)
                ext4_commit_super(sb, 1);

at the end of remount procedure, which forces writing (flushing) of
a superblock regardless whenever it is dirty or not, if the filesystem
is readonly or not, and whenever the device itself is readonly or not.

We only need call ext4_commit_super when the file system had been
previously mounted read/write.

Thanks to Eric Sandeen for help in diagnosing this issue.

Signed-off-By: Michael Tokarev <mjt@tls.msk.ru>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 489d406..9ca954e 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4438,7 +4438,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 	}
 
 	ext4_setup_system_zone(sb);
-	if (sbi->s_journal == NULL)
+	if (sbi->s_journal == NULL && !(old_sb_flags & MS_RDONLY))
 		ext4_commit_super(sb, 1);
 
 #ifdef CONFIG_QUOTA
-- 
cgit v1.1


From a4202fd727f6bf57827061104a60307e78124ea2 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Thu, 27 Dec 2012 01:42:48 -0500
Subject: ext4: lock i_mutex when truncating orphan inodes

commit 721e3eba21e43532e438652dd8f1fcdfce3187e7 upstream.

Commit c278531d39 added a warning when ext4_flush_unwritten_io() is
called without i_mutex being taken.  It had previously not been taken
during orphan cleanup since races weren't possible at that point in
the mount process, but as a result of this c278531d39, we will now see
a kernel WARN_ON in this case.  Take the i_mutex in
ext4_orphan_cleanup() to suppress this warning.

Reported-by: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 9ca954e..f1aa1a2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2204,7 +2204,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,
 				__func__, inode->i_ino, inode->i_size);
 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
 				  inode->i_ino, inode->i_size);
+			mutex_lock(&inode->i_mutex);
 			ext4_truncate(inode);
+			mutex_unlock(&inode->i_mutex);
 			nr_truncates++;
 		} else {
 			ext4_msg(sb, KERN_DEBUG,
-- 
cgit v1.1


From 97765f30c49d3023a6dafe7c48c7e520fca441f8 Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Wed, 14 Nov 2012 22:22:05 -0500
Subject: ext4: init pagevec in ext4_da_block_invalidatepages

commit 66bea92c69477a75a5d37b9bfed5773c92a3c4b4 upstream.

ext4_da_block_invalidatepages is missing a pagevec_init(),
which means that pvec->cold contains random garbage.

This affects whether the page goes to the front or
back of the LRU when ->cold makes it to
free_hot_cold_page()

Reviewed-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Signed-off-by: CAI Qian <caiqian@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/ext4')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 1dbf758..7e56946 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2199,6 +2199,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 
 	index = mpd->first_page;
 	end   = mpd->next_page - 1;
+
+	pagevec_init(&pvec, 0);
 	while (index <= end) {
 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
 		if (nr_pages == 0)
-- 
cgit v1.1