aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext3
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext3')
-rw-r--r--fs/ext3/acl.c97
-rw-r--r--fs/ext3/acl.h4
-rw-r--r--fs/ext3/balloc.c55
-rw-r--r--fs/ext3/dir.c167
-rw-r--r--fs/ext3/file.c3
-rw-r--r--fs/ext3/fsync.c17
-rw-r--r--fs/ext3/hash.c4
-rw-r--r--fs/ext3/ialloc.c51
-rw-r--r--fs/ext3/inode.c207
-rw-r--r--fs/ext3/ioctl.c28
-rw-r--r--fs/ext3/namei.c34
-rw-r--r--fs/ext3/super.c34
-rw-r--r--fs/ext3/xattr_security.c36
13 files changed, 412 insertions, 325 deletions
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 9d021c0..3091f62 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -131,7 +131,7 @@ fail:
*
* inode->i_mutex: don't care
*/
-static struct posix_acl *
+struct posix_acl *
ext3_get_acl(struct inode *inode, int type)
{
int name_index;
@@ -199,12 +199,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
case ACL_TYPE_ACCESS:
name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- mode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
+ error = posix_acl_equiv_mode(acl, &inode->i_mode);
if (error < 0)
return error;
else {
- inode->i_mode = mode;
inode->i_ctime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, inode);
if (error == 0)
@@ -239,29 +237,6 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
}
-int
-ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
-{
- struct posix_acl *acl;
-
- if (flags & IPERM_FLAG_RCU) {
- if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
- return -ECHILD;
- return -EAGAIN;
- }
-
- acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- int error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- return error;
- }
-
- return -EAGAIN;
-}
-
/*
* Initialize the ACLs of a new inode. Called from ext3_new_inode.
*
@@ -284,31 +259,20 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
inode->i_mode &= ~current_umask();
}
if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
- struct posix_acl *clone;
- mode_t mode;
-
if (S_ISDIR(inode->i_mode)) {
error = ext3_set_acl(handle, inode,
ACL_TYPE_DEFAULT, acl);
if (error)
goto cleanup;
}
- clone = posix_acl_clone(acl, GFP_NOFS);
- error = -ENOMEM;
- if (!clone)
- goto cleanup;
-
- mode = inode->i_mode;
- error = posix_acl_create_masq(clone, &mode);
- if (error >= 0) {
- inode->i_mode = mode;
- if (error > 0) {
- /* This is an extended ACL */
- error = ext3_set_acl(handle, inode,
- ACL_TYPE_ACCESS, clone);
- }
+ error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+ if (error < 0)
+ return error;
+
+ if (error > 0) {
+ /* This is an extended ACL */
+ error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
}
- posix_acl_release(clone);
}
cleanup:
posix_acl_release(acl);
@@ -332,7 +296,9 @@ cleanup:
int
ext3_acl_chmod(struct inode *inode)
{
- struct posix_acl *acl, *clone;
+ struct posix_acl *acl;
+ handle_t *handle;
+ int retries = 0;
int error;
if (S_ISLNK(inode->i_mode))
@@ -342,31 +308,24 @@ ext3_acl_chmod(struct inode *inode)
acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl) || !acl)
return PTR_ERR(acl);
- clone = posix_acl_clone(acl, GFP_KERNEL);
- posix_acl_release(acl);
- if (!clone)
- return -ENOMEM;
- error = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!error) {
- handle_t *handle;
- int retries = 0;
-
- retry:
- handle = ext3_journal_start(inode,
- EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- ext3_std_error(inode->i_sb, error);
- goto out;
- }
- error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
- ext3_journal_stop(handle);
- if (error == -ENOSPC &&
- ext3_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
+ error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ if (error)
+ return error;
+retry:
+ handle = ext3_journal_start(inode,
+ EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ ext3_std_error(inode->i_sb, error);
+ goto out;
}
+ error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
+ ext3_journal_stop(handle);
+ if (error == -ENOSPC &&
+ ext3_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
out:
- posix_acl_release(clone);
+ posix_acl_release(acl);
return error;
}
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 5faf804..dbc921e 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size)
#ifdef CONFIG_EXT3_FS_POSIX_ACL
/* acl.c */
-extern int ext3_check_acl (struct inode *, int, unsigned int);
+extern struct posix_acl *ext3_get_acl(struct inode *inode, int type);
extern int ext3_acl_chmod (struct inode *);
extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
#else /* CONFIG_EXT3_FS_POSIX_ACL */
#include <linux/sched.h>
-#define ext3_check_acl NULL
+#define ext3_get_acl NULL
static inline int
ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index fe52297..a203892 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -21,6 +21,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
+#include <trace/events/ext3.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
desc = ext3_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
+ trace_ext3_read_block_bitmap(sb, block_group);
bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
struct rb_node * parent = NULL;
struct ext3_reserve_window_node *this;
+ trace_ext3_rsv_window_add(sb, rsv);
while (*p)
{
parent = *p;
@@ -424,7 +427,7 @@ static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
void ext3_init_block_alloc_info(struct inode *inode)
{
struct ext3_inode_info *ei = EXT3_I(inode);
- struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+ struct ext3_block_alloc_info *block_i;
struct super_block *sb = inode->i_sb;
block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
rsv = &block_i->rsv_window_node;
if (!rsv_is_empty(&rsv->rsv_window)) {
spin_lock(rsv_lock);
- if (!rsv_is_empty(&rsv->rsv_window))
+ if (!rsv_is_empty(&rsv->rsv_window)) {
+ trace_ext3_discard_reservation(inode, rsv);
rsv_window_remove(inode->i_sb, rsv);
+ }
spin_unlock(rsv_lock);
}
}
@@ -683,14 +688,10 @@ error_return:
void ext3_free_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count)
{
- struct super_block * sb;
+ struct super_block *sb = inode->i_sb;
unsigned long dquot_freed_blocks;
- sb = inode->i_sb;
- if (!sb) {
- printk ("ext3_free_blocks: nonexistent device");
- return;
- }
+ trace_ext3_free_blocks(inode, block, count);
ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
if (dquot_freed_blocks)
dquot_free_block(inode, dquot_freed_blocks);
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
else
start_block = grp_goal + group_first_block;
+ trace_ext3_alloc_new_reservation(sb, start_block);
size = my_rsv->rsv_goal_size;
if (!rsv_is_empty(&my_rsv->rsv_window)) {
@@ -1230,8 +1232,11 @@ retry:
* check if the first free block is within the
* free space we just reserved
*/
- if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
+ if (start_block >= my_rsv->rsv_start &&
+ start_block <= my_rsv->rsv_end) {
+ trace_ext3_reserved(sb, start_block, my_rsv);
return 0; /* success */
+ }
/*
* if the first free bit we found is out of the reservable space
* continue search for next reservable space,
@@ -1435,14 +1440,14 @@ out:
*
* Check if filesystem has at least 1 free block available for allocation.
*/
-static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
{
ext3_fsblk_t free_blocks, root_blocks;
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
- sbi->s_resuid != current_fsuid() &&
+ !use_reservation && sbi->s_resuid != current_fsuid() &&
(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
return 0;
}
@@ -1463,7 +1468,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
*/
int ext3_should_retry_alloc(struct super_block *sb, int *retries)
{
- if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+ if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
return 0;
jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
*errp = -ENOSPC;
sb = inode->i_sb;
- if (!sb) {
- printk("ext3_new_block: nonexistent device");
- return 0;
- }
/*
* Check quota for allocation of this block.
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
return 0;
}
+ trace_ext3_request_blocks(inode, goal, num);
+
sbi = EXT3_SB(sb);
- es = EXT3_SB(sb)->s_es;
+ es = sbi->s_es;
ext3_debug("goal=%lu.\n", goal);
/*
* Allocate a block from reservation only when
@@ -1543,7 +1546,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
my_rsv = &block_i->rsv_window_node;
- if (!ext3_has_free_blocks(sbi)) {
+ if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
*errp = -ENOSPC;
goto out;
}
@@ -1742,6 +1745,10 @@ allocated:
brelse(bitmap_bh);
dquot_free_block(inode, *count-num);
*count = num;
+
+ trace_ext3_allocate_blocks(inode, goal, num,
+ (unsigned long long)ret_block);
+
return ret_block;
io_error:
@@ -1917,9 +1924,10 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
* reaches any used block. Then issue a TRIM command on this extent and free
* the extent in the block bitmap. This is done until whole group is scanned.
*/
-ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
- ext3_grpblk_t start, ext3_grpblk_t max,
- ext3_grpblk_t minblocks)
+static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
+ unsigned int group,
+ ext3_grpblk_t start, ext3_grpblk_t max,
+ ext3_grpblk_t minblocks)
{
handle_t *handle;
ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
@@ -1996,6 +2004,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
if ((next - start) < minblocks)
goto free_extent;
+ trace_ext3_discard_blocks(sb, discard_block, next - start);
/* Send the TRIM command down to the device */
err = sb_issue_discard(sb, discard_block, next - start,
GFP_NOFS, 0);
@@ -2100,7 +2109,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
return -EINVAL;
if (start >= max_blks)
- goto out;
+ return -EINVAL;
if (start + len > max_blks)
len = max_blks - start;
@@ -2148,8 +2157,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (ret >= 0)
ret = 0;
-
-out:
range->len = trimmed * sb->s_blocksize;
return ret;
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 34f0a07..3268697 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -25,6 +25,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/buffer_head.h>
+#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
@@ -32,24 +33,8 @@ static unsigned char ext3_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
-static int ext3_readdir(struct file *, void *, filldir_t);
static int ext3_dx_readdir(struct file * filp,
void * dirent, filldir_t filldir);
-static int ext3_release_dir (struct inode * inode,
- struct file * filp);
-
-const struct file_operations ext3_dir_operations = {
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = ext3_readdir, /* we take BKL. needed?*/
- .unlocked_ioctl = ext3_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext3_compat_ioctl,
-#endif
- .fsync = ext3_sync_file, /* BKL held */
- .release = ext3_release_dir,
-};
-
static unsigned char get_dtype(struct super_block *sb, int filetype)
{
@@ -60,6 +45,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext3_filetype_table[filetype]);
}
+/**
+ * Check if the given dir-inode refers to an htree-indexed directory
+ * (or a directory which chould potentially get coverted to use htree
+ * indexing).
+ *
+ * Return 1 if it is a dx dir, 0 if not
+ */
+static int is_dx_dir(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
+ EXT3_FEATURE_COMPAT_DIR_INDEX) &&
+ ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
+ ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+ return 1;
+
+ return 0;
+}
int ext3_check_dir_entry (const char * function, struct inode * dir,
struct ext3_dir_entry_2 * de,
@@ -99,18 +103,13 @@ static int ext3_readdir(struct file * filp,
unsigned long offset;
int i, stored;
struct ext3_dir_entry_2 *de;
- struct super_block *sb;
int err;
struct inode *inode = filp->f_path.dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
int ret = 0;
int dir_has_error = 0;
- sb = inode->i_sb;
-
- if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
- EXT3_FEATURE_COMPAT_DIR_INDEX) &&
- ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
- ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
+ if (is_dx_dir(inode)) {
err = ext3_dx_readdir(filp, dirent, filldir);
if (err != ERR_BAD_DX_DIR) {
ret = err;
@@ -232,22 +231,87 @@ out:
return ret;
}
+static inline int is_32bit_api(void)
+{
+#ifdef CONFIG_COMPAT
+ return is_compat_task();
+#else
+ return (BITS_PER_LONG == 32);
+#endif
+}
+
/*
* These functions convert from the major/minor hash to an f_pos
- * value.
+ * value for dx directories
*
- * Currently we only use major hash numer. This is unfortunate, but
- * on 32-bit machines, the same VFS interface is used for lseek and
- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
- * lseek/telldir/seekdir will blow out spectacularly, and from within
- * the ext2 low-level routine, we don't know if we're being called by
- * a 64-bit version of the system call or the 32-bit version of the
- * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
- * cookie. Sigh.
+ * Upper layer (for example NFS) should specify FMODE_32BITHASH or
+ * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
+ * directly on both 32-bit and 64-bit nodes, under such case, neither
+ * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
*/
-#define hash2pos(major, minor) (major >> 1)
-#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
-#define pos2min_hash(pos) (0)
+static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return major >> 1;
+ else
+ return ((__u64)(major >> 1) << 32) | (__u64)minor;
+}
+
+static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return (pos << 1) & 0xffffffff;
+ else
+ return ((pos >> 32) << 1) & 0xffffffff;
+}
+
+static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return 0;
+ else
+ return pos & 0xffffffff;
+}
+
+/*
+ * Return 32- or 64-bit end-of-file for dx directories
+ */
+static inline loff_t ext3_get_htree_eof(struct file *filp)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return EXT3_HTREE_EOF_32BIT;
+ else
+ return EXT3_HTREE_EOF_64BIT;
+}
+
+
+/*
+ * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
+ * non-htree and htree directories, where the "offset" is in terms
+ * of the filename hash value instead of the byte offset.
+ *
+ * Because we may return a 64-bit hash that is well beyond s_maxbytes,
+ * we need to pass the max hash as the maximum allowable offset in
+ * the htree directory case.
+ *
+ * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
+ * will be invalid once the directory was converted into a dx directory
+ */
+loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+ struct inode *inode = file->f_mapping->host;
+ int dx_dir = is_dx_dir(inode);
+
+ if (likely(dx_dir))
+ return generic_file_llseek_size(file, offset, origin,
+ ext3_get_htree_eof(file));
+ else
+ return generic_file_llseek(file, offset, origin);
+}
/*
* This structure holds the nodes of the red-black tree used to store
@@ -308,15 +372,16 @@ static void free_rb_tree_fname(struct rb_root *root)
}
-static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
+ loff_t pos)
{
struct dir_private_info *p;
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p)
return NULL;
- p->curr_hash = pos2maj_hash(pos);
- p->curr_minor_hash = pos2min_hash(pos);
+ p->curr_hash = pos2maj_hash(filp, pos);
+ p->curr_minor_hash = pos2min_hash(filp, pos);
return p;
}
@@ -406,7 +471,7 @@ static int call_filldir(struct file * filp, void * dirent,
printk("call_filldir: called with null fname?!?\n");
return 0;
}
- curr_pos = hash2pos(fname->hash, fname->minor_hash);
+ curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
while (fname) {
error = filldir(dirent, fname->name,
fname->name_len, curr_pos,
@@ -431,13 +496,13 @@ static int ext3_dx_readdir(struct file * filp,
int ret;
if (!info) {
- info = ext3_htree_create_dir_info(filp->f_pos);
+ info = ext3_htree_create_dir_info(filp, filp->f_pos);
if (!info)
return -ENOMEM;
filp->private_data = info;
}
- if (filp->f_pos == EXT3_HTREE_EOF)
+ if (filp->f_pos == ext3_get_htree_eof(filp))
return 0; /* EOF */
/* Some one has messed with f_pos; reset the world */
@@ -445,8 +510,8 @@ static int ext3_dx_readdir(struct file * filp,
free_rb_tree_fname(&info->root);
info->curr_node = NULL;
info->extra_fname = NULL;
- info->curr_hash = pos2maj_hash(filp->f_pos);
- info->curr_minor_hash = pos2min_hash(filp->f_pos);
+ info->curr_hash = pos2maj_hash(filp, filp->f_pos);
+ info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
}
/*
@@ -478,7 +543,7 @@ static int ext3_dx_readdir(struct file * filp,
if (ret < 0)
return ret;
if (ret == 0) {
- filp->f_pos = EXT3_HTREE_EOF;
+ filp->f_pos = ext3_get_htree_eof(filp);
break;
}
info->curr_node = rb_first(&info->root);
@@ -498,7 +563,7 @@ static int ext3_dx_readdir(struct file * filp,
info->curr_minor_hash = fname->minor_hash;
} else {
if (info->next_hash == ~0) {
- filp->f_pos = EXT3_HTREE_EOF;
+ filp->f_pos = ext3_get_htree_eof(filp);
break;
}
info->curr_hash = info->next_hash;
@@ -517,3 +582,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
return 0;
}
+
+const struct file_operations ext3_dir_operations = {
+ .llseek = ext3_dir_llseek,
+ .read = generic_read_dir,
+ .readdir = ext3_readdir,
+ .unlocked_ioctl = ext3_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext3_compat_ioctl,
+#endif
+ .fsync = ext3_sync_file,
+ .release = ext3_release_dir,
+};
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index f55df0e..724df69 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
};
const struct inode_operations ext3_file_inode_operations = {
- .truncate = ext3_truncate,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
@@ -79,7 +78,7 @@ const struct inode_operations ext3_file_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .get_acl = ext3_get_acl,
.fiemap = ext3_fiemap,
};
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 09b13bb..1860ed3 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -30,6 +30,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
+#include <trace/events/ext3.h>
/*
* akpm: A new design for ext3_sync_file().
@@ -43,7 +44,7 @@
* inode to disk.
*/
-int ext3_sync_file(struct file *file, int datasync)
+int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct ext3_inode_info *ei = EXT3_I(inode);
@@ -51,9 +52,15 @@ int ext3_sync_file(struct file *file, int datasync)
int ret, needs_barrier = 0;
tid_t commit_tid;
+ trace_ext3_sync_file_enter(file, datasync);
+
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ goto out;
+
J_ASSERT(ext3_journal_current_handle() == NULL);
/*
@@ -70,8 +77,10 @@ int ext3_sync_file(struct file *file, int datasync)
* (they were dirtied by commit). But that's OK - the blocks are
* safe in-journal, which is all fsync() needs to ensure.
*/
- if (ext3_should_journal_data(inode))
- return ext3_force_commit(inode->i_sb);
+ if (ext3_should_journal_data(inode)) {
+ ret = ext3_force_commit(inode->i_sb);
+ goto out;
+ }
if (datasync)
commit_tid = atomic_read(&ei->i_datasync_tid);
@@ -91,5 +100,7 @@ int ext3_sync_file(struct file *file, int datasync)
*/
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+out:
+ trace_ext3_sync_file_exit(inode, ret);
return ret;
}
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index 7d215b4..d4d3ade 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -200,8 +200,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
return -1;
}
hash = hash & ~1;
- if (hash == (EXT3_HTREE_EOF << 1))
- hash = (EXT3_HTREE_EOF-1) << 1;
+ if (hash == (EXT3_HTREE_EOF_32BIT << 1))
+ hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
hinfo->hash = hash;
hinfo->minor_hash = minor_hash;
return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 0b3da7c..adae962 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -23,6 +23,7 @@
#include <linux/buffer_head.h>
#include <linux/random.h>
#include <linux/bitops.h>
+#include <trace/events/ext3.h>
#include <asm/byteorder.h>
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
ino = inode->i_ino;
ext3_debug ("freeing inode %lu\n", ino);
+ trace_ext3_free_inode(inode);
is_directory = S_ISDIR(inode->i_mode);
@@ -176,42 +178,6 @@ error_return:
}
/*
- * There are two policies for allocating an inode. If the new inode is
- * a directory, then a forward search is made for a block group with both
- * free space and a low directory-to-inode ratio; if that fails, then of
- * the groups with above-average free space, that group with the fewest
- * directories already is chosen.
- *
- * For other inodes, search forward from the parent directory\'s block
- * group to find a free inode.
- */
-static int find_group_dir(struct super_block *sb, struct inode *parent)
-{
- int ngroups = EXT3_SB(sb)->s_groups_count;
- unsigned int freei, avefreei;
- struct ext3_group_desc *desc, *best_desc = NULL;
- int group, best_group = -1;
-
- freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
- avefreei = freei / ngroups;
-
- for (group = 0; group < ngroups; group++) {
- desc = ext3_get_group_desc (sb, group, NULL);
- if (!desc || !desc->bg_free_inodes_count)
- continue;
- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
- continue;
- if (!best_desc ||
- (le16_to_cpu(desc->bg_free_blocks_count) >
- le16_to_cpu(best_desc->bg_free_blocks_count))) {
- best_group = group;
- best_desc = desc;
- }
- }
- return best_group;
-}
-
-/*
* Orlov's allocator for directories.
*
* We always try to spread first-level directories.
@@ -426,6 +392,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
return ERR_PTR(-EPERM);
sb = dir->i_sb;
+ trace_ext3_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -433,12 +400,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
sbi = EXT3_SB(sb);
es = sbi->s_es;
- if (S_ISDIR(mode)) {
- if (test_opt (sb, OLDALLOC))
- group = find_group_dir(sb, dir);
- else
- group = find_group_orlov(sb, dir);
- } else
+ if (S_ISDIR(mode))
+ group = find_group_orlov(sb, dir);
+ else
group = find_group_other(sb, dir);
err = -ENOSPC;
@@ -605,6 +569,7 @@ got:
}
ext3_debug("allocating inode %lu\n", inode->i_ino);
+ trace_ext3_allocate_inode(inode, dir, mode);
goto really_out;
fail:
ext3_std_error(sb, err);
@@ -621,7 +586,7 @@ fail_free_drop:
fail_drop:
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
- inode->i_nlink = 0;
+ clear_nlink(inode);
unlock_new_inode(inode);
iput(inode);
brelse(bitmap_bh);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0aedb27..71b263f 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,10 +38,12 @@
#include <linux/bio.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#include <trace/events/ext3.h>
#include "xattr.h"
#include "acl.h"
static int ext3_writepage_trans_blocks(struct inode *inode);
+static int ext3_block_truncate_page(struct inode *inode, loff_t from);
/*
* Test whether an inode is a fast symlink.
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
might_sleep();
+ trace_ext3_forget(inode, is_metadata, blocknr);
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
*/
void ext3_evict_inode (struct inode *inode)
{
+ struct ext3_inode_info *ei = EXT3_I(inode);
struct ext3_block_alloc_info *rsv;
handle_t *handle;
int want_delete = 0;
+ trace_ext3_evict_inode(inode);
if (!inode->i_nlink && !is_bad_inode(inode)) {
dquot_initialize(inode);
want_delete = 1;
}
+ /*
+ * When journalling data dirty buffers are tracked only in the journal.
+ * So although mm thinks everything is clean and ready for reaping the
+ * inode might still have some pages to write in the running
+ * transaction or waiting to be checkpointed. Thus calling
+ * journal_invalidatepage() (via truncate_inode_pages()) to discard
+ * these buffers can cause data loss. Also even if we did not discard
+ * these buffers, we would have no way to find them after the inode
+ * is reaped and thus user could see stale data if he tries to read
+ * them before the transaction is checkpointed. So be careful and
+ * force everything to disk here... We use ei->i_datasync_tid to
+ * store the newest transaction containing inode's data.
+ *
+ * Note that directories do not have this problem because they don't
+ * use page cache.
+ */
+ if (inode->i_nlink && ext3_should_journal_data(inode) &&
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
+ journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
+
+ log_start_commit(journal, commit_tid);
+ log_wait_commit(journal, commit_tid);
+ filemap_write_and_wait(&inode->i_data);
+ }
truncate_inode_pages(&inode->i_data, 0);
ext3_discard_reservation(inode);
- rsv = EXT3_I(inode)->i_block_alloc_info;
- EXT3_I(inode)->i_block_alloc_info = NULL;
+ rsv = ei->i_block_alloc_info;
+ ei->i_block_alloc_info = NULL;
if (unlikely(rsv))
kfree(rsv);
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
if (inode->i_blocks)
ext3_truncate(inode);
/*
- * Kill off the orphan record which ext3_truncate created.
- * AKPM: I think this can be inside the above `if'.
- * Note that ext3_orphan_del() has to be able to cope with the
- * deletion of a non-existent orphan - this is because we don't
- * know if ext3_truncate() actually created an orphan record.
- * (Well, we could do this if we need to, but heck - it works)
+ * Kill off the orphan record created when the inode lost the last
+ * link. Note that ext3_orphan_del() has to be able to cope with the
+ * deletion of a non-existent orphan - ext3_truncate() could
+ * have removed the record.
*/
ext3_orphan_del(handle, inode);
- EXT3_I(inode)->i_dtime = get_seconds();
+ ei->i_dtime = get_seconds();
/*
* One subtle ordering requirement: if anything has gone wrong
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
ext3_fsblk_t first_block = 0;
+ trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
J_ASSERT(handle != NULL || create == 0);
depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
if (!create || err == -EIO)
goto cleanup;
+ /*
+ * Block out ext3_truncate while we alter the tree
+ */
mutex_lock(&ei->truncate_mutex);
/*
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
*/
count = ext3_blks_to_allocate(partial, indirect_blks,
maxblocks, blocks_to_boundary);
- /*
- * Block out ext3_truncate while we alter the tree
- */
err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
offsets + (partial - chain), partial);
@@ -970,6 +999,9 @@ cleanup:
}
BUFFER_TRACE(bh_result, "returned");
out:
+ trace_ext3_get_blocks_exit(inode, iblock,
+ depth ? le32_to_cpu(chain[depth-1].key) : 0,
+ count, err);
return err;
}
@@ -1102,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
return bh;
if (buffer_uptodate(bh))
return bh;
- ll_rw_block(READ_META, 1, &bh);
+ ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
ext3_truncate(inode);
}
+/*
+ * Truncate blocks that were not used by direct IO write. We have to zero out
+ * the last file block as well because direct IO might have written to it.
+ */
+static void ext3_truncate_failed_direct_write(struct inode *inode)
+{
+ ext3_block_truncate_page(inode, inode->i_size);
+ ext3_truncate(inode);
+}
+
static int ext3_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
* we allocate blocks but write fails for some reason */
int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
+ trace_ext3_write_begin(inode, pos, len, flags);
+
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
unsigned from, to;
int ret = 0, ret2;
+ trace_ext3_ordered_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
struct inode *inode = file->f_mapping->host;
int ret;
+ trace_ext3_writeback_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
update_file_sizes(inode, pos, copied);
/*
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
{
handle_t *handle = ext3_journal_current_handle();
struct inode *inode = mapping->host;
+ struct ext3_inode_info *ei = EXT3_I(inode);
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
+ trace_ext3_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
- if (inode->i_size > EXT3_I(inode)->i_disksize) {
- EXT3_I(inode)->i_disksize = inode->i_size;
+ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
+ if (inode->i_size > ei->i_disksize) {
+ ei->i_disksize = inode->i_size;
ret2 = ext3_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
@@ -1583,6 +1632,7 @@ static int ext3_ordered_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_ordered_writepage(page);
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1659,6 +1709,7 @@ static int ext3_writeback_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_writeback_writepage(page);
if (page_has_buffers(page)) {
if (!walk_page_buffers(NULL, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1707,6 +1758,7 @@ static int ext3_journalled_writepage(struct page *page,
if (ext3_journal_current_handle())
goto no_write;
+ trace_ext3_journalled_writepage(page);
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -1733,6 +1785,8 @@ static int ext3_journalled_writepage(struct page *page,
if (ret == 0)
ret = err;
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
+ atomic_set(&EXT3_I(inode)->i_datasync_tid,
+ handle->h_transaction->t_tid);
unlock_page(page);
} else {
/*
@@ -1757,6 +1811,7 @@ out_unlock:
static int ext3_readpage(struct file *file, struct page *page)
{
+ trace_ext3_readpage(page);
return mpage_readpage(page, ext3_get_block);
}
@@ -1771,6 +1826,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_invalidatepage(page, offset);
+
/*
* If it's a full truncate we just forget about the pending dirtying
*/
@@ -1784,6 +1841,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_releasepage(page);
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
@@ -1812,6 +1870,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_length(iov, nr_segs);
int retries = 0;
+ trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+
if (rw == WRITE) {
loff_t final_size = offset + count;
@@ -1834,9 +1894,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
}
retry:
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- ext3_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ ext3_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
@@ -1846,7 +1905,7 @@ retry:
loff_t end = offset + iov_length(iov, nr_segs);
if (end > isize)
- vmtruncate(inode, isize);
+ ext3_truncate_failed_direct_write(inode);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -1860,7 +1919,7 @@ retry:
/* This is really bad luck. We've written the data
* but cannot extend i_size. Truncate allocated blocks
* and pretend the write failed... */
- ext3_truncate(inode);
+ ext3_truncate_failed_direct_write(inode);
ret = PTR_ERR(handle);
goto out;
}
@@ -1886,6 +1945,8 @@ retry:
ret = err;
}
out:
+ trace_ext3_direct_IO_exit(inode, offset,
+ iov_length(iov, nr_segs), rw, ret);
return ret;
}
@@ -1968,17 +2029,24 @@ void ext3_set_aops(struct inode *inode)
* This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown.
*/
-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
- struct address_space *mapping, loff_t from)
+static int ext3_block_truncate_page(struct inode *inode, loff_t from)
{
ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned offset = from & (PAGE_CACHE_SIZE - 1);
unsigned blocksize, iblock, length, pos;
- struct inode *inode = mapping->host;
+ struct page *page;
+ handle_t *handle = NULL;
struct buffer_head *bh;
int err = 0;
+ /* Truncated on block boundary - nothing to do */
blocksize = inode->i_sb->s_blocksize;
+ if ((from & (blocksize - 1)) == 0)
+ return 0;
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page)
+ return -ENOMEM;
length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
@@ -2023,11 +2091,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
+ /* data=writeback mode doesn't need transaction to zero-out data */
+ if (!ext3_should_writeback_data(inode)) {
+ /* We journal at most one block */
+ handle = ext3_journal_start(inode, 1);
+ if (IS_ERR(handle)) {
+ clear_highpage(page);
+ flush_dcache_page(page);
+ err = PTR_ERR(handle);
+ goto unlock;
+ }
+ }
+
if (ext3_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
err = ext3_journal_get_write_access(handle, bh);
if (err)
- goto unlock;
+ goto stop;
}
zero_user(page, offset, length);
@@ -2041,6 +2121,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
err = ext3_journal_dirty_data(handle, bh);
mark_buffer_dirty(bh);
}
+stop:
+ if (handle)
+ ext3_journal_stop(handle);
unlock:
unlock_page(page);
@@ -2409,8 +2492,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
int ext3_can_truncate(struct inode *inode)
{
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return 0;
if (S_ISREG(inode->i_mode))
return 1;
if (S_ISDIR(inode->i_mode))
@@ -2454,7 +2535,6 @@ void ext3_truncate(struct inode *inode)
struct ext3_inode_info *ei = EXT3_I(inode);
__le32 *i_data = ei->i_data;
int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
- struct address_space *mapping = inode->i_mapping;
int offsets[4];
Indirect chain[4];
Indirect *partial;
@@ -2462,7 +2542,8 @@ void ext3_truncate(struct inode *inode)
int n;
long last_block;
unsigned blocksize = inode->i_sb->s_blocksize;
- struct page *page;
+
+ trace_ext3_truncate_enter(inode);
if (!ext3_can_truncate(inode))
goto out_notrans;
@@ -2470,37 +2551,12 @@ void ext3_truncate(struct inode *inode)
if (inode->i_size == 0 && ext3_should_writeback_data(inode))
ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
- /*
- * We have to lock the EOF page here, because lock_page() nests
- * outside journal_start().
- */
- if ((inode->i_size & (blocksize - 1)) == 0) {
- /* Block boundary? Nothing to do */
- page = NULL;
- } else {
- page = grab_cache_page(mapping,
- inode->i_size >> PAGE_CACHE_SHIFT);
- if (!page)
- goto out_notrans;
- }
-
handle = start_transaction(inode);
- if (IS_ERR(handle)) {
- if (page) {
- clear_highpage(page);
- flush_dcache_page(page);
- unlock_page(page);
- page_cache_release(page);
- }
+ if (IS_ERR(handle))
goto out_notrans;
- }
last_block = (inode->i_size + blocksize-1)
>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
-
- if (page)
- ext3_block_truncate_page(handle, page, mapping, inode->i_size);
-
n = ext3_block_to_path(inode, last_block, offsets, NULL);
if (n == 0)
goto out_stop; /* error */
@@ -2615,6 +2671,7 @@ out_stop:
ext3_orphan_del(handle, inode);
ext3_journal_stop(handle);
+ trace_ext3_truncate_exit(inode);
return;
out_notrans:
/*
@@ -2623,6 +2680,7 @@ out_notrans:
*/
if (inode->i_nlink)
ext3_orphan_del(NULL, inode);
+ trace_ext3_truncate_exit(inode);
}
static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2764,9 +2822,10 @@ make_io:
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
*/
+ trace_ext3_load_inode(inode);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ_META, bh);
+ submit_bh(READ | REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
ext3_error(inode->i_sb, "ext3_get_inode_loc",
@@ -2858,7 +2917,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
- inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+ set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
@@ -3245,6 +3304,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
ext3_journal_stop(handle);
}
+ if (attr->ia_valid & ATTR_SIZE)
+ inode_dio_wait(inode);
+
if (S_ISREG(inode->i_mode) &&
attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
handle_t *handle;
@@ -3256,18 +3318,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
}
error = ext3_orphan_add(handle, inode);
+ if (error) {
+ ext3_journal_stop(handle);
+ goto err_out;
+ }
EXT3_I(inode)->i_disksize = attr->ia_size;
- rc = ext3_mark_inode_dirty(handle, inode);
- if (!error)
- error = rc;
+ error = ext3_mark_inode_dirty(handle, inode);
ext3_journal_stop(handle);
+ if (error) {
+ /* Some hard fs error must have happened. Bail out. */
+ ext3_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ rc = ext3_block_truncate_page(inode, attr->ia_size);
+ if (rc) {
+ /* Cleanup orphan list and exit */
+ handle = ext3_journal_start(inode, 3);
+ if (IS_ERR(handle)) {
+ ext3_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ ext3_orphan_del(handle, inode);
+ ext3_journal_stop(handle);
+ goto err_out;
+ }
}
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
- rc = vmtruncate(inode, attr->ia_size);
- if (rc)
- goto err_out;
+ truncate_setsize(inode, attr->ia_size);
+ ext3_truncate(inode);
}
setattr_copy(inode, attr);
@@ -3401,6 +3481,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
int err;
might_sleep();
+ trace_ext3_mark_inode_dirty(inode, _RET_IP_);
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (!err)
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index f4090bd..ba1b54e 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -150,30 +150,6 @@ setversion_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
-#ifdef CONFIG_JBD_DEBUG
- case EXT3_IOC_WAIT_FOR_READONLY:
- /*
- * This is racy - by the time we're woken up and running,
- * the superblock could be released. And the module could
- * have been unloaded. So sue me.
- *
- * Returns 1 if it slept, else zero.
- */
- {
- struct super_block *sb = inode->i_sb;
- DECLARE_WAITQUEUE(wait, current);
- int ret = 0;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
- if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
- schedule();
- ret = 1;
- }
- remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
- return ret;
- }
-#endif
case EXT3_IOC_GETRSVSZ:
if (test_opt(inode->i_sb, RESERVATION)
&& S_ISREG(inode->i_mode)
@@ -285,7 +261,7 @@ group_add_out:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (copy_from_user(&range, (struct fstrim_range *)arg,
+ if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
@@ -293,7 +269,7 @@ group_add_out:
if (ret < 0)
return ret;
- if (copy_to_user((struct fstrim_range *)arg, &range,
+ if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 8c9f82d..1272dfb 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
+#include <trace/events/ext3.h>
#include "namei.h"
#include "xattr.h"
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
while (len--) printk("%c", *name++);
ext3fs_dirhash(de->name, de->name_len, &h);
printk(":%x.%u ", h.hash,
- ((char *) de - base));
+ (unsigned) ((char *) de - base));
}
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
@@ -918,7 +919,8 @@ restart:
bh = ext3_getblk(NULL, dir, b++, 0, &err);
bh_use[ra_max] = bh;
if (bh)
- ll_rw_block(READ_META, 1, &bh);
+ ll_rw_block(READ | REQ_META | REQ_PRIO,
+ 1, &bh);
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1010,7 +1012,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
*err = -ENOENT;
errout:
- dxtrace(printk("%s not found\n", name));
+ dxtrace(printk("%s not found\n", entry->name));
dx_release (frames);
return NULL;
}
@@ -1035,15 +1037,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
return ERR_PTR(-EIO);
}
inode = ext3_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- if (PTR_ERR(inode) == -ESTALE) {
- ext3_error(dir->i_sb, __func__,
- "deleted inode referenced: %lu",
- ino);
- return ERR_PTR(-EIO);
- } else {
- return ERR_CAST(inode);
- }
+ if (inode == ERR_PTR(-ESTALE)) {
+ ext3_error(dir->i_sb, __func__,
+ "deleted inode referenced: %lu",
+ ino);
+ return ERR_PTR(-EIO);
}
}
return d_splice_alias(inode, dentry);
@@ -1820,7 +1818,7 @@ retry:
de->name_len = 2;
strcpy (de->name, "..");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, dir_block);
if (err)
@@ -1832,7 +1830,7 @@ retry:
if (err) {
out_clear_inode:
- inode->i_nlink = 0;
+ clear_nlink(inode);
unlock_new_inode(inode);
ext3_mark_inode_dirty(handle, inode);
iput (inode);
@@ -2141,6 +2139,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
struct ext3_dir_entry_2 * de;
handle_t *handle;
+ trace_ext3_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
dquot_initialize(dir);
@@ -2168,7 +2167,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
ext3_warning (inode->i_sb, "ext3_unlink",
"Deleting nonexistent file (%lu), %d",
inode->i_ino, inode->i_nlink);
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
}
retval = ext3_delete_entry(handle, dir, de, bh);
if (retval)
@@ -2186,6 +2185,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
end_unlink:
ext3_journal_stop(handle);
brelse (bh);
+ trace_ext3_unlink_exit(dentry, retval);
return retval;
}
@@ -2532,7 +2532,7 @@ const struct inode_operations ext3_dir_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .get_acl = ext3_get_acl,
};
const struct inode_operations ext3_special_inode_operations = {
@@ -2543,5 +2543,5 @@ const struct inode_operations ext3_special_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .get_acl = ext3_get_acl,
};
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index ba57a63..562ede3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,9 @@
#include "acl.h"
#include "namei.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext3.h>
+
#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
#else
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
+static int ext3_drop_inode(struct inode *inode)
+{
+ int drop = generic_drop_inode(inode);
+
+ trace_ext3_drop_inode(inode, drop);
+ return drop;
+}
+
static void ext3_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -641,8 +652,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",nouid32");
if (test_opt(sb, DEBUG))
seq_puts(seq, ",debug");
- if (test_opt(sb, OLDALLOC))
- seq_puts(seq, ",oldalloc");
#ifdef CONFIG_EXT3_FS_XATTR
if (test_opt(sb, XATTR_USER))
seq_puts(seq, ",user_xattr");
@@ -788,6 +797,7 @@ static const struct super_operations ext3_sops = {
.destroy_inode = ext3_destroy_inode,
.write_inode = ext3_write_inode,
.dirty_inode = ext3_dirty_inode,
+ .drop_inode = ext3_drop_inode,
.evict_inode = ext3_evict_inode,
.put_super = ext3_put_super,
.sync_fs = ext3_sync_fs,
@@ -1037,10 +1047,12 @@ static int parse_options (char *options, struct super_block *sb,
set_opt (sbi->s_mount_opt, DEBUG);
break;
case Opt_oldalloc:
- set_opt (sbi->s_mount_opt, OLDALLOC);
+ ext3_msg(sb, KERN_WARNING,
+ "Ignoring deprecated oldalloc option");
break;
case Opt_orlov:
- clear_opt (sbi->s_mount_opt, OLDALLOC);
+ ext3_msg(sb, KERN_WARNING,
+ "Ignoring deprecated orlov option");
break;
#ifdef CONFIG_EXT3_FS_XATTR
case Opt_user_xattr:
@@ -1291,13 +1303,6 @@ set_qf_format:
"not specified.");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext3_msg(sb, KERN_ERR, "error: journaled quota format "
- "specified with no journaling "
- "enabled.");
- return 0;
- }
}
#endif
return 1;
@@ -1718,6 +1723,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+ /* enable barriers by default */
+ set_opt(sbi->s_mount_opt, BARRIER);
set_opt(sbi->s_mount_opt, RESERVATION);
if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
@@ -2507,6 +2514,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
{
tid_t target;
+ trace_ext3_sync_fs(sb, wait);
if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
@@ -2654,13 +2662,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
/*
* If we have an unprocessed orphan list hanging
* around from a previously readonly bdev mount,
- * require a full umount/remount for now.
+ * require a full umount & mount for now.
*/
if (es->s_last_orphan) {
ext3_msg(sb, KERN_WARNING, "warning: couldn't "
"remount RDWR because of unprocessed "
"orphan inode list. Please "
- "umount/remount instead.");
+ "umount & mount instead.");
err = -EINVAL;
goto restore_opts;
}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index b8d9f83..3c218b8 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -48,28 +48,32 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
name, value, size, flags);
}
-int
-ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
- const struct qstr *qstr)
+int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *fs_info)
{
- int err;
- size_t len;
- void *value;
- char *name;
+ const struct xattr *xattr;
+ handle_t *handle = fs_info;
+ int err = 0;
- err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ err = ext3_xattr_set_handle(handle, inode,
+ EXT3_XATTR_INDEX_SECURITY,
+ xattr->name, xattr->value,
+ xattr->value_len, 0);
+ if (err < 0)
+ break;
}
- err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
- name, value, len, 0);
- kfree(name);
- kfree(value);
return err;
}
+int
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
+{
+ return security_inode_init_security(inode, dir, qstr,
+ &ext3_initxattrs, handle);
+}
+
const struct xattr_handler ext3_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.list = ext3_xattr_security_list,