aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorcodeworkx <codeworkx@cyanogenmod.com>2012-09-18 18:51:21 +0200
committercodeworkx <codeworkx@cyanogenmod.com>2012-09-18 19:02:23 +0200
commitf991bd2a427ec6a5e049e19745aba6a5d7f006c4 (patch)
treec6f0d428403c32c95335c34b3f0105c2e4c8087d /fs
parentc28265764ec6ad9995eb0c761a376ffc9f141fcd (diff)
parentbea37381fd9a34c6660e5195d31beea86aa3dda3 (diff)
downloadkernel_samsung_smdk4412-f991bd2a427ec6a5e049e19745aba6a5d7f006c4.zip
kernel_samsung_smdk4412-f991bd2a427ec6a5e049e19745aba6a5d7f006c4.tar.gz
kernel_samsung_smdk4412-f991bd2a427ec6a5e049e19745aba6a5d7f006c4.tar.bz2
Merge linux-3.0.31 from korg into jellybean
Conflicts: arch/arm/mm/proc-v7.S drivers/base/core.c drivers/gpu/drm/i915/i915_gem_execbuffer.c drivers/gpu/drm/i915/intel_display.c drivers/gpu/drm/i915/intel_lvds.c drivers/gpu/drm/radeon/evergreen.c drivers/gpu/drm/radeon/r100.c drivers/gpu/drm/radeon/radeon_connectors.c drivers/gpu/drm/radeon/rs600.c drivers/usb/core/hub.c drivers/usb/host/xhci-pci.c drivers/usb/host/xhci.c drivers/usb/serial/qcserial.c fs/proc/base.c Change-Id: Ia98b35db3f8c0bfd95817867d3acb85be8e5e772
Diffstat (limited to 'fs')
-rw-r--r--fs/afs/internal.h2
-rw-r--r--fs/afs/rxrpc.c3
-rw-r--r--fs/aio.c22
-rw-r--r--fs/autofs4/autofs_i.h11
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/autofs4/inode.c2
-rw-r--r--fs/autofs4/waitq.c2
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/block_dev.c16
-rw-r--r--fs/btrfs/ctree.h2
-rw-r--r--fs/cifs/connect.c3
-rw-r--r--fs/cifs/dir.c20
-rw-r--r--fs/cifs/sess.c7
-rw-r--r--fs/dcache.c17
-rw-r--r--fs/ecryptfs/crypto.c49
-rw-r--r--fs/ecryptfs/inode.c50
-rw-r--r--fs/ecryptfs/miscdev.c56
-rw-r--r--fs/ecryptfs/read_write.c23
-rw-r--r--fs/eventpoll.c268
-rw-r--r--fs/ext3/inode.c24
-rw-r--r--fs/ext4/ext4_jbd2.h56
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/inode.c45
-rw-r--r--fs/ext4/super.c7
-rw-r--r--fs/hfsplus/catalog.c4
-rw-r--r--fs/hfsplus/dir.c11
-rw-r--r--fs/hugetlbfs/inode.c25
-rw-r--r--fs/jbd2/commit.c4
-rw-r--r--fs/jbd2/transaction.c2
-rw-r--r--fs/lockd/clnt4xdr.c2
-rw-r--r--fs/lockd/clntxdr.c2
-rw-r--r--fs/lockd/svc.c2
-rw-r--r--fs/namei.c4
-rw-r--r--fs/nfs/callback_proc.c2
-rw-r--r--fs/nfs/delegation.c11
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/nfs4_fs.h3
-rw-r--r--fs/nfs/nfs4proc.c54
-rw-r--r--fs/nfs/nfs4state.c35
-rw-r--r--fs/nfs/objlayout/objio_osd.c3
-rw-r--r--fs/nfs/objlayout/objlayout.c10
-rw-r--r--fs/nfs/pnfs.c12
-rw-r--r--fs/nfs/pnfs.h1
-rw-r--r--fs/nfs/super.c51
-rw-r--r--fs/nfsd/export.c2
-rw-r--r--fs/nfsd/nfs3xdr.c22
-rw-r--r--fs/nfsd/nfs4proc.c7
-rw-r--r--fs/nfsd/nfs4state.c23
-rw-r--r--fs/nilfs2/ioctl.c13
-rw-r--r--fs/nilfs2/the_nilfs.c1
-rw-r--r--fs/notify/mark.c8
-rw-r--r--fs/ocfs2/alloc.c2
-rw-r--r--fs/ocfs2/refcounttree.c12
-rw-r--r--fs/ocfs2/suballoc.c4
-rw-r--r--fs/partitions/check.c48
-rw-r--r--fs/pipe.c31
-rw-r--r--fs/proc/base.c108
-rw-r--r--fs/proc/namespaces.c2
-rw-r--r--fs/proc/task_mmu.c12
-rw-r--r--fs/proc/uptime.c9
-rw-r--r--fs/reiserfs/super.c27
-rw-r--r--fs/signalfd.c15
-rw-r--r--fs/splice.c5
-rw-r--r--fs/sysfs/inode.c11
-rw-r--r--fs/ubifs/debug.h17
-rw-r--r--fs/udf/file.c8
-rw-r--r--fs/udf/inode.c21
-rw-r--r--fs/udf/super.c6
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_discard.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c30
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c37
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/xfs_iget.c18
-rw-r--r--fs/xfs/xfs_log_recover.c33
-rw-r--r--fs/xfs/xfs_vnodeops.c3
76 files changed, 1013 insertions, 462 deletions
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 5a9b684..1f3624d 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -109,7 +109,7 @@ struct afs_call {
unsigned reply_size; /* current size of reply */
unsigned first_offset; /* offset into mapping[first] */
unsigned last_to; /* amount of mapping[last] */
- unsigned short offset; /* offset into received data store */
+ unsigned offset; /* offset into received data store */
unsigned char unmarshall; /* unmarshalling phase */
bool incoming; /* T if incoming call */
bool send_pages; /* T if data from mapping should be sent */
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index e45a323..8ad8c2a 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -314,6 +314,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
struct msghdr msg;
struct kvec iov[1];
int ret;
+ struct sk_buff *skb;
_enter("%x,{%d},", addr->s_addr, ntohs(call->port));
@@ -380,6 +381,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp,
error_do_abort:
rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT);
+ while ((skb = skb_dequeue(&call->rx_queue)))
+ afs_free_skb(skb);
rxrpc_kernel_end_call(rxcall);
call->rxcall = NULL;
error_kill_call:
diff --git a/fs/aio.c b/fs/aio.c
index e29ec48..b4a88cc 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -228,12 +228,6 @@ static void __put_ioctx(struct kioctx *ctx)
call_rcu(&ctx->rcu_head, ctx_rcu_free);
}
-static inline void get_ioctx(struct kioctx *kioctx)
-{
- BUG_ON(atomic_read(&kioctx->users) <= 0);
- atomic_inc(&kioctx->users);
-}
-
static inline int try_get_ioctx(struct kioctx *kioctx)
{
return atomic_inc_not_zero(&kioctx->users);
@@ -273,7 +267,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
mm = ctx->mm = current->mm;
atomic_inc(&mm->mm_count);
- atomic_set(&ctx->users, 1);
+ atomic_set(&ctx->users, 2);
spin_lock_init(&ctx->ctx_lock);
spin_lock_init(&ctx->ring_info.ring_lock);
init_waitqueue_head(&ctx->wait);
@@ -527,11 +521,16 @@ static void aio_fput_routine(struct work_struct *data)
fput(req->ki_filp);
/* Link the iocb into the context's free list */
+ rcu_read_lock();
spin_lock_irq(&ctx->ctx_lock);
really_put_req(ctx, req);
+ /*
+ * at that point ctx might've been killed, but actual
+ * freeing is RCU'd
+ */
spin_unlock_irq(&ctx->ctx_lock);
+ rcu_read_unlock();
- put_ioctx(ctx);
spin_lock_irq(&fput_lock);
}
spin_unlock_irq(&fput_lock);
@@ -562,7 +561,6 @@ static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
* this function will be executed w/out any aio kthread wakeup.
*/
if (unlikely(!fput_atomic(req->ki_filp))) {
- get_ioctx(ctx);
spin_lock(&fput_lock);
list_add(&req->ki_list, &fput_head);
spin_unlock(&fput_lock);
@@ -1256,10 +1254,10 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
ret = PTR_ERR(ioctx);
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
- if (!ret)
+ if (!ret) {
+ put_ioctx(ioctx);
return 0;
-
- get_ioctx(ioctx); /* io_destroy() expects us to hold a ref */
+ }
io_destroy(ioctx);
}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 475f9c5..756d328 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -278,6 +278,17 @@ int autofs4_fill_super(struct super_block *, void *, int);
struct autofs_info *autofs4_new_ino(struct autofs_sb_info *);
void autofs4_clean_ino(struct autofs_info *);
+static inline int autofs_prepare_pipe(struct file *pipe)
+{
+ if (!pipe->f_op || !pipe->f_op->write)
+ return -EINVAL;
+ if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode))
+ return -EINVAL;
+ /* We want a packet pipe */
+ pipe->f_flags |= O_DIRECT;
+ return 0;
+}
+
/* Queue management functions */
int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 509fe1e..de54271 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -376,7 +376,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
err = -EBADF;
goto out;
}
- if (!pipe->f_op || !pipe->f_op->write) {
+ if (autofs_prepare_pipe(pipe) < 0) {
err = -EPIPE;
fput(pipe);
goto out;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 180fa24..7c26678 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -292,7 +292,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
printk("autofs: could not open pipe file descriptor\n");
goto fail_dput;
}
- if (!pipe->f_op || !pipe->f_op->write)
+ if (autofs_prepare_pipe(pipe) < 0)
goto fail_fput;
sbi->pipe = pipe;
sbi->pipefd = pipefd;
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 2543598..813ea10 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -90,7 +90,7 @@ static int autofs4_write(struct file *file, const void *addr, int bytes)
return (bytes > 0);
}
-
+
static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
struct autofs_wait_queue *wq,
int type)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 9ba2ac7..618493e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1422,7 +1422,7 @@ static int fill_thread_core_info(struct elf_thread_core_info *t,
for (i = 1; i < view->n; ++i) {
const struct user_regset *regset = &view->regsets[i];
do_thread_regset_writeback(t->task, regset);
- if (regset->core_note_type &&
+ if (regset->core_note_type && regset->get &&
(!regset->active || regset->active(t->task, regset))) {
int ret;
size_t size = regset->n * regset->size;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 34503ba..74fc5ed 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1149,8 +1149,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
* The latter is necessary to prevent ghost
* partitions on a removed medium.
*/
- if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
- rescan_partitions(disk, bdev);
+ if (bdev->bd_invalidated) {
+ if (!ret)
+ rescan_partitions(disk, bdev);
+ else if (ret == -ENOMEDIUM)
+ invalidate_partitions(disk, bdev);
+ }
if (ret)
goto out_clear;
} else {
@@ -1180,8 +1184,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
- if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
- rescan_partitions(bdev->bd_disk, bdev);
+ if (bdev->bd_invalidated) {
+ if (!ret)
+ rescan_partitions(bdev->bd_disk, bdev);
+ else if (ret == -ENOMEDIUM)
+ invalidate_partitions(bdev->bd_disk, bdev);
+ }
if (ret)
goto out_unlock_bdev;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3b859a3..66179bc 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1972,7 +1972,7 @@ BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item,
static inline bool btrfs_root_readonly(struct btrfs_root *root)
{
- return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY;
+ return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0;
}
/* struct btrfs_super_block */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index cb85825..b775809 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3004,7 +3004,7 @@ cifs_get_volume_info(char *mount_data, const char *devname)
int
cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
{
- int rc = 0;
+ int rc;
int xid;
struct cifs_ses *pSesInfo;
struct cifs_tcon *tcon;
@@ -3033,6 +3033,7 @@ try_mount_again:
FreeXid(xid);
}
#endif
+ rc = 0;
tcon = NULL;
pSesInfo = NULL;
srvTcp = NULL;
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 16cdd6d..ed5c07b 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -583,10 +583,26 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
* If either that or op not supported returned, follow
* the normal lookup.
*/
- if ((rc == 0) || (rc == -ENOENT))
+ switch (rc) {
+ case 0:
+ /*
+ * The server may allow us to open things like
+ * FIFOs, but the client isn't set up to deal
+ * with that. If it's not a regular file, just
+ * close it and proceed as if it were a normal
+ * lookup.
+ */
+ if (newInode && !S_ISREG(newInode->i_mode)) {
+ CIFSSMBClose(xid, pTcon, fileHandle);
+ break;
+ }
+ case -ENOENT:
posix_open = true;
- else if ((rc == -EINVAL) || (rc != -EOPNOTSUPP))
+ case -EOPNOTSUPP:
+ break;
+ default:
pTcon->broken_posix_open = true;
+ }
}
if (!posix_open)
rc = cifs_get_inode_info_unix(&newInode, full_path,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index d3e6196..0cfae19 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -244,16 +244,15 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
/* copy user */
/* BB what about null user mounts - check that we do this BB */
/* copy user */
- if (ses->user_name != NULL)
+ if (ses->user_name != NULL) {
strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
+ bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
+ }
/* else null user mount */
-
- bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
*bcc_ptr = 0;
bcc_ptr++; /* account for null termination */
/* copy domain */
-
if (ses->domainName != NULL) {
strncpy(bcc_ptr, ses->domainName, 256);
bcc_ptr += strnlen(ses->domainName, 256);
diff --git a/fs/dcache.c b/fs/dcache.c
index d2f8feb..0b51cfc 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -241,6 +241,7 @@ static void dentry_lru_add(struct dentry *dentry)
static void __dentry_lru_del(struct dentry *dentry)
{
list_del_init(&dentry->d_lru);
+ dentry->d_flags &= ~DCACHE_SHRINK_LIST;
dentry->d_sb->s_nr_dentry_unused--;
dentry_stat.nr_unused--;
}
@@ -753,6 +754,7 @@ relock:
spin_unlock(&dentry->d_lock);
} else {
list_move_tail(&dentry->d_lru, &tmp);
+ dentry->d_flags |= DCACHE_SHRINK_LIST;
spin_unlock(&dentry->d_lock);
if (!--cnt)
break;
@@ -1144,14 +1146,18 @@ resume:
/*
* move only zero ref count dentries to the end
* of the unused list for prune_dcache
+ *
+ * Those which are presently on the shrink list, being processed
+ * by shrink_dentry_list(), shouldn't be moved. Otherwise the
+ * loop in shrink_dcache_parent() might not make any progress
+ * and loop forever.
*/
- if (!dentry->d_count) {
+ if (dentry->d_count) {
+ dentry_lru_del(dentry);
+ } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
dentry_lru_move_tail(dentry);
found++;
- } else {
- dentry_lru_del(dentry);
}
-
/*
* We can return to the caller if we have found some (this
* ensures forward progress). We'll be coming back to find
@@ -2427,6 +2433,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
if (d_ancestor(alias, dentry)) {
/* Check for loops */
actual = ERR_PTR(-ELOOP);
+ spin_unlock(&inode->i_lock);
} else if (IS_ROOT(alias)) {
/* Is this an anonymous mountpoint that we
* could splice into our tree? */
@@ -2436,7 +2443,7 @@ struct dentry *d_materialise_unique(struct dentry *dentry, struct inode *inode)
goto found;
} else {
/* Nope, but we must(!) avoid directory
- * aliasing */
+ * aliasing. This drops inode->i_lock */
actual = __d_unalias(inode, dentry, alias);
}
write_sequnlock(&rename_lock);
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 7cf5c3e..c6602d2 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -417,17 +417,6 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
(unsigned long long)(extent_base + extent_offset), rc);
goto out;
}
- if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Encrypting extent "
- "with iv:\n");
- ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
- ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
- "encryption:\n");
- ecryptfs_dump_hex((char *)
- (page_address(page)
- + (extent_offset * crypt_stat->extent_size)),
- 8);
- }
rc = ecryptfs_encrypt_page_offset(crypt_stat, enc_extent_page, 0,
page, (extent_offset
* crypt_stat->extent_size),
@@ -440,14 +429,6 @@ static int ecryptfs_encrypt_extent(struct page *enc_extent_page,
goto out;
}
rc = 0;
- if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Encrypt extent [0x%.16llx]; "
- "rc = [%d]\n",
- (unsigned long long)(extent_base + extent_offset), rc);
- ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
- "encryption:\n");
- ecryptfs_dump_hex((char *)(page_address(enc_extent_page)), 8);
- }
out:
return rc;
}
@@ -543,17 +524,6 @@ static int ecryptfs_decrypt_extent(struct page *page,
(unsigned long long)(extent_base + extent_offset), rc);
goto out;
}
- if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Decrypting extent "
- "with iv:\n");
- ecryptfs_dump_hex(extent_iv, crypt_stat->iv_bytes);
- ecryptfs_printk(KERN_DEBUG, "First 8 bytes before "
- "decryption:\n");
- ecryptfs_dump_hex((char *)
- (page_address(enc_extent_page)
- + (extent_offset * crypt_stat->extent_size)),
- 8);
- }
rc = ecryptfs_decrypt_page_offset(crypt_stat, page,
(extent_offset
* crypt_stat->extent_size),
@@ -567,16 +537,6 @@ static int ecryptfs_decrypt_extent(struct page *page,
goto out;
}
rc = 0;
- if (unlikely(ecryptfs_verbosity > 0)) {
- ecryptfs_printk(KERN_DEBUG, "Decrypt extent [0x%.16llx]; "
- "rc = [%d]\n",
- (unsigned long long)(extent_base + extent_offset), rc);
- ecryptfs_printk(KERN_DEBUG, "First 8 bytes after "
- "decryption:\n");
- ecryptfs_dump_hex((char *)(page_address(page)
- + (extent_offset
- * crypt_stat->extent_size)), 8);
- }
out:
return rc;
}
@@ -1618,7 +1578,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
if (rc) {
printk(KERN_DEBUG "Valid eCryptfs headers not found in "
- "file header region or xattr region\n");
+ "file header region or xattr region, inode %lu\n",
+ ecryptfs_inode->i_ino);
rc = -EINVAL;
goto out;
}
@@ -1627,7 +1588,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
ECRYPTFS_DONT_VALIDATE_HEADER_SIZE);
if (rc) {
printk(KERN_DEBUG "Valid eCryptfs headers not found in "
- "file xattr region either\n");
+ "file xattr region either, inode %lu\n",
+ ecryptfs_inode->i_ino);
rc = -EINVAL;
}
if (crypt_stat->mount_crypt_stat->flags
@@ -1638,7 +1600,8 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
"crypto metadata only in the extended attribute "
"region, but eCryptfs was mounted without "
"xattr support enabled. eCryptfs will not treat "
- "this like an encrypted file.\n");
+ "this like an encrypted file, inode %lu\n",
+ ecryptfs_inode->i_ino);
rc = -EINVAL;
}
}
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 876b25c..28f09c7 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -887,18 +887,6 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
size_t num_zeros = (PAGE_CACHE_SIZE
- (ia->ia_size & ~PAGE_CACHE_MASK));
-
- /*
- * XXX(truncate) this should really happen at the begginning
- * of ->setattr. But the code is too messy to that as part
- * of a larger patch. ecryptfs is also totally missing out
- * on the inode_change_ok check at the beginning of
- * ->setattr while would include this.
- */
- rc = inode_newsize_ok(inode, ia->ia_size);
- if (rc)
- goto out;
-
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
truncate_setsize(inode, ia->ia_size);
lower_ia->ia_size = ia->ia_size;
@@ -948,6 +936,28 @@ out:
return rc;
}
+static int ecryptfs_inode_newsize_ok(struct inode *inode, loff_t offset)
+{
+ struct ecryptfs_crypt_stat *crypt_stat;
+ loff_t lower_oldsize, lower_newsize;
+
+ crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
+ lower_oldsize = upper_size_to_lower_size(crypt_stat,
+ i_size_read(inode));
+ lower_newsize = upper_size_to_lower_size(crypt_stat, offset);
+ if (lower_newsize > lower_oldsize) {
+ /*
+ * The eCryptfs inode and the new *lower* size are mixed here
+ * because we may not have the lower i_mutex held and/or it may
+ * not be appropriate to call inode_newsize_ok() with inodes
+ * from other filesystems.
+ */
+ return inode_newsize_ok(inode, lower_newsize);
+ }
+
+ return 0;
+}
+
/**
* ecryptfs_truncate
* @dentry: The ecryptfs layer dentry
@@ -964,6 +974,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
struct iattr lower_ia = { .ia_valid = 0 };
int rc;
+ rc = ecryptfs_inode_newsize_ok(dentry->d_inode, new_length);
+ if (rc)
+ return rc;
+
rc = truncate_upper(dentry, &ia, &lower_ia);
if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
@@ -1045,6 +1059,16 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
}
}
mutex_unlock(&crypt_stat->cs_mutex);
+
+ rc = inode_change_ok(inode, ia);
+ if (rc)
+ goto out;
+ if (ia->ia_valid & ATTR_SIZE) {
+ rc = ecryptfs_inode_newsize_ok(inode, ia->ia_size);
+ if (rc)
+ goto out;
+ }
+
if (S_ISREG(inode->i_mode)) {
rc = filemap_write_and_wait(inode->i_mapping);
if (rc)
@@ -1128,6 +1152,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
}
rc = vfs_setxattr(lower_dentry, name, value, size, flags);
+ if (!rc)
+ fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
out:
return rc;
}
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index 940a82e..0dc5a3d 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -409,11 +409,47 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
ssize_t sz = 0;
char *data;
uid_t euid = current_euid();
+ unsigned char packet_size_peek[3];
int rc;
- if (count == 0)
+ if (count == 0) {
goto out;
+ } else if (count == (1 + 4)) {
+ /* Likely a harmless MSG_HELO or MSG_QUIT - no packet length */
+ goto memdup;
+ } else if (count < (1 + 4 + 1)
+ || count > (1 + 4 + 2 + sizeof(struct ecryptfs_message) + 4
+ + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES)) {
+ printk(KERN_WARNING "%s: Acceptable packet size range is "
+ "[%d-%lu], but amount of data written is [%zu].",
+ __func__, (1 + 4 + 1),
+ (1 + 4 + 2 + sizeof(struct ecryptfs_message) + 4
+ + ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES), count);
+ return -EINVAL;
+ }
+
+ if (copy_from_user(packet_size_peek, (buf + 1 + 4),
+ sizeof(packet_size_peek))) {
+ printk(KERN_WARNING "%s: Error while inspecting packet size\n",
+ __func__);
+ return -EFAULT;
+ }
+
+ rc = ecryptfs_parse_packet_length(packet_size_peek, &packet_size,
+ &packet_size_length);
+ if (rc) {
+ printk(KERN_WARNING "%s: Error parsing packet length; "
+ "rc = [%d]\n", __func__, rc);
+ return rc;
+ }
+
+ if ((1 + 4 + packet_size_length + packet_size) != count) {
+ printk(KERN_WARNING "%s: Invalid packet size [%zu]\n", __func__,
+ packet_size);
+ return -EINVAL;
+ }
+memdup:
data = memdup_user(buf, count);
if (IS_ERR(data)) {
printk(KERN_ERR "%s: memdup_user returned error [%ld]\n",
@@ -435,23 +471,7 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
}
memcpy(&counter_nbo, &data[i], 4);
seq = be32_to_cpu(counter_nbo);
- i += 4;
- rc = ecryptfs_parse_packet_length(&data[i], &packet_size,
- &packet_size_length);
- if (rc) {
- printk(KERN_WARNING "%s: Error parsing packet length; "
- "rc = [%d]\n", __func__, rc);
- goto out_free;
- }
- i += packet_size_length;
- if ((1 + 4 + packet_size_length + packet_size) != count) {
- printk(KERN_WARNING "%s: (1 + packet_size_length([%zd])"
- " + packet_size([%zd]))([%zd]) != "
- "count([%zd]). Invalid packet format.\n",
- __func__, packet_size_length, packet_size,
- (1 + packet_size_length + packet_size), count);
- goto out_free;
- }
+ i += 4 + packet_size_length;
rc = ecryptfs_miscdev_response(&data[i], packet_size,
euid, current_user_ns(),
task_pid(current), seq);
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 3745f7c..608c1c3 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -130,13 +130,18 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT);
size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK);
size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page);
- size_t total_remaining_bytes = ((offset + size) - pos);
+ loff_t total_remaining_bytes = ((offset + size) - pos);
+
+ if (fatal_signal_pending(current)) {
+ rc = -EINTR;
+ break;
+ }
if (num_bytes > total_remaining_bytes)
num_bytes = total_remaining_bytes;
if (pos < offset) {
/* remaining zeros to write, up to destination offset */
- size_t total_remaining_zeros = (offset - pos);
+ loff_t total_remaining_zeros = (offset - pos);
if (num_bytes > total_remaining_zeros)
num_bytes = total_remaining_zeros;
@@ -193,15 +198,19 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
}
pos += num_bytes;
}
- if ((offset + size) > ecryptfs_file_size) {
- i_size_write(ecryptfs_inode, (offset + size));
+ if (pos > ecryptfs_file_size) {
+ i_size_write(ecryptfs_inode, pos);
if (crypt_stat->flags & ECRYPTFS_ENCRYPTED) {
- rc = ecryptfs_write_inode_size_to_metadata(
+ int rc2;
+
+ rc2 = ecryptfs_write_inode_size_to_metadata(
ecryptfs_inode);
- if (rc) {
+ if (rc2) {
printk(KERN_ERR "Problem with "
"ecryptfs_write_inode_size_to_metadata; "
- "rc = [%d]\n", rc);
+ "rc = [%d]\n", rc2);
+ if (!rc)
+ rc = rc2;
goto out;
}
}
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 2acaf60..35a852a 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -197,6 +197,12 @@ struct eventpoll {
/* The user that created the eventpoll descriptor */
struct user_struct *user;
+
+ struct file *file;
+
+ /* used to optimize loop detection check */
+ int visited;
+ struct list_head visited_list_link;
};
/* Wait structure used by the poll hooks */
@@ -255,6 +261,15 @@ static struct kmem_cache *epi_cache __read_mostly;
/* Slab cache used to allocate "struct eppoll_entry" */
static struct kmem_cache *pwq_cache __read_mostly;
+/* Visited nodes during ep_loop_check(), so we can unset them when we finish */
+static LIST_HEAD(visited_list);
+
+/*
+ * List of files with newly added links, where we may need to limit the number
+ * of emanating paths. Protected by the epmutex.
+ */
+static LIST_HEAD(tfile_check_list);
+
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
@@ -276,6 +291,12 @@ ctl_table epoll_table[] = {
};
#endif /* CONFIG_SYSCTL */
+static const struct file_operations eventpoll_fops;
+
+static inline int is_file_epoll(struct file *f)
+{
+ return f->f_op == &eventpoll_fops;
+}
/* Setup the structure that is used as key for the RB tree */
static inline void ep_set_ffd(struct epoll_filefd *ffd,
@@ -299,6 +320,11 @@ static inline int ep_is_linked(struct list_head *p)
return !list_empty(p);
}
+static inline struct eppoll_entry *ep_pwq_from_wait(wait_queue_t *p)
+{
+ return container_of(p, struct eppoll_entry, wait);
+}
+
/* Get the "struct epitem" from a wait queue pointer */
static inline struct epitem *ep_item_from_wait(wait_queue_t *p)
{
@@ -446,6 +472,18 @@ static void ep_poll_safewake(wait_queue_head_t *wq)
put_cpu();
}
+static void ep_remove_wait_queue(struct eppoll_entry *pwq)
+{
+ wait_queue_head_t *whead;
+
+ rcu_read_lock();
+ /* If it is cleared by POLLFREE, it should be rcu-safe */
+ whead = rcu_dereference(pwq->whead);
+ if (whead)
+ remove_wait_queue(whead, &pwq->wait);
+ rcu_read_unlock();
+}
+
/*
* This function unregisters poll callbacks from the associated file
* descriptor. Must be called with "mtx" held (or "epmutex" if called from
@@ -460,7 +498,7 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
pwq = list_first_entry(lsthead, struct eppoll_entry, llink);
list_del(&pwq->llink);
- remove_wait_queue(pwq->whead, &pwq->wait);
+ ep_remove_wait_queue(pwq);
kmem_cache_free(pwq_cache, pwq);
}
}
@@ -711,12 +749,6 @@ static const struct file_operations eventpoll_fops = {
.llseek = noop_llseek,
};
-/* Fast test to see if the file is an evenpoll file */
-static inline int is_file_epoll(struct file *f)
-{
- return f->f_op == &eventpoll_fops;
-}
-
/*
* This is called from eventpoll_release() to unlink files from the eventpoll
* interface. We need to have this facility to cleanup correctly files that are
@@ -827,6 +859,17 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
+ if ((unsigned long)key & POLLFREE) {
+ ep_pwq_from_wait(wait)->whead = NULL;
+ /*
+ * whead = NULL above can race with ep_remove_wait_queue()
+ * which can do another remove_wait_queue() after us, so we
+ * can't use __remove_wait_queue(). whead->lock is held by
+ * the caller.
+ */
+ list_del_init(&wait->task_list);
+ }
+
spin_lock_irqsave(&ep->lock, flags);
/*
@@ -926,6 +969,103 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
rb_insert_color(&epi->rbn, &ep->rbr);
}
+
+
+#define PATH_ARR_SIZE 5
+/*
+ * These are the number paths of length 1 to 5, that we are allowing to emanate
+ * from a single file of interest. For example, we allow 1000 paths of length
+ * 1, to emanate from each file of interest. This essentially represents the
+ * potential wakeup paths, which need to be limited in order to avoid massive
+ * uncontrolled wakeup storms. The common use case should be a single ep which
+ * is connected to n file sources. In this case each file source has 1 path
+ * of length 1. Thus, the numbers below should be more than sufficient. These
+ * path limits are enforced during an EPOLL_CTL_ADD operation, since a modify
+ * and delete can't add additional paths. Protected by the epmutex.
+ */
+static const int path_limits[PATH_ARR_SIZE] = { 1000, 500, 100, 50, 10 };
+static int path_count[PATH_ARR_SIZE];
+
+static int path_count_inc(int nests)
+{
+ /* Allow an arbitrary number of depth 1 paths */
+ if (nests == 0)
+ return 0;
+
+ if (++path_count[nests] > path_limits[nests])
+ return -1;
+ return 0;
+}
+
+static void path_count_init(void)
+{
+ int i;
+
+ for (i = 0; i < PATH_ARR_SIZE; i++)
+ path_count[i] = 0;
+}
+
+static int reverse_path_check_proc(void *priv, void *cookie, int call_nests)
+{
+ int error = 0;
+ struct file *file = priv;
+ struct file *child_file;
+ struct epitem *epi;
+
+ list_for_each_entry(epi, &file->f_ep_links, fllink) {
+ child_file = epi->ep->file;
+ if (is_file_epoll(child_file)) {
+ if (list_empty(&child_file->f_ep_links)) {
+ if (path_count_inc(call_nests)) {
+ error = -1;
+ break;
+ }
+ } else {
+ error = ep_call_nested(&poll_loop_ncalls,
+ EP_MAX_NESTS,
+ reverse_path_check_proc,
+ child_file, child_file,
+ current);
+ }
+ if (error != 0)
+ break;
+ } else {
+ printk(KERN_ERR "reverse_path_check_proc: "
+ "file is not an ep!\n");
+ }
+ }
+ return error;
+}
+
+/**
+ * reverse_path_check - The tfile_check_list is list of file *, which have
+ * links that are proposed to be newly added. We need to
+ * make sure that those added links don't add too many
+ * paths such that we will spend all our time waking up
+ * eventpoll objects.
+ *
+ * Returns: Returns zero if the proposed links don't create too many paths,
+ * -1 otherwise.
+ */
+static int reverse_path_check(void)
+{
+ int length = 0;
+ int error = 0;
+ struct file *current_file;
+
+ /* let's call this for all tfiles */
+ list_for_each_entry(current_file, &tfile_check_list, f_tfile_llink) {
+ length++;
+ path_count_init();
+ error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ reverse_path_check_proc, current_file,
+ current_file, current);
+ if (error)
+ break;
+ }
+ return error;
+}
+
/*
* Must be called with "mtx" held.
*/
@@ -987,6 +1127,11 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
*/
ep_rbtree_insert(ep, epi);
+ /* now check if we've created too many backpaths */
+ error = -EINVAL;
+ if (reverse_path_check())
+ goto error_remove_epi;
+
/* We have to drop the new item inside our item list to keep track of it */
spin_lock_irqsave(&ep->lock, flags);
@@ -1011,6 +1156,14 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
return 0;
+error_remove_epi:
+ spin_lock(&tfile->f_lock);
+ if (ep_is_linked(&epi->fllink))
+ list_del_init(&epi->fllink);
+ spin_unlock(&tfile->f_lock);
+
+ rb_erase(&epi->rbn, &ep->rbr);
+
error_unregister:
ep_unregister_pollwait(ep, epi);
@@ -1275,18 +1428,36 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
int error = 0;
struct file *file = priv;
struct eventpoll *ep = file->private_data;
+ struct eventpoll *ep_tovisit;
struct rb_node *rbp;
struct epitem *epi;
mutex_lock_nested(&ep->mtx, call_nests + 1);
+ ep->visited = 1;
+ list_add(&ep->visited_list_link, &visited_list);
for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
epi = rb_entry(rbp, struct epitem, rbn);
if (unlikely(is_file_epoll(epi->ffd.file))) {
+ ep_tovisit = epi->ffd.file->private_data;
+ if (ep_tovisit->visited)
+ continue;
error = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
- ep_loop_check_proc, epi->ffd.file,
- epi->ffd.file->private_data, current);
+ ep_loop_check_proc, epi->ffd.file,
+ ep_tovisit, current);
if (error != 0)
break;
+ } else {
+ /*
+ * If we've reached a file that is not associated with
+ * an ep, then we need to check if the newly added
+ * links are going to add too many wakeup paths. We do
+ * this by adding it to the tfile_check_list, if it's
+ * not already there, and calling reverse_path_check()
+ * during ep_insert().
+ */
+ if (list_empty(&epi->ffd.file->f_tfile_llink))
+ list_add(&epi->ffd.file->f_tfile_llink,
+ &tfile_check_list);
}
}
mutex_unlock(&ep->mtx);
@@ -1307,8 +1478,31 @@ static int ep_loop_check_proc(void *priv, void *cookie, int call_nests)
*/
static int ep_loop_check(struct eventpoll *ep, struct file *file)
{
- return ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
+ int ret;
+ struct eventpoll *ep_cur, *ep_next;
+
+ ret = ep_call_nested(&poll_loop_ncalls, EP_MAX_NESTS,
ep_loop_check_proc, file, ep, current);
+ /* clear visited list */
+ list_for_each_entry_safe(ep_cur, ep_next, &visited_list,
+ visited_list_link) {
+ ep_cur->visited = 0;
+ list_del(&ep_cur->visited_list_link);
+ }
+ return ret;
+}
+
+static void clear_tfile_check_list(void)
+{
+ struct file *file;
+
+ /* first clear the tfile_check_list */
+ while (!list_empty(&tfile_check_list)) {
+ file = list_first_entry(&tfile_check_list, struct file,
+ f_tfile_llink);
+ list_del_init(&file->f_tfile_llink);
+ }
+ INIT_LIST_HEAD(&tfile_check_list);
}
/*
@@ -1316,8 +1510,9 @@ static int ep_loop_check(struct eventpoll *ep, struct file *file)
*/
SYSCALL_DEFINE1(epoll_create1, int, flags)
{
- int error;
+ int error, fd;
struct eventpoll *ep = NULL;
+ struct file *file;
/* Check the EPOLL_* constant for consistency. */
BUILD_BUG_ON(EPOLL_CLOEXEC != O_CLOEXEC);
@@ -1334,11 +1529,25 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
* Creates all the items needed to setup an eventpoll file. That is,
* a file structure and a free file descriptor.
*/
- error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
+ fd = get_unused_fd_flags(O_RDWR | (flags & O_CLOEXEC));
+ if (fd < 0) {
+ error = fd;
+ goto out_free_ep;
+ }
+ file = anon_inode_getfile("[eventpoll]", &eventpoll_fops, ep,
O_RDWR | (flags & O_CLOEXEC));
- if (error < 0)
- ep_free(ep);
-
+ if (IS_ERR(file)) {
+ error = PTR_ERR(file);
+ goto out_free_fd;
+ }
+ fd_install(fd, file);
+ ep->file = file;
+ return fd;
+
+out_free_fd:
+ put_unused_fd(fd);
+out_free_ep:
+ ep_free(ep);
return error;
}
@@ -1404,21 +1613,27 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/*
* When we insert an epoll file descriptor, inside another epoll file
* descriptor, there is the change of creating closed loops, which are
- * better be handled here, than in more critical paths.
+ * better be handled here, than in more critical paths. While we are
+ * checking for loops we also determine the list of files reachable
+ * and hang them on the tfile_check_list, so we can check that we
+ * haven't created too many possible wakeup paths.
*
- * We hold epmutex across the loop check and the insert in this case, in
- * order to prevent two separate inserts from racing and each doing the
- * insert "at the same time" such that ep_loop_check passes on both
- * before either one does the insert, thereby creating a cycle.
+ * We need to hold the epmutex across both ep_insert and ep_remove
+ * b/c we want to make sure we are looking at a coherent view of
+ * epoll network.
*/
- if (unlikely(is_file_epoll(tfile) && op == EPOLL_CTL_ADD)) {
+ if (op == EPOLL_CTL_ADD || op == EPOLL_CTL_DEL) {
mutex_lock(&epmutex);
did_lock_epmutex = 1;
- error = -ELOOP;
- if (ep_loop_check(ep, tfile) != 0)
- goto error_tgt_fput;
}
-
+ if (op == EPOLL_CTL_ADD) {
+ if (is_file_epoll(tfile)) {
+ error = -ELOOP;
+ if (ep_loop_check(ep, tfile) != 0)
+ goto error_tgt_fput;
+ } else
+ list_add(&tfile->f_tfile_llink, &tfile_check_list);
+ }
mutex_lock_nested(&ep->mtx, 0);
@@ -1437,6 +1652,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
error = ep_insert(ep, &epds, tfile, fd);
} else
error = -EEXIST;
+ clear_tfile_check_list();
break;
case EPOLL_CTL_DEL:
if (epi)
@@ -1455,7 +1671,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
mutex_unlock(&ep->mtx);
error_tgt_fput:
- if (unlikely(did_lock_epmutex))
+ if (did_lock_epmutex)
mutex_unlock(&epmutex);
fput(tfile);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 3451d23..db9ba1a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1568,7 +1568,13 @@ static int ext3_ordered_writepage(struct page *page,
int err;
J_ASSERT(PageLocked(page));
- WARN_ON_ONCE(IS_RDONLY(inode));
+ /*
+ * We don't want to warn for emergency remount. The condition is
+ * ordered to avoid dereferencing inode->i_sb in non-error case to
+ * avoid slow-downs.
+ */
+ WARN_ON_ONCE(IS_RDONLY(inode) &&
+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
/*
* We give up here if we're reentered, because it might be for a
@@ -1642,7 +1648,13 @@ static int ext3_writeback_writepage(struct page *page,
int err;
J_ASSERT(PageLocked(page));
- WARN_ON_ONCE(IS_RDONLY(inode));
+ /*
+ * We don't want to warn for emergency remount. The condition is
+ * ordered to avoid dereferencing inode->i_sb in non-error case to
+ * avoid slow-downs.
+ */
+ WARN_ON_ONCE(IS_RDONLY(inode) &&
+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
if (ext3_journal_current_handle())
goto out_fail;
@@ -1684,7 +1696,13 @@ static int ext3_journalled_writepage(struct page *page,
int err;
J_ASSERT(PageLocked(page));
- WARN_ON_ONCE(IS_RDONLY(inode));
+ /*
+ * We don't want to warn for emergency remount. The condition is
+ * ordered to avoid dereferencing inode->i_sb in non-error case to
+ * avoid slow-downs.
+ */
+ WARN_ON_ONCE(IS_RDONLY(inode) &&
+ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ERROR_FS));
if (ext3_journal_current_handle())
goto no_write;
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 5802fa1..95af6f8 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -261,43 +261,45 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle,
/* super.c */
int ext4_force_commit(struct super_block *sb);
-static inline int ext4_should_journal_data(struct inode *inode)
+/*
+ * Ext4 inode journal modes
+ */
+#define EXT4_INODE_JOURNAL_DATA_MODE 0x01 /* journal data mode */
+#define EXT4_INODE_ORDERED_DATA_MODE 0x02 /* ordered data mode */
+#define EXT4_INODE_WRITEBACK_DATA_MODE 0x04 /* writeback data mode */
+
+static inline int ext4_inode_journal_mode(struct inode *inode)
{
if (EXT4_JOURNAL(inode) == NULL)
- return 0;
- if (!S_ISREG(inode->i_mode))
- return 1;
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- return 1;
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
- return 1;
- return 0;
+ return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
+ /* We do not support data journalling with delayed allocation */
+ if (!S_ISREG(inode->i_mode) ||
+ test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
+ if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
+ !test_opt(inode->i_sb, DELALLOC))
+ return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+ return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+ return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
+ else
+ BUG();
+}
+
+static inline int ext4_should_journal_data(struct inode *inode)
+{
+ return ext4_inode_journal_mode(inode) & EXT4_INODE_JOURNAL_DATA_MODE;
}
static inline int ext4_should_order_data(struct inode *inode)
{
- if (EXT4_JOURNAL(inode) == NULL)
- return 0;
- if (!S_ISREG(inode->i_mode))
- return 0;
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
- return 0;
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
- return 1;
- return 0;
+ return ext4_inode_journal_mode(inode) & EXT4_INODE_ORDERED_DATA_MODE;
}
static inline int ext4_should_writeback_data(struct inode *inode)
{
- if (EXT4_JOURNAL(inode) == NULL)
- return 1;
- if (!S_ISREG(inode->i_mode))
- return 0;
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA))
- return 0;
- if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
- return 1;
- return 0;
+ return ext4_inode_journal_mode(inode) & EXT4_INODE_WRITEBACK_DATA_MODE;
}
/*
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 14299de..4f31deb 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -341,6 +341,8 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ if (len == 0)
+ return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -2844,7 +2846,7 @@ static int ext4_split_extent_at(handle_t *handle,
if (err)
goto fix_extent_len;
/* update the extent length and mark as initialized */
- ex->ee_len = cpu_to_le32(ee_len);
+ ex->ee_len = cpu_to_le16(ee_len);
ext4_ext_try_to_merge(inode, path, ex);
err = ext4_ext_dirty(handle, inode, path + depth);
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7743b1f..55d3e51 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3216,13 +3216,14 @@ static int ext4_da_write_end(struct file *file,
int write_mode = (int)(unsigned long)fsdata;
if (write_mode == FALL_BACK_TO_NONDELALLOC) {
- if (ext4_should_order_data(inode)) {
+ switch (ext4_inode_journal_mode(inode)) {
+ case EXT4_INODE_ORDERED_DATA_MODE:
return ext4_ordered_write_end(file, mapping, pos,
len, copied, page, fsdata);
- } else if (ext4_should_writeback_data(inode)) {
+ case EXT4_INODE_WRITEBACK_DATA_MODE:
return ext4_writeback_write_end(file, mapping, pos,
len, copied, page, fsdata);
- } else {
+ default:
BUG();
}
}
@@ -3514,12 +3515,17 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
}
retry:
- if (rw == READ && ext4_should_dioread_nolock(inode))
+ if (rw == READ && ext4_should_dioread_nolock(inode)) {
+ if (unlikely(!list_empty(&ei->i_completed_io_list))) {
+ mutex_lock(&inode->i_mutex);
+ ext4_flush_completed_IO(inode);
+ mutex_unlock(&inode->i_mutex);
+ }
ret = __blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs,
ext4_get_block, NULL, NULL, 0);
- else {
+ } else {
ret = blockdev_direct_IO(rw, iocb, inode,
inode->i_sb->s_bdev, iov,
offset, nr_segs,
@@ -3917,18 +3923,25 @@ static const struct address_space_operations ext4_da_aops = {
void ext4_set_aops(struct inode *inode)
{
- if (ext4_should_order_data(inode) &&
- test_opt(inode->i_sb, DELALLOC))
- inode->i_mapping->a_ops = &ext4_da_aops;
- else if (ext4_should_order_data(inode))
- inode->i_mapping->a_ops = &ext4_ordered_aops;
- else if (ext4_should_writeback_data(inode) &&
- test_opt(inode->i_sb, DELALLOC))
- inode->i_mapping->a_ops = &ext4_da_aops;
- else if (ext4_should_writeback_data(inode))
- inode->i_mapping->a_ops = &ext4_writeback_aops;
- else
+ switch (ext4_inode_journal_mode(inode)) {
+ case EXT4_INODE_ORDERED_DATA_MODE:
+ if (test_opt(inode->i_sb, DELALLOC))
+ inode->i_mapping->a_ops = &ext4_da_aops;
+ else
+ inode->i_mapping->a_ops = &ext4_ordered_aops;
+ break;
+ case EXT4_INODE_WRITEBACK_DATA_MODE:
+ if (test_opt(inode->i_sb, DELALLOC))
+ inode->i_mapping->a_ops = &ext4_da_aops;
+ else
+ inode->i_mapping->a_ops = &ext4_writeback_aops;
+ break;
+ case EXT4_INODE_JOURNAL_DATA_MODE:
inode->i_mapping->a_ops = &ext4_journalled_aops;
+ break;
+ default:
+ BUG();
+ }
}
/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 7aa77f0..df121b2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1957,17 +1957,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
struct ext4_group_desc *gdp = NULL;
ext4_group_t flex_group_count;
ext4_group_t flex_group;
- int groups_per_flex = 0;
+ unsigned int groups_per_flex = 0;
size_t size;
int i;
sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
- groups_per_flex = 1 << sbi->s_log_groups_per_flex;
-
- if (groups_per_flex < 2) {
+ if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
sbi->s_log_groups_per_flex = 0;
return 1;
}
+ groups_per_flex = 1 << sbi->s_log_groups_per_flex;
/* We allocate both existing and potentially added groups */
flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index b4ba1b3..408073a 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -360,6 +360,10 @@ int hfsplus_rename_cat(u32 cnid,
err = hfs_brec_find(&src_fd);
if (err)
goto out;
+ if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset,
src_fd.entrylength);
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 4df5059..159f5eb 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -146,6 +146,11 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
filp->f_pos++;
/* fall through */
case 1:
+ if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
+
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
fd.entrylength);
if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) {
@@ -177,6 +182,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir)
err = -EIO;
goto out;
}
+
+ if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) {
+ err = -EIO;
+ goto out;
+ }
+
hfs_bnode_read(fd.bnode, &entry, fd.entryoffset,
fd.entrylength);
type = be16_to_cpu(entry.type);
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 7aafeb8..8b0c875 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -238,17 +238,10 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
loff_t isize;
ssize_t retval = 0;
- mutex_lock(&inode->i_mutex);
-
/* validate length */
if (len == 0)
goto out;
- isize = i_size_read(inode);
- if (!isize)
- goto out;
-
- end_index = (isize - 1) >> huge_page_shift(h);
for (;;) {
struct page *page;
unsigned long nr, ret;
@@ -256,18 +249,21 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
/* nr is the maximum number of bytes to copy from this page */
nr = huge_page_size(h);
+ isize = i_size_read(inode);
+ if (!isize)
+ goto out;
+ end_index = (isize - 1) >> huge_page_shift(h);
if (index >= end_index) {
if (index > end_index)
goto out;
nr = ((isize - 1) & ~huge_page_mask(h)) + 1;
- if (nr <= offset) {
+ if (nr <= offset)
goto out;
- }
}
nr = nr - offset;
/* Find the page */
- page = find_get_page(mapping, index);
+ page = find_lock_page(mapping, index);
if (unlikely(page == NULL)) {
/*
* We have a HOLE, zero out the user-buffer for the
@@ -279,17 +275,18 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
else
ra = 0;
} else {
+ unlock_page(page);
+
/*
* We have the page, copy it to user space buffer.
*/
ra = hugetlbfs_read_actor(page, offset, buf, len, nr);
ret = ra;
+ page_cache_release(page);
}
if (ra < 0) {
if (retval == 0)
retval = ra;
- if (page)
- page_cache_release(page);
goto out;
}
@@ -299,16 +296,12 @@ static ssize_t hugetlbfs_read(struct file *filp, char __user *buf,
index += offset >> huge_page_shift(h);
offset &= ~huge_page_mask(h);
- if (page)
- page_cache_release(page);
-
/* short read or no more work */
if ((ret != nr) || (len == 0))
break;
}
out:
*ppos = ((loff_t)index << huge_page_shift(h)) + offset;
- mutex_unlock(&inode->i_mutex);
return retval;
}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index eef6979..36c2e80 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -683,7 +683,7 @@ start_journal_io:
if (commit_transaction->t_need_data_flush &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL);
/* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
@@ -819,7 +819,7 @@ wait_for_iobuf:
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
journal->j_flags & JBD2_BARRIER) {
- blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL);
+ blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL);
}
if (err)
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 0615bfd..8088e00 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1911,6 +1911,8 @@ zap_buffer_unlocked:
clear_buffer_mapped(bh);
clear_buffer_req(bh);
clear_buffer_new(bh);
+ clear_buffer_delay(bh);
+ clear_buffer_unwritten(bh);
bh->b_bdev = NULL;
return may_free;
}
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index f848b52..046bb77 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -241,7 +241,7 @@ static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
- if (unlikely(*p > nlm4_failed))
+ if (unlikely(ntohl(*p) > ntohl(nlm4_failed)))
goto out_bad_xdr;
*stat = *p;
return 0;
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 180ac34..36057ce 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -236,7 +236,7 @@ static int decode_nlm_stat(struct xdr_stream *xdr,
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
- if (unlikely(*p > nlm_lck_denied_grace_period))
+ if (unlikely(ntohl(*p) > ntohl(nlm_lck_denied_grace_period)))
goto out_enum;
*stat = *p;
return 0;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index abfff9d..1743064 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -440,7 +440,7 @@ static int param_set_##name(const char *val, struct kernel_param *kp) \
__typeof__(type) num = which_strtol(val, &endp, 0); \
if (endp == val || *endp || num < (min) || num > (max)) \
return -EINVAL; \
- *((int *) kp->arg) = num; \
+ *((type *) kp->arg) = num; \
return 0; \
}
diff --git a/fs/namei.c b/fs/namei.c
index f7593c0..16bda6c 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2109,7 +2109,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
/* sayonara */
error = complete_walk(nd);
if (error)
- return ERR_PTR(-ECHILD);
+ return ERR_PTR(error);
error = -ENOTDIR;
if (nd->flags & LOOKUP_DIRECTORY) {
@@ -2208,7 +2208,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
/* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */
error = complete_walk(nd);
if (error)
- goto exit;
+ return ERR_PTR(error);
error = -EISDIR;
if (S_ISDIR(nd->inode->i_mode))
goto exit;
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index aaa09e9..b5c826e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -324,7 +324,7 @@ validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
dprintk("%s enter. slotid %d seqid %d\n",
__func__, args->csa_slotid, args->csa_sequenceid);
- if (args->csa_slotid > NFS41_BC_MAX_CALLBACKS)
+ if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
return htonl(NFS4ERR_BADSLOT);
slot = tbl->slots + args->csa_slotid;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 321a66b..ecabbd8 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -466,6 +466,17 @@ static void nfs_delegation_run_state_manager(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
+void nfs_remove_bad_delegation(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+
+ delegation = nfs_detach_delegation(NFS_I(inode), NFS_SERVER(inode));
+ if (delegation) {
+ nfs_inode_find_state_and_recover(inode, &delegation->stateid);
+ nfs_free_delegation(delegation);
+ }
+}
+
/**
* nfs_expire_all_delegation_types
* @clp: client to process
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index d9322e4..691a796 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -45,6 +45,7 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
void nfs_handle_cb_pathdown(struct nfs_client *clp);
int nfs_client_return_marked_delegations(struct nfs_client *clp);
int nfs_delegations_present(struct nfs_client *clp);
+void nfs_remove_bad_delegation(struct inode *inode);
void nfs_delegation_mark_reclaim(struct nfs_client *clp);
void nfs_delegation_reap_unclaimed(struct nfs_client *clp);
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c4a6983..e1c1365 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -209,6 +209,7 @@ struct nfs4_exception {
long timeout;
int retry;
struct nfs4_state *state;
+ struct inode *inode;
};
struct nfs4_state_recovery_ops {
@@ -344,6 +345,8 @@ extern void nfs4_put_open_state(struct nfs4_state *);
extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
+extern void nfs_inode_find_state_and_recover(struct inode *inode,
+ const nfs4_stateid *stateid);
extern void nfs4_schedule_lease_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a2a7d0a..3d67302 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -254,15 +254,28 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_state *state = exception->state;
+ struct inode *inode = exception->inode;
int ret = errorcode;
exception->retry = 0;
switch(errorcode) {
case 0:
return 0;
+ case -NFS4ERR_OPENMODE:
+ if (nfs_have_delegation(inode, FMODE_READ)) {
+ nfs_inode_return_delegation(inode);
+ exception->retry = 1;
+ return 0;
+ }
+ if (state == NULL)
+ break;
+ nfs4_schedule_stateid_recovery(server, state);
+ goto wait_on_recovery;
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_OPENMODE:
+ if (state != NULL)
+ nfs_remove_bad_delegation(state->inode);
if (state == NULL)
break;
nfs4_schedule_stateid_recovery(server, state);
@@ -1305,8 +1318,11 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
* The show must go on: exit, but mark the
* stateid as needing recovery.
*/
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
+ nfs_inode_find_state_and_recover(state->inode,
+ stateid);
nfs4_schedule_stateid_recovery(server, state);
case -EKEYEXPIRED:
/*
@@ -1862,7 +1878,10 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
struct nfs4_state *state)
{
struct nfs_server *server = NFS_SERVER(inode);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .state = state,
+ .inode = inode,
+ };
int err;
do {
err = nfs4_handle_exception(server,
@@ -3678,8 +3697,11 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
if (task->tk_status >= 0)
return 0;
switch(task->tk_status) {
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
+ if (state != NULL)
+ nfs_remove_bad_delegation(state->inode);
case -NFS4ERR_OPENMODE:
if (state == NULL)
break;
@@ -4402,7 +4424,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .inode = state->inode,
+ };
int err;
do {
@@ -4420,7 +4444,9 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request)
{
struct nfs_server *server = NFS_SERVER(state->inode);
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .inode = state->inode,
+ };
int err;
err = nfs4_set_lock_state(state, request);
@@ -4484,7 +4510,10 @@ out:
static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- struct nfs4_exception exception = { };
+ struct nfs4_exception exception = {
+ .state = state,
+ .inode = state->inode,
+ };
int err;
do {
@@ -4529,6 +4558,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
if (state == NULL)
return -ENOLCK;
+ /*
+ * Don't rely on the VFS having checked the file open mode,
+ * since it won't do this for flock() locks.
+ */
+ switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) {
+ case F_RDLCK:
+ if (!(filp->f_mode & FMODE_READ))
+ return -EBADF;
+ break;
+ case F_WRLCK:
+ if (!(filp->f_mode & FMODE_WRITE))
+ return -EBADF;
+ }
+
do {
status = nfs4_proc_setlk(state, cmd, request);
if ((status != -EAGAIN) || IS_SETLK(cmd))
@@ -4577,6 +4620,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
* The show must go on: exit, but mark the
* stateid as needing recovery.
*/
+ case -NFS4ERR_DELEG_REVOKED:
case -NFS4ERR_ADMIN_REVOKED:
case -NFS4ERR_BAD_STATEID:
case -NFS4ERR_OPENMODE:
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e97dd21..c6e2769 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1069,6 +1069,33 @@ void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4
nfs4_schedule_state_manager(clp);
}
+void nfs_inode_find_state_and_recover(struct inode *inode,
+ const nfs4_stateid *stateid)
+{
+ struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_open_context *ctx;
+ struct nfs4_state *state;
+ bool found = false;
+
+ spin_lock(&inode->i_lock);
+ list_for_each_entry(ctx, &nfsi->open_files, list) {
+ state = ctx->state;
+ if (state == NULL)
+ continue;
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+ continue;
+ if (memcmp(state->stateid.data, stateid->data, sizeof(state->stateid.data)) != 0)
+ continue;
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ found = true;
+ }
+ spin_unlock(&inode->i_lock);
+ if (found)
+ nfs4_schedule_state_manager(clp);
+}
+
+
static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
{
struct inode *inode = state->inode;
@@ -1519,16 +1546,16 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
{
if (!flags)
return;
- else if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
+ if (flags & SEQ4_STATUS_RESTART_RECLAIM_NEEDED)
nfs41_handle_server_reboot(clp);
- else if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
+ if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED |
SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED |
SEQ4_STATUS_ADMIN_STATE_REVOKED |
SEQ4_STATUS_LEASE_MOVED))
nfs41_handle_state_revoked(clp);
- else if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
+ if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED)
nfs41_handle_recallable_state_revoked(clp);
- else if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
+ if (flags & (SEQ4_STATUS_CB_PATH_DOWN |
SEQ4_STATUS_BACKCHANNEL_FAULT |
SEQ4_STATUS_CB_PATH_DOWN_SESSION))
nfs41_handle_cb_path_down(clp);
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 1d1dc1e..75fe694 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1006,7 +1006,8 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
static struct pnfs_layoutdriver_type objlayout_type = {
.id = LAYOUT_OSD2_OBJECTS,
.name = "LAYOUT_OSD2_OBJECTS",
- .flags = PNFS_LAYOUTRET_ON_SETATTR,
+ .flags = PNFS_LAYOUTRET_ON_SETATTR |
+ PNFS_LAYOUTRET_ON_ERROR,
.alloc_layout_hdr = objlayout_alloc_layout_hdr,
.free_layout_hdr = objlayout_free_layout_hdr,
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 1d06f8e..fefa122 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -294,9 +294,11 @@ objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync)
dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof);
rdata = state->rpcdata;
rdata->task.tk_status = status;
- if (status >= 0) {
+ if (likely(status >= 0)) {
rdata->res.count = status;
rdata->res.eof = eof;
+ } else {
+ rdata->pnfs_error = status;
}
objlayout_iodone(state);
/* must not use state after this point */
@@ -380,15 +382,17 @@ objlayout_write_done(struct objlayout_io_state *state, ssize_t status,
wdata = state->rpcdata;
state->status = status;
wdata->task.tk_status = status;
- if (status >= 0) {
+ if (likely(status >= 0)) {
wdata->res.count = status;
wdata->verf.committed = state->committed;
dprintk("%s: Return status %d committed %d\n",
__func__, wdata->task.tk_status,
wdata->verf.committed);
- } else
+ } else {
+ wdata->pnfs_error = status;
dprintk("%s: Return status %d\n",
__func__, wdata->task.tk_status);
+ }
objlayout_iodone(state);
/* must not use state after this point */
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 36d2a29..9951887 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1119,6 +1119,14 @@ pnfs_ld_write_done(struct nfs_write_data *data)
data->mds_ops->rpc_release(data);
return 0;
}
+ if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+ PNFS_LAYOUTRET_ON_ERROR) {
+ /* Don't lo_commit on error, Server will needs to
+ * preform a file recovery.
+ */
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(data->inode)->flags);
+ pnfs_return_layout(data->inode);
+ }
dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
data->pnfs_error);
@@ -1167,6 +1175,10 @@ pnfs_ld_read_done(struct nfs_read_data *data)
return 0;
}
+ if (NFS_SERVER(data->inode)->pnfs_curr_ld->flags &
+ PNFS_LAYOUTRET_ON_ERROR)
+ pnfs_return_layout(data->inode);
+
dprintk("%s: pnfs_error=%d, retry via MDS\n", __func__,
data->pnfs_error);
status = nfs_initiate_read(data, NFS_CLIENT(data->inode),
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 9d147d9..bb8b324 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -68,6 +68,7 @@ enum {
enum layoutdriver_policy_flags {
/* Should the pNFS client commit and return the layout upon a setattr */
PNFS_LAYOUTRET_ON_SETATTR = 1 << 0,
+ PNFS_LAYOUTRET_ON_ERROR = 1 << 1,
};
struct nfs4_deviceid_node;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 858d31b..8e7b61d 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -904,10 +904,24 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(unsigned int ve
data->auth_flavor_len = 1;
data->version = version;
data->minorversion = 0;
+ security_init_mnt_opts(&data->lsm_opts);
}
return data;
}
+static void nfs_free_parsed_mount_data(struct nfs_parsed_mount_data *data)
+{
+ if (data) {
+ kfree(data->client_address);
+ kfree(data->mount_server.hostname);
+ kfree(data->nfs_server.export_path);
+ kfree(data->nfs_server.hostname);
+ kfree(data->fscache_uniq);
+ security_free_mnt_opts(&data->lsm_opts);
+ kfree(data);
+ }
+}
+
/*
* Sanity-check a server address provided by the mount command.
*
@@ -2218,9 +2232,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
mntfh = nfs_alloc_fhandle();
if (data == NULL || mntfh == NULL)
- goto out_free_fh;
-
- security_init_mnt_opts(&data->lsm_opts);
+ goto out;
/* Validate the mount data */
error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
@@ -2232,8 +2244,6 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
#ifdef CONFIG_NFS_V4
if (data->version == 4) {
mntroot = nfs4_try_mount(flags, dev_name, data);
- kfree(data->client_address);
- kfree(data->nfs_server.export_path);
goto out;
}
#endif /* CONFIG_NFS_V4 */
@@ -2284,13 +2294,8 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
s->s_flags |= MS_ACTIVE;
out:
- kfree(data->nfs_server.hostname);
- kfree(data->mount_server.hostname);
- kfree(data->fscache_uniq);
- security_free_mnt_opts(&data->lsm_opts);
-out_free_fh:
+ nfs_free_parsed_mount_data(data);
nfs_free_fhandle(mntfh);
- kfree(data);
return mntroot;
out_err_nosb:
@@ -2613,9 +2618,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
mntfh = nfs_alloc_fhandle();
if (data == NULL || mntfh == NULL)
- goto out_free_fh;
-
- security_init_mnt_opts(&data->lsm_opts);
+ goto out;
/* Get a volume representation */
server = nfs4_create_server(data, mntfh);
@@ -2663,13 +2666,10 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
s->s_flags |= MS_ACTIVE;
- security_free_mnt_opts(&data->lsm_opts);
nfs_free_fhandle(mntfh);
return mntroot;
out:
- security_free_mnt_opts(&data->lsm_opts);
-out_free_fh:
nfs_free_fhandle(mntfh);
return ERR_PTR(error);
@@ -2694,11 +2694,15 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
char *root_devname;
size_t len;
- len = strlen(hostname) + 3;
+ len = strlen(hostname) + 5;
root_devname = kmalloc(len, GFP_KERNEL);
if (root_devname == NULL)
return ERR_PTR(-ENOMEM);
- snprintf(root_devname, len, "%s:/", hostname);
+ /* Does hostname needs to be enclosed in brackets? */
+ if (strchr(hostname, ':'))
+ snprintf(root_devname, len, "[%s]:/", hostname);
+ else
+ snprintf(root_devname, len, "%s:/", hostname);
root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
kfree(root_devname);
return root_mnt;
@@ -2855,7 +2859,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type,
data = nfs_alloc_parsed_mount_data(4);
if (data == NULL)
- goto out_free_data;
+ goto out;
/* Validate the mount data */
error = nfs4_validate_mount_data(raw_data, data, dev_name);
@@ -2869,12 +2873,7 @@ static struct dentry *nfs4_mount(struct file_system_type *fs_type,
error = PTR_ERR(res);
out:
- kfree(data->client_address);
- kfree(data->nfs_server.export_path);
- kfree(data->nfs_server.hostname);
- kfree(data->fscache_uniq);
-out_free_data:
- kfree(data);
+ nfs_free_parsed_mount_data(data);
dprintk("<-- nfs4_mount() = %d%s\n", error,
error != 0 ? " [error]" : "");
return res;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index b9566e4..4b470f6 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -88,7 +88,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
struct svc_expkey key;
struct svc_expkey *ek = NULL;
- if (mesg[mlen-1] != '\n')
+ if (mlen < 1 || mesg[mlen-1] != '\n')
return -EINVAL;
mesg[mlen-1] = 0;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 08c6e36..43f46cd 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -803,13 +803,13 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name,
return p;
}
-static int
+static __be32
compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
const char *name, int namlen)
{
struct svc_export *exp;
struct dentry *dparent, *dchild;
- int rv = 0;
+ __be32 rv = nfserr_noent;
dparent = cd->fh.fh_dentry;
exp = cd->fh.fh_export;
@@ -817,26 +817,20 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp,
if (isdotent(name, namlen)) {
if (namlen == 2) {
dchild = dget_parent(dparent);
- if (dchild == dparent) {
- /* filesystem root - cannot return filehandle for ".." */
- dput(dchild);
- return -ENOENT;
- }
+ /* filesystem root - cannot return filehandle for ".." */
+ if (dchild == dparent)
+ goto out;
} else
dchild = dget(dparent);
} else
dchild = lookup_one_len(name, dparent, namlen);
if (IS_ERR(dchild))
- return -ENOENT;
- rv = -ENOENT;
+ return rv;
if (d_mountpoint(dchild))
goto out;
- rv = fh_compose(fhp, exp, dchild, &cd->fh);
- if (rv)
- goto out;
if (!dchild->d_inode)
goto out;
- rv = 0;
+ rv = fh_compose(fhp, exp, dchild, &cd->fh);
out:
dput(dchild);
return rv;
@@ -845,7 +839,7 @@ out:
static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen)
{
struct svc_fh fh;
- int err;
+ __be32 err;
fh_init(&fh, NFS3_FHSIZE);
err = compose_entry_fh(cd, &fh, name, namlen);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 0b8830c..d06a02c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -812,6 +812,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setattr *setattr)
{
__be32 status = nfs_ok;
+ int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
nfs4_lock_state();
@@ -823,9 +824,9 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
}
}
- status = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
- if (status)
- return status;
+ err = mnt_want_write(cstate->current_fh.fh_export->ex_path.mnt);
+ if (err)
+ return nfserrno(err);
status = nfs_ok;
status = check_attr_support(rqstp, cstate, setattr->sa_bmval,
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index ecd8152..92f7eb7 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3956,16 +3956,14 @@ out:
* vfs_test_lock. (Arguably perhaps test_lock should be done with an
* inode operation.)
*/
-static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
+static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
struct file *file;
- int err;
-
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
- if (err)
- return err;
- err = vfs_test_lock(file, lock);
- nfsd_close(file);
+ __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ if (!err) {
+ err = nfserrno(vfs_test_lock(file, lock));
+ nfsd_close(file);
+ }
return err;
}
@@ -3978,7 +3976,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct inode *inode;
struct file_lock file_lock;
- int error;
__be32 status;
if (locks_in_grace())
@@ -4030,12 +4027,10 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_transform_lock_offset(&file_lock);
- status = nfs_ok;
- error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
- if (error) {
- status = nfserrno(error);
+ status = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock);
+ if (status)
goto out;
- }
+
if (file_lock.fl_type != F_UNLCK) {
status = nfserr_denied;
nfs4_set_lock_denied(&file_lock, &lockt->lt_denied);
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 41d6743..3e65427 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -842,6 +842,19 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
case FS_IOC32_GETVERSION:
cmd = FS_IOC_GETVERSION;
break;
+ case NILFS_IOCTL_CHANGE_CPMODE:
+ case NILFS_IOCTL_DELETE_CHECKPOINT:
+ case NILFS_IOCTL_GET_CPINFO:
+ case NILFS_IOCTL_GET_CPSTAT:
+ case NILFS_IOCTL_GET_SUINFO:
+ case NILFS_IOCTL_GET_SUSTAT:
+ case NILFS_IOCTL_GET_VINFO:
+ case NILFS_IOCTL_GET_BDESCS:
+ case NILFS_IOCTL_CLEAN_SEGMENTS:
+ case NILFS_IOCTL_SYNC:
+ case NILFS_IOCTL_RESIZE:
+ case NILFS_IOCTL_SET_ALLOC_RANGE:
+ break;
default:
return -ENOIOCTLCMD;
}
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index d327140..35a8970 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -515,6 +515,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs,
brelse(sbh[1]);
sbh[1] = NULL;
sbp[1] = NULL;
+ valid[1] = 0;
swp = 0;
}
if (!valid[swp]) {
diff --git a/fs/notify/mark.c b/fs/notify/mark.c
index 252ab1f..42ed195 100644
--- a/fs/notify/mark.c
+++ b/fs/notify/mark.c
@@ -135,9 +135,6 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
mark->flags &= ~FSNOTIFY_MARK_FLAG_ALIVE;
- /* 1 from caller and 1 for being on i_list/g_list */
- BUG_ON(atomic_read(&mark->refcnt) < 2);
-
spin_lock(&group->mark_lock);
if (mark->flags & FSNOTIFY_MARK_FLAG_INODE) {
@@ -182,6 +179,11 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark)
iput(inode);
/*
+ * We don't necessarily have a ref on mark from caller so the above iput
+ * may have already destroyed it. Don't touch from now on.
+ */
+
+ /*
* it's possible that this group tried to destroy itself, but this
* this mark was simultaneously being freed by inode. If that's the
* case, we finish freeing the group here.
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ed553c6..76c8165 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1134,7 +1134,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
}
el = path_leaf_el(path);
- rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
+ rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec) - 1];
ocfs2_adjust_rightmost_records(handle, et, path, rec);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index ebfd382..15d29cc 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -1036,14 +1036,14 @@ static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
tmp_el = left_path->p_node[subtree_root].el;
blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
- for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
+ for (i = 0; i < le16_to_cpu(tmp_el->l_next_free_rec); i++) {
if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
*cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
break;
}
}
- BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
+ BUG_ON(i == le16_to_cpu(tmp_el->l_next_free_rec));
out:
ocfs2_free_path(left_path);
@@ -1468,7 +1468,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
trace_ocfs2_divide_leaf_refcount_block(
(unsigned long long)ref_leaf_bh->b_blocknr,
- le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
+ le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used));
/*
* XXX: Improvement later.
@@ -2411,7 +2411,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
rb = (struct ocfs2_refcount_block *)
prev_bh->b_data;
- if (le64_to_cpu(rb->rf_records.rl_used) +
+ if (le16_to_cpu(rb->rf_records.rl_used) +
recs_add >
le16_to_cpu(rb->rf_records.rl_count))
ref_blocks++;
@@ -2476,7 +2476,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
if (prev_bh) {
rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
- if (le64_to_cpu(rb->rf_records.rl_used) + recs_add >
+ if (le16_to_cpu(rb->rf_records.rl_used) + recs_add >
le16_to_cpu(rb->rf_records.rl_count))
ref_blocks++;
@@ -3629,7 +3629,7 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
* one will split a refcount rec, so totally we need
* clusters * 2 new refcount rec.
*/
- if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
+ if (le16_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
le16_to_cpu(rb->rf_records.rl_count))
ref_blocks++;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index ba5d97e..f169da4 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -600,7 +600,7 @@ static void ocfs2_bg_alloc_cleanup(handle_t *handle,
ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
cluster_ac->ac_bh,
le64_to_cpu(rec->e_blkno),
- le32_to_cpu(rec->e_leaf_clusters));
+ le16_to_cpu(rec->e_leaf_clusters));
if (ret)
mlog_errno(ret);
/* Try all the clusters to free */
@@ -1628,7 +1628,7 @@ static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
{
unsigned int bpc = le16_to_cpu(cl->cl_bpc);
unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
- unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc;
+ unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc;
if (res->sr_bit_offset < bitoff)
return 0;
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index a0a041d..811960a 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -550,17 +550,11 @@ static bool disk_unlock_native_capacity(struct gendisk *disk)
}
}
-int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
{
- struct parsed_partitions *state = NULL;
struct disk_part_iter piter;
struct hd_struct *part;
- int p, highest, res;
-rescan:
- if (state && !IS_ERR(state)) {
- kfree(state);
- state = NULL;
- }
+ int res;
if (bdev->bd_part_count)
return -EBUSY;
@@ -573,6 +567,24 @@ rescan:
delete_partition(disk, part->partno);
disk_part_iter_exit(&piter);
+ return 0;
+}
+
+int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+ struct parsed_partitions *state = NULL;
+ struct hd_struct *part;
+ int p, highest, res;
+rescan:
+ if (state && !IS_ERR(state)) {
+ kfree(state);
+ state = NULL;
+ }
+
+ res = drop_partitions(disk, bdev);
+ if (res)
+ return res;
+
if (disk->fops->revalidate_disk)
disk->fops->revalidate_disk(disk);
check_disk_size_change(disk, bdev);
@@ -676,6 +688,26 @@ rescan:
return 0;
}
+int invalidate_partitions(struct gendisk *disk, struct block_device *bdev)
+{
+ int res;
+
+ if (!bdev->bd_invalidated)
+ return 0;
+
+ res = drop_partitions(disk, bdev);
+ if (res)
+ return res;
+
+ set_capacity(disk, 0);
+ check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
+ /* tell userspace that the media / partition table may have changed */
+ kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE);
+
+ return 0;
+}
+
unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
diff --git a/fs/pipe.c b/fs/pipe.c
index da42f7d..0499a96 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -345,6 +345,16 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+static const struct pipe_buf_operations packet_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = anon_pipe_buf_release,
+ .steal = generic_pipe_buf_steal,
+ .get = generic_pipe_buf_get,
+};
+
static ssize_t
pipe_read(struct kiocb *iocb, const struct iovec *_iov,
unsigned long nr_segs, loff_t pos)
@@ -406,6 +416,13 @@ redo:
ret += chars;
buf->offset += chars;
buf->len -= chars;
+
+ /* Was it a packet buffer? Clean up and exit */
+ if (buf->flags & PIPE_BUF_FLAG_PACKET) {
+ total_len = chars;
+ buf->len = 0;
+ }
+
if (!buf->len) {
buf->ops = NULL;
ops->release(pipe, buf);
@@ -458,6 +475,11 @@ redo:
return ret;
}
+static inline int is_packetized(struct file *file)
+{
+ return (file->f_flags & O_DIRECT) != 0;
+}
+
static ssize_t
pipe_write(struct kiocb *iocb, const struct iovec *_iov,
unsigned long nr_segs, loff_t ppos)
@@ -592,6 +614,11 @@ redo2:
buf->ops = &anon_pipe_buf_ops;
buf->offset = 0;
buf->len = chars;
+ buf->flags = 0;
+ if (is_packetized(filp)) {
+ buf->ops = &packet_pipe_buf_ops;
+ buf->flags = PIPE_BUF_FLAG_PACKET;
+ }
pipe->nrbufs = ++bufs;
pipe->tmp_page = NULL;
@@ -1012,7 +1039,7 @@ struct file *create_write_pipe(int flags)
goto err_dentry;
f->f_mapping = inode->i_mapping;
- f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
+ f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
f->f_version = 0;
return f;
@@ -1056,7 +1083,7 @@ int do_pipe_flags(int *fd, int flags)
int error;
int fdw, fdr;
- if (flags & ~(O_CLOEXEC | O_NONBLOCK))
+ if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
return -EINVAL;
fw = create_write_pipe(flags);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index ca54a1b..cd2df5a 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -211,8 +211,7 @@ static struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
mm = get_task_mm(task);
if (mm && mm != current->mm &&
- !ptrace_may_access(task, mode) &&
- !capable(CAP_SYS_RESOURCE)) {
+ !ptrace_may_access(task, mode)) {
mmput(mm);
mm = ERR_PTR(-EACCES);
}
@@ -782,6 +781,13 @@ static int mem_open(struct inode* inode, struct file* file)
if (IS_ERR(mm))
return PTR_ERR(mm);
+ if (mm) {
+ /* ensure this mm_struct can't be freed */
+ atomic_inc(&mm->mm_count);
+ /* but do not pin its memory */
+ mmput(mm);
+ }
+
/* OK to pass negative loff_t, we can catch out-of-range */
file->f_mode |= FMODE_UNSIGNED_OFFSET;
file->private_data = mm;
@@ -789,57 +795,13 @@ static int mem_open(struct inode* inode, struct file* file)
return 0;
}
-static ssize_t mem_read(struct file * file, char __user * buf,
- size_t count, loff_t *ppos)
+static ssize_t mem_rw(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos, int write)
{
- int ret;
- char *page;
- unsigned long src = *ppos;
struct mm_struct *mm = file->private_data;
-
- if (!mm)
- return 0;
-
- page = (char *)__get_free_page(GFP_TEMPORARY);
- if (!page)
- return -ENOMEM;
-
- ret = 0;
-
- while (count > 0) {
- int this_len, retval;
-
- this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
- retval = access_remote_vm(mm, src, page, this_len, 0);
- if (!retval) {
- if (!ret)
- ret = -EIO;
- break;
- }
-
- if (copy_to_user(buf, page, retval)) {
- ret = -EFAULT;
- break;
- }
-
- ret += retval;
- src += retval;
- buf += retval;
- count -= retval;
- }
- *ppos = src;
-
- free_page((unsigned long) page);
- return ret;
-}
-
-static ssize_t mem_write(struct file * file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- int copied;
+ unsigned long addr = *ppos;
+ ssize_t copied;
char *page;
- unsigned long dst = *ppos;
- struct mm_struct *mm = file->private_data;
if (!mm)
return 0;
@@ -849,31 +811,54 @@ static ssize_t mem_write(struct file * file, const char __user *buf,
return -ENOMEM;
copied = 0;
+ if (!atomic_inc_not_zero(&mm->mm_users))
+ goto free;
+
while (count > 0) {
- int this_len, retval;
+ int this_len = min_t(int, count, PAGE_SIZE);
- this_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
- if (copy_from_user(page, buf, this_len)) {
+ if (write && copy_from_user(page, buf, this_len)) {
copied = -EFAULT;
break;
}
- retval = access_remote_vm(mm, dst, page, this_len, 1);
- if (!retval) {
+
+ this_len = access_remote_vm(mm, addr, page, this_len, write);
+ if (!this_len) {
if (!copied)
copied = -EIO;
break;
}
- copied += retval;
- buf += retval;
- dst += retval;
- count -= retval;
+
+ if (!write && copy_to_user(buf, page, this_len)) {
+ copied = -EFAULT;
+ break;
+ }
+
+ buf += this_len;
+ addr += this_len;
+ copied += this_len;
+ count -= this_len;
}
- *ppos = dst;
+ *ppos = addr;
+ mmput(mm);
+free:
free_page((unsigned long) page);
return copied;
}
+static ssize_t mem_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return mem_rw(file, buf, count, ppos, 0);
+}
+
+static ssize_t mem_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ return mem_rw(file, (char __user*)buf, count, ppos, 1);
+}
+
loff_t mem_lseek(struct file *file, loff_t offset, int orig)
{
switch (orig) {
@@ -893,8 +878,9 @@ loff_t mem_lseek(struct file *file, loff_t offset, int orig)
static int mem_release(struct inode *inode, struct file *file)
{
struct mm_struct *mm = file->private_data;
+ if (mm)
+ mmdrop(mm);
- mmput(mm);
return 0;
}
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index be177f7..d6c078e 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -54,7 +54,7 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
ei->ns_ops = ns_ops;
ei->ns = ns;
- dentry->d_op = &pid_dentry_operations;
+ d_set_d_op(dentry, &pid_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, NULL))
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c7d4ee6..55a1f49 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -407,6 +407,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
} else {
spin_unlock(&walk->mm->page_table_lock);
}
+
+ if (pmd_trans_unstable(pmd))
+ return 0;
/*
* The mmap_sem held all the way back in m_start() is what
* keeps khugepaged out of here and from collapsing things
@@ -505,6 +508,8 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
struct page *page;
split_huge_page_pmd(walk->mm, pmd);
+ if (pmd_trans_unstable(pmd))
+ return 0;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -516,6 +521,9 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
if (!page)
continue;
+ if (PageReserved(page))
+ continue;
+
/* Clear accessed and referenced bits. */
ptep_test_and_clear_young(vma, addr, pte);
ClearPageReferenced(page);
@@ -665,6 +673,8 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
int err = 0;
split_huge_page_pmd(walk->mm, pmd);
+ if (pmd_trans_unstable(pmd))
+ return 0;
/* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr);
@@ -956,6 +966,8 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
spin_unlock(&walk->mm->page_table_lock);
}
+ if (pmd_trans_unstable(pmd))
+ return 0;
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
do {
struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 766b1d4..29166ec 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -11,15 +11,20 @@ static int uptime_proc_show(struct seq_file *m, void *v)
{
struct timespec uptime;
struct timespec idle;
+ cputime64_t idletime;
+ u64 nsec;
+ u32 rem;
int i;
- cputime_t idletime = cputime_zero;
+ idletime = 0;
for_each_possible_cpu(i)
idletime = cputime64_add(idletime, kstat_cpu(i).cpustat.idle);
do_posix_clock_monotonic_gettime(&uptime);
monotonic_to_bootbased(&uptime);
- cputime_to_timespec(idletime, &idle);
+ nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
+ idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
+ idle.tv_nsec = rem;
seq_printf(m, "%lu.%02lu %lu.%02lu\n",
(unsigned long) uptime.tv_sec,
(uptime.tv_nsec / (NSEC_PER_SEC / 100)),
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index aa91089..f19dfbf 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -453,16 +453,20 @@ int remove_save_link(struct inode *inode, int truncate)
static void reiserfs_kill_sb(struct super_block *s)
{
if (REISERFS_SB(s)) {
- if (REISERFS_SB(s)->xattr_root) {
- d_invalidate(REISERFS_SB(s)->xattr_root);
- dput(REISERFS_SB(s)->xattr_root);
- REISERFS_SB(s)->xattr_root = NULL;
- }
- if (REISERFS_SB(s)->priv_root) {
- d_invalidate(REISERFS_SB(s)->priv_root);
- dput(REISERFS_SB(s)->priv_root);
- REISERFS_SB(s)->priv_root = NULL;
- }
+ /*
+ * Force any pending inode evictions to occur now. Any
+ * inodes to be removed that have extended attributes
+ * associated with them need to clean them up before
+ * we can release the extended attribute root dentries.
+ * shrink_dcache_for_umount will BUG if we don't release
+ * those before it's called so ->put_super is too late.
+ */
+ shrink_dcache_sb(s);
+
+ dput(REISERFS_SB(s)->xattr_root);
+ REISERFS_SB(s)->xattr_root = NULL;
+ dput(REISERFS_SB(s)->priv_root);
+ REISERFS_SB(s)->priv_root = NULL;
}
kill_block_super(s);
@@ -1164,7 +1168,8 @@ static void handle_quota_files(struct super_block *s, char **qf_names,
kfree(REISERFS_SB(s)->s_qf_names[i]);
REISERFS_SB(s)->s_qf_names[i] = qf_names[i];
}
- REISERFS_SB(s)->s_jquota_fmt = *qfmt;
+ if (*qfmt)
+ REISERFS_SB(s)->s_jquota_fmt = *qfmt;
}
#endif
diff --git a/fs/signalfd.c b/fs/signalfd.c
index 492465b..7ae2a57 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -30,6 +30,21 @@
#include <linux/signalfd.h>
#include <linux/syscalls.h>
+void signalfd_cleanup(struct sighand_struct *sighand)
+{
+ wait_queue_head_t *wqh = &sighand->signalfd_wqh;
+ /*
+ * The lockless check can race with remove_wait_queue() in progress,
+ * but in this case its caller should run under rcu_read_lock() and
+ * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return.
+ */
+ if (likely(!waitqueue_active(wqh)))
+ return;
+
+ /* wait_queue_t->func(POLLFREE) should do remove_wait_queue() */
+ wake_up_poll(wqh, POLLHUP | POLLFREE);
+}
+
struct signalfd_ctx {
sigset_t sigmask;
};
diff --git a/fs/splice.c b/fs/splice.c
index aa866d3..9d89008 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -31,6 +31,7 @@
#include <linux/uio.h>
#include <linux/security.h>
#include <linux/gfp.h>
+#include <linux/socket.h>
/*
* Attempt to steal a page from a pipe buffer. This should perhaps go into
@@ -691,7 +692,9 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,
if (!likely(file->f_op && file->f_op->sendpage))
return -EINVAL;
- more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;
+ more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
+ if (sd->len < sd->total_len)
+ more |= MSG_SENDPAGE_NOTLAST;
return file->f_op->sendpage(file, buf->page, buf->offset,
sd->len, &pos, more);
}
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 0a12eb8..a494413 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -136,12 +136,13 @@ static int sysfs_sd_setsecdata(struct sysfs_dirent *sd, void **secdata, u32 *sec
void *old_secdata;
size_t old_secdata_len;
- iattrs = sd->s_iattr;
- if (!iattrs)
- iattrs = sysfs_init_inode_attrs(sd);
- if (!iattrs)
- return -ENOMEM;
+ if (!sd->s_iattr) {
+ sd->s_iattr = sysfs_init_inode_attrs(sd);
+ if (!sd->s_iattr)
+ return -ENOMEM;
+ }
+ iattrs = sd->s_iattr;
old_secdata = iattrs->ia_secdata;
old_secdata_len = iattrs->ia_secdata_len;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index a811ac4..fd75b63 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -121,20 +121,21 @@ const char *dbg_key_str1(const struct ubifs_info *c,
const union ubifs_key *key);
/*
- * DBGKEY macros require @dbg_lock to be held, which it is in the dbg message
- * macros.
+ * TODO: these macros are now broken because there is no locking around them
+ * and we use a global buffer for the key string. This means that in case of
+ * concurrent execution we will end up with incorrect and messy key strings.
*/
#define DBGKEY(key) dbg_key_str0(c, (key))
#define DBGKEY1(key) dbg_key_str1(c, (key))
-#define ubifs_dbg_msg(type, fmt, ...) do { \
- spin_lock(&dbg_lock); \
- pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
- spin_unlock(&dbg_lock); \
-} while (0)
+#define ubifs_dbg_msg(type, fmt, ...) \
+ pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)
/* Just a debugging messages not related to any specific UBIFS subsystem */
-#define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
+#define dbg_msg(fmt, ...) \
+ printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
+ __func__, ##__VA_ARGS__)
+
/* General messages */
#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
/* Additional journal messages */
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2a346bb..3438b00 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -125,7 +125,6 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
err = udf_expand_file_adinicb(inode);
if (err) {
udf_debug("udf_expand_adinicb: err=%d\n", err);
- up_write(&iinfo->i_data_sem);
return err;
}
} else {
@@ -133,9 +132,10 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
iinfo->i_lenAlloc = pos + count;
else
iinfo->i_lenAlloc = inode->i_size;
+ up_write(&iinfo->i_data_sem);
}
- }
- up_write(&iinfo->i_data_sem);
+ } else
+ up_write(&iinfo->i_data_sem);
retval = generic_file_aio_write(iocb, iov, nr_segs, ppos);
if (retval > 0)
@@ -201,12 +201,10 @@ out:
static int udf_release_file(struct inode *inode, struct file *filp)
{
if (filp->f_mode & FMODE_WRITE) {
- mutex_lock(&inode->i_mutex);
down_write(&UDF_I(inode)->i_data_sem);
udf_discard_prealloc(inode);
udf_truncate_tail_extent(inode);
up_write(&UDF_I(inode)->i_data_sem);
- mutex_unlock(&inode->i_mutex);
}
return 0;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 1d1358e..262050f 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -145,6 +145,12 @@ const struct address_space_operations udf_aops = {
.bmap = udf_bmap,
};
+/*
+ * Expand file stored in ICB to a normal one-block-file
+ *
+ * This function requires i_data_sem for writing and releases it.
+ * This function requires i_mutex held
+ */
int udf_expand_file_adinicb(struct inode *inode)
{
struct page *page;
@@ -163,9 +169,15 @@ int udf_expand_file_adinicb(struct inode *inode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
/* from now on we have normal address_space methods */
inode->i_data.a_ops = &udf_aops;
+ up_write(&iinfo->i_data_sem);
mark_inode_dirty(inode);
return 0;
}
+ /*
+ * Release i_data_sem so that we can lock a page - page lock ranks
+ * above i_data_sem. i_mutex still protects us against file changes.
+ */
+ up_write(&iinfo->i_data_sem);
page = find_or_create_page(inode->i_mapping, 0, GFP_NOFS);
if (!page)
@@ -181,6 +193,7 @@ int udf_expand_file_adinicb(struct inode *inode)
SetPageUptodate(page);
kunmap(page);
}
+ down_write(&iinfo->i_data_sem);
memset(iinfo->i_ext.i_data + iinfo->i_lenEAttr, 0x00,
iinfo->i_lenAlloc);
iinfo->i_lenAlloc = 0;
@@ -190,17 +203,20 @@ int udf_expand_file_adinicb(struct inode *inode)
iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG;
/* from now on we have normal address_space methods */
inode->i_data.a_ops = &udf_aops;
+ up_write(&iinfo->i_data_sem);
err = inode->i_data.a_ops->writepage(page, &udf_wbc);
if (err) {
/* Restore everything back so that we don't lose data... */
lock_page(page);
kaddr = kmap(page);
+ down_write(&iinfo->i_data_sem);
memcpy(iinfo->i_ext.i_data + iinfo->i_lenEAttr, kaddr,
inode->i_size);
kunmap(page);
unlock_page(page);
iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB;
inode->i_data.a_ops = &udf_adinicb_aops;
+ up_write(&iinfo->i_data_sem);
}
page_cache_release(page);
mark_inode_dirty(inode);
@@ -1105,10 +1121,9 @@ int udf_setsize(struct inode *inode, loff_t newsize)
if (bsize <
(udf_file_entry_alloc_offset(inode) + newsize)) {
err = udf_expand_file_adinicb(inode);
- if (err) {
- up_write(&iinfo->i_data_sem);
+ if (err)
return err;
- }
+ down_write(&iinfo->i_data_sem);
} else
iinfo->i_lenAlloc = newsize;
}
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 7b27b06..7f0e18a 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -1830,6 +1830,12 @@ static void udf_close_lvid(struct super_block *sb)
le16_to_cpu(lvid->descTag.descCRCLength)));
lvid->descTag.tagChecksum = udf_tag_checksum(&lvid->descTag);
+ /*
+ * We set buffer uptodate unconditionally here to avoid spurious
+ * warnings from mark_buffer_dirty() when previous EIO has marked
+ * the buffer as !uptodate
+ */
+ set_buffer_uptodate(bh);
mark_buffer_dirty(bh);
sbi->s_lvid_dirty = 0;
mutex_unlock(&sbi->s_alloc_mutex);
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 4b9fb91..f86e034 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -39,7 +39,7 @@ xfs_acl_from_disk(struct xfs_acl *aclp)
struct posix_acl_entry *acl_e;
struct posix_acl *acl;
struct xfs_acl_entry *ace;
- int count, i;
+ unsigned int count, i;
count = be32_to_cpu(aclp->acl_cnt);
if (count > XFS_ACL_MAX_ENTRIES)
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
index 244e797..572494f 100644
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ b/fs/xfs/linux-2.6/xfs_discard.c
@@ -68,7 +68,7 @@ xfs_trim_extents(
* Look up the longest btree in the AGF and start with it.
*/
error = xfs_alloc_lookup_le(cur, 0,
- XFS_BUF_TO_AGF(agbp)->agf_longest, &i);
+ be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
if (error)
goto out_del_cursor;
@@ -84,7 +84,7 @@ xfs_trim_extents(
if (error)
goto out_del_cursor;
XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
- ASSERT(flen <= XFS_BUF_TO_AGF(agbp)->agf_longest);
+ ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
/*
* Too small? Give up.
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 28de70b..e6ac98c 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -871,27 +871,6 @@ xfs_fs_dirty_inode(
}
STATIC int
-xfs_log_inode(
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- return xfs_trans_commit(tp, 0);
-}
-
-STATIC int
xfs_fs_write_inode(
struct inode *inode,
struct writeback_control *wbc)
@@ -904,10 +883,8 @@ xfs_fs_write_inode(
if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
- if (!ip->i_update_core)
- return 0;
- if (wbc->sync_mode == WB_SYNC_ALL) {
+ if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
/*
* Make sure the inode has made it it into the log. Instead
* of forcing it all the way to stable storage using a
@@ -916,11 +893,14 @@ xfs_fs_write_inode(
* of synchronous log foces dramatically.
*/
xfs_ioend_wait(ip);
- error = xfs_log_inode(ip);
+ error = xfs_log_dirty_inode(ip, NULL, 0);
if (error)
goto out;
return 0;
} else {
+ if (!ip->i_update_core)
+ return 0;
+
/*
* We make this non-blocking if the inode is contended, return
* EAGAIN to indicate to the caller that they did not succeed.
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index b69688d..2f277a0 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -336,6 +336,32 @@ xfs_sync_fsdata(
return xfs_bwrite(mp, bp);
}
+int
+xfs_log_dirty_inode(
+ struct xfs_inode *ip,
+ struct xfs_perag *pag,
+ int flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+
+ if (!ip->i_update_core)
+ return 0;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return xfs_trans_commit(tp, 0);
+}
+
/*
* When remounting a filesystem read-only or freezing the filesystem, we have
* two phases to execute. This first phase is syncing the data before we
@@ -365,6 +391,17 @@ xfs_quiesce_data(
/* push and block till complete */
xfs_sync_data(mp, SYNC_WAIT);
+
+ /*
+ * Log all pending size and timestamp updates. The vfs writeback
+ * code is supposed to do this, but due to its overagressive
+ * livelock detection it will skip inodes where appending writes
+ * were written out in the first non-blocking sync phase if their
+ * completion took long enough that it happened after taking the
+ * timestamp for the cut-off in the blocking phase.
+ */
+ xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
+
xfs_qm_sync(mp, SYNC_WAIT);
/* write superblock and hoover up shutdown errors */
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e3a6ad2..ef5b2ce 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -42,6 +42,8 @@ void xfs_quiesce_attr(struct xfs_mount *mp);
void xfs_flush_inodes(struct xfs_inode *ip);
+int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
+
int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 3631783..ca752f0 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -356,9 +356,20 @@ xfs_iget_cache_miss(
BUG();
}
- spin_lock(&pag->pag_ici_lock);
+ /*
+ * These values must be set before inserting the inode into the radix
+ * tree as the moment it is inserted a concurrent lookup (allowed by the
+ * RCU locking mechanism) can find it and that lookup must see that this
+ * is an inode currently under construction (i.e. that XFS_INEW is set).
+ * The ip->i_flags_lock that protects the XFS_INEW flag forms the
+ * memory barrier that ensures this detection works correctly at lookup
+ * time.
+ */
+ ip->i_udquot = ip->i_gdquot = NULL;
+ xfs_iflags_set(ip, XFS_INEW);
/* insert the new inode */
+ spin_lock(&pag->pag_ici_lock);
error = radix_tree_insert(&pag->pag_ici_root, agino, ip);
if (unlikely(error)) {
WARN_ON(error != -EEXIST);
@@ -366,11 +377,6 @@ xfs_iget_cache_miss(
error = EAGAIN;
goto out_preload_end;
}
-
- /* These values _must_ be set before releasing the radix tree lock! */
- ip->i_udquot = ip->i_gdquot = NULL;
- xfs_iflags_set(ip, XFS_INEW);
-
spin_unlock(&pag->pag_ici_lock);
radix_tree_preload_end();
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 04142ca..b75fd67 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3159,37 +3159,26 @@ xlog_recover_process_iunlinks(
*/
continue;
}
+ /*
+ * Unlock the buffer so that it can be acquired in the normal
+ * course of the transaction to truncate and free each inode.
+ * Because we are not racing with anyone else here for the AGI
+ * buffer, we don't even need to hold it locked to read the
+ * initial unlinked bucket entries out of the buffer. We keep
+ * buffer reference though, so that it stays pinned in memory
+ * while we need the buffer.
+ */
agi = XFS_BUF_TO_AGI(agibp);
+ xfs_buf_unlock(agibp);
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
agino = be32_to_cpu(agi->agi_unlinked[bucket]);
while (agino != NULLAGINO) {
- /*
- * Release the agi buffer so that it can
- * be acquired in the normal course of the
- * transaction to truncate and free the inode.
- */
- xfs_buf_relse(agibp);
-
agino = xlog_recover_process_one_iunlink(mp,
agno, agino, bucket);
-
- /*
- * Reacquire the agibuffer and continue around
- * the loop. This should never fail as we know
- * the buffer was good earlier on.
- */
- error = xfs_read_agi(mp, NULL, agno, &agibp);
- ASSERT(error == 0);
- agi = XFS_BUF_TO_AGI(agibp);
}
}
-
- /*
- * Release the buffer for the current agi so we can
- * go on to the next one.
- */
- xfs_buf_relse(agibp);
+ xfs_buf_rele(agibp);
}
mp->m_dmevmask = mp_dmevmask;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6cc4d41..59509ae 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -554,7 +554,8 @@ xfs_readlink(
__func__, (unsigned long long) ip->i_ino,
(long long) pathlen);
ASSERT(0);
- return XFS_ERROR(EFSCORRUPTED);
+ error = XFS_ERROR(EFSCORRUPTED);
+ goto out;
}