From fe2bbc4832659b7ffc867cac03e0a92ae81e11e4 Mon Sep 17 00:00:00 2001 From: Henrik Kretzschmar Date: Wed, 6 Sep 2006 00:03:41 -0700 Subject: [PATCH] add missing description in super.c Adds kernel-doc for alloc_super() type in fs/super.c. Signed-off-by: Henrik Kretzschmar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/super.c b/fs/super.c index 6d4e817..5c4c94d 100644 --- a/fs/super.c +++ b/fs/super.c @@ -49,6 +49,7 @@ DEFINE_SPINLOCK(sb_lock); /** * alloc_super - create new superblock + * @type: filesystem type superblock should belong to * * Allocates and initializes a new &struct super_block. alloc_super() * returns a pointer new superblock or %NULL if allocation had failed. -- cgit v1.1 From b835bebe95608c81270636a78b70333afb011925 Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 6 Sep 2006 22:02:22 +0000 Subject: [CIFS] Fix CIFS readdir access denied when SE Linux enabled CIFS had one path in which dentry was instantiated before the corresponding inode metadata was filled in. 
Fixes Redhat bugzilla bug #163493 Signed-off-by: Steve French Acked-by: Eric Paris Acked-by: Dave Kleikamp --- fs/cifs/readdir.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 105761e..9aeb58a 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -82,7 +82,6 @@ static int construct_dentry(struct qstr *qstring, struct file *file, if(*ptmp_inode == NULL) return rc; rc = 1; - d_instantiate(tmp_dentry, *ptmp_inode); } } else { tmp_dentry = d_alloc(file->f_dentry, qstring); @@ -99,9 +98,7 @@ static int construct_dentry(struct qstr *qstring, struct file *file, tmp_dentry->d_op = &cifs_dentry_ops; if(*ptmp_inode == NULL) return rc; - rc = 1; - d_instantiate(tmp_dentry, *ptmp_inode); - d_rehash(tmp_dentry); + rc = 2; } tmp_dentry->d_time = jiffies; @@ -870,6 +867,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, pfindEntry, &obj_type, rc); else fill_in_inode(tmp_inode, 1 /* NT */, pfindEntry, &obj_type, rc); + + if(rc) /* new inode - needs to be tied to dentry */ { + d_instantiate(tmp_dentry, tmp_inode); + if(rc == 2) + d_rehash(tmp_dentry); + } rc = filldir(direntry,qstring.name,qstring.len,file->f_pos, -- cgit v1.1 From 4be536debe3f7b0c62283e77fd6bd8bdb9f83c6f Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 7 Sep 2006 14:26:50 +1000 Subject: [XFS] Prevent free space oversubscription and xfssyncd looping. The fix for recent ENOSPC deadlocks introduced certain limitations on allocations. The fix could cause xfssyncd to loop endlessly if we did not leave some space free for the allocator to work correctly. Basically, we needed to ensure that we had at least 4 blocks free for an AG free list and a block for the inode bmap btree at all times. However, this did not take into account the fact that each AG has a free list that needs 4 blocks. 
Hence any filesystem with more than one AG could cause oversubscription of free space and make xfssyncd spin forever trying to allocate space needed for AG freelists that was not available in the AG. The following patch reserves space for the free lists in all AGs plus the inode bmap btree which prevents oversubscription. It also prevents those blocks from being reported as free space (as they can never be used) and makes the SMP in-core superblock accounting code and the reserved block ioctl respect this requirement. SGI-PV: 955674 SGI-Modid: xfs-linux-melb:xfs-kern:26894a Signed-off-by: David Chinner Signed-off-by: David Chatterton --- fs/xfs/xfs_alloc.h | 20 ++++++++++++++++++++ fs/xfs/xfs_fsops.c | 16 ++++++++++------ fs/xfs/xfs_mount.c | 32 ++++++++------------------------ fs/xfs/xfs_vfsops.c | 3 ++- 4 files changed, 40 insertions(+), 31 deletions(-) (limited to 'fs') diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 650591f..5a42561 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -44,6 +44,26 @@ typedef enum xfs_alloctype #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ /* + * In order to avoid ENOSPC-related deadlock caused by + * out-of-order locking of AGF buffer (PV 947395), we place + * constraints on the relationship among actual allocations for + * data blocks, freelist blocks, and potential file data bmap + * btree blocks. However, these restrictions may result in no + * actual space allocated for a delayed extent, for example, a data + * block in a certain AG is allocated but there is no additional + * block for the additional bmap btree block due to a split of the + * bmap btree of the file. The result of this may lead to an + * infinite loop in xfssyncd when the file gets flushed to disk and + * all delayed extents need to be actually allocated. To get around + * this, we explicitly set aside a few blocks which will not be + * reserved in delayed allocation. 
Considering the minimum number of + * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap + * btree requires 1 fsb, so we set the number of set-aside blocks + * to 4 + 4*agcount. + */ +#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) + +/* * Argument structure for xfs_alloc routines. * This is turned into a structure to avoid having 20 arguments passed * down several levels of the stack. diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 077629b..c064e72 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -462,7 +462,7 @@ xfs_fs_counts( xfs_icsb_sync_counters_lazy(mp); s = XFS_SB_LOCK(mp); - cnt->freedata = mp->m_sb.sb_fdblocks; + cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); cnt->freertx = mp->m_sb.sb_frextents; cnt->freeino = mp->m_sb.sb_ifree; cnt->allocino = mp->m_sb.sb_icount; @@ -519,15 +519,19 @@ xfs_reserve_blocks( } mp->m_resblks = request; } else { + __int64_t free; + + free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); delta = request - mp->m_resblks; - lcounter = mp->m_sb.sb_fdblocks - delta; + lcounter = free - delta; if (lcounter < 0) { /* We can't satisfy the request, just get what we can */ - mp->m_resblks += mp->m_sb.sb_fdblocks; - mp->m_resblks_avail += mp->m_sb.sb_fdblocks; - mp->m_sb.sb_fdblocks = 0; + mp->m_resblks += free; + mp->m_resblks_avail += free; + mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp); } else { - mp->m_sb.sb_fdblocks = lcounter; + mp->m_sb.sb_fdblocks = + lcounter + XFS_ALLOC_SET_ASIDE(mp); mp->m_resblks = request; mp->m_resblks_avail += delta; } diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 4be5c0b..9dfae18 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1243,24 +1243,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) xfs_trans_log_buf(tp, bp, first, last); } -/* - * In order to avoid ENOSPC-related deadlock caused by - * out-of-order locking of AGF buffer (PV 947395), we place - * constraints on the relationship among 
actual allocations for - * data blocks, freelist blocks, and potential file data bmap - * btree blocks. However, these restrictions may result in no - * actual space allocated for a delayed extent, for example, a data - * block in a certain AG is allocated but there is no additional - * block for the additional bmap btree block due to a split of the - * bmap btree of the file. The result of this may lead to an - * infinite loop in xfssyncd when the file gets flushed to disk and - * all delayed extents need to be actually allocated. To get around - * this, we explicitly set aside a few blocks which will not be - * reserved in delayed allocation. Considering the minimum number of - * needed freelist blocks is 4 fsbs, a potential split of file's bmap - * btree requires 1 fsb, so we set the number of set-aside blocks to 8. -*/ -#define SET_ASIDE_BLOCKS 8 /* * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply @@ -1306,7 +1288,8 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, return 0; case XFS_SBS_FDBLOCKS: - lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS; + lcounter = (long long) + mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); if (delta > 0) { /* Putting blocks back */ @@ -1340,7 +1323,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, } } - mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS; + mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); return 0; case XFS_SBS_FREXTENTS: lcounter = (long long)mp->m_sb.sb_frextents; @@ -2021,7 +2004,8 @@ xfs_icsb_sync_counters_lazy( * when we get near ENOSPC. 
*/ #define XFS_ICSB_INO_CNTR_REENABLE 64 -#define XFS_ICSB_FDBLK_CNTR_REENABLE 512 +#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \ + (512 + XFS_ALLOC_SET_ASIDE(mp)) STATIC void xfs_icsb_balance_counter( xfs_mount_t *mp, @@ -2055,7 +2039,7 @@ xfs_icsb_balance_counter( case XFS_SBS_FDBLOCKS: count = mp->m_sb.sb_fdblocks; resid = do_div(count, weight); - if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) + if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp)) goto out; break; default: @@ -2110,11 +2094,11 @@ again: case XFS_SBS_FDBLOCKS: BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); - lcounter = icsbp->icsb_fdblocks; + lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); lcounter += delta; if (unlikely(lcounter < 0)) goto slow_path; - icsbp->icsb_fdblocks = lcounter; + icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); break; default: BUG(); diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c index b427d22..a34796e 100644 --- a/fs/xfs/xfs_vfsops.c +++ b/fs/xfs/xfs_vfsops.c @@ -811,7 +811,8 @@ xfs_statvfs( statp->f_bsize = sbp->sb_blocksize; lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; statp->f_blocks = sbp->sb_dblocks - lsize; - statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks; + statp->f_bfree = statp->f_bavail = + sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp); fakeinos = statp->f_bfree << sbp->sb_inopblog; #if XFS_BIG_INUMS fakeinos += mp->m_inoadd; -- cgit v1.1 From 721259bce2851893155c6cb88a3f8ecb106b348c Mon Sep 17 00:00:00 2001 From: Lachlan McIlroy Date: Thu, 7 Sep 2006 14:27:05 +1000 Subject: [XFS] Fix ABBA deadlock between i_mutex and iolock. Avoid calling __blockdev_direct_IO for the DIO_OWN_LOCKING case for direct I/O reads since it drops and reacquires the i_mutex while holding the iolock and this violates the locking order. 
SGI-PV: 955696 SGI-Modid: xfs-linux-melb:xfs-kern:26898a Signed-off-by: Lachlan McIlroy Signed-off-by: David Chatterton --- fs/xfs/linux-2.6/xfs_aops.c | 18 +++++++++++++----- fs/xfs/linux-2.6/xfs_lrw.c | 11 ++++++----- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index c40f81b..34dcb43 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1390,11 +1390,19 @@ xfs_vm_direct_IO( iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN); - ret = blockdev_direct_IO_own_locking(rw, iocb, inode, - iomap.iomap_target->bt_bdev, - iov, offset, nr_segs, - xfs_get_blocks_direct, - xfs_end_io_direct); + if (rw == WRITE) { + ret = blockdev_direct_IO_own_locking(rw, iocb, inode, + iomap.iomap_target->bt_bdev, + iov, offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct); + } else { + ret = blockdev_direct_IO_no_locking(rw, iocb, inode, + iomap.iomap_target->bt_bdev, + iov, offset, nr_segs, + xfs_get_blocks_direct, + xfs_end_io_direct); + } if (unlikely(ret <= 0 && iocb->private)) xfs_destroy_ioend(iocb->private); diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 5d9cfd9..110c038 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -264,7 +264,9 @@ xfs_read( dmflags, &locktype); if (ret) { xfs_iunlock(ip, XFS_IOLOCK_SHARED); - goto unlock_mutex; + if (unlikely(ioflags & IO_ISDIRECT)) + mutex_unlock(&inode->i_mutex); + return ret; } } @@ -272,6 +274,9 @@ xfs_read( bhv_vop_flushinval_pages(vp, ctooff(offtoct(*offset)), -1, FI_REMAPF_LOCKED); + if (unlikely(ioflags & IO_ISDIRECT)) + mutex_unlock(&inode->i_mutex); + xfs_rw_enter_trace(XFS_READ_ENTER, &ip->i_iocore, (void *)iovp, segs, *offset, ioflags); ret = __generic_file_aio_read(iocb, iovp, segs, offset); @@ -281,10 +286,6 @@ xfs_read( XFS_STATS_ADD(xs_read_bytes, ret); xfs_iunlock(ip, XFS_IOLOCK_SHARED); - -unlock_mutex: - if (unlikely(ioflags & 
IO_ISDIRECT)) - mutex_unlock(&inode->i_mutex); return ret; } -- cgit v1.1 From 0a8d17d090a4939643a52194b7d4a4001b9b2d93 Mon Sep 17 00:00:00 2001 From: David Chinner Date: Thu, 7 Sep 2006 14:27:15 +1000 Subject: [XFS] Fix xfs_splice_write() so appended data gets to disk. xfs_splice_write() failed to update the on disk inode size when extending the file, so when the file was closed the range extended by splice was truncated off. Hence any region of a file written to by splice would end up as a hole full of zeros. SGI-PV: 955939 SGI-Modid: xfs-linux-melb:xfs-kern:26920a Signed-off-by: David Chinner Signed-off-by: David Chatterton --- fs/xfs/linux-2.6/xfs_lrw.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'fs') diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c index 110c038..ee788b1 100644 --- a/fs/xfs/linux-2.6/xfs_lrw.c +++ b/fs/xfs/linux-2.6/xfs_lrw.c @@ -391,6 +391,8 @@ xfs_splice_write( xfs_inode_t *ip = XFS_BHVTOI(bdp); xfs_mount_t *mp = ip->i_mount; ssize_t ret; + struct inode *inode = outfilp->f_mapping->host; + xfs_fsize_t isize; XFS_STATS_INC(xs_write_calls); if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -417,6 +419,20 @@ xfs_splice_write( if (ret > 0) XFS_STATS_ADD(xs_write_bytes, ret); + isize = i_size_read(inode); + if (unlikely(ret < 0 && ret != -EFAULT && *ppos > isize)) + *ppos = isize; + + if (*ppos > ip->i_d.di_size) { + xfs_ilock(ip, XFS_ILOCK_EXCL); + if (*ppos > ip->i_d.di_size) { + ip->i_d.di_size = *ppos; + i_size_write(inode, *ppos); + ip->i_update_core = 1; + ip->i_update_size = 1; + } + xfs_iunlock(ip, XFS_ILOCK_EXCL); + } xfs_iunlock(ip, XFS_IOLOCK_EXCL); return ret; } -- cgit v1.1 From 0edc7d0f3709e8c3bb7e69c4df614218a753361e Mon Sep 17 00:00:00 2001 From: Nathan Scott Date: Thu, 7 Sep 2006 14:27:23 +1000 Subject: [XFS] Fix a bad pointer dereference in the quota statvfs handling. 
SGI-PV: 955993 SGI-Modid: xfs-linux-melb:xfs-kern:26934a Signed-off-by: Nathan Scott Signed-off-by: David Chatterton --- fs/xfs/quota/xfs_qm_bhv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index f137856..db8872b 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c @@ -203,7 +203,7 @@ xfs_qm_statvfs( if (error || !vnode) return error; - mp = XFS_BHVTOM(bhv); + mp = xfs_vfstom(bhvtovfs(bhv)); ip = xfs_vtoi(vnode); if (!(ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)) -- cgit v1.1 From 3665d0e58fa44f50c744f85c7e8ad21d5b10e206 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Fri, 8 Sep 2006 09:48:21 -0700 Subject: [PATCH] ext3_getblk() should handle HOLE correctly It has been reported that ext3_getblk() is not doing the right thing and is triggering the following WARN(): BUG: warning at fs/ext3/inode.c:1016/ext3_getblk() ext3_getblk+0x98/0x2a6 md_wakeup_thread+0x26/0x2a ext3_bread+0x1f/0x88 ext3_quota_read+0x136/0x1ae v1_read_dqblk+0x61/0xac dquot_acquire+0xf6/0x107 ext3_acquire_dquot+0x46/0x68 dqget+0x155/0x1e7 dquot_transfer+0x3e0/0x3e9 dput+0x23/0x13e ext3_setattr+0xc3/0x240 current_fs_time+0x52/0x6a notify_change+0x2bd/0x30d chown_common+0x9c/0xc5 strncpy_from_user+0x3b/0x68 do_path_lookup+0xdf/0x266 __user_walk_fd+0x44/0x5a sys_chown+0x4a/0x55 vfs_write+0xe7/0x13c sys_mkdir+0x1f/0x23 syscall_call+0x7/0xb Looking at the code, it looks like it's not handling HOLE correctly. It ends up returning -EIO. Here is the patch to fix it. If we really want to be paranoid, we can allow return values 0 (HOLE), 1 (we asked for one block) and return -EIO for more than 1 block. But I really don't see a reason for doing it - all we need is the block# here. (doesn't matter how many blocks are mapped). ext3_get_blocks_handle() returns number of blocks it mapped. It returns 0 in case of HOLE. 
ext3_getblk() should handle HOLE properly (currently it's dumping a warning stack and returning -EIO). Signed-off-by: Badari Pulavarty Acked-by: Mingming Cao Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ext3/inode.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index c5ee9f0..0f0b1ea 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1009,11 +1009,14 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, buffer_trace_init(&dummy.b_history); err = ext3_get_blocks_handle(handle, inode, block, 1, &dummy, create, 1); - if (err == 1) { + /* + * ext3_get_blocks_handle() returns number of blocks + * mapped. 0 in case of a HOLE. + */ + if (err > 0) { + if (err > 1) + WARN_ON(1); err = 0; - } else if (err >= 0) { - WARN_ON(1); - err = -EIO; } *errp = err; if (!err && buffer_mapped(&dummy)) { -- cgit v1.1 From e9f7bee1df223dcf83743b46cb06c08d95497ec0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 8 Sep 2006 09:48:54 -0700 Subject: [PATCH] NFS: large non-page-aligned direct I/O clobbers memory The logic in nfs_direct_read_schedule and nfs_direct_write_schedule can allow data->npages to be one larger than rpages. This causes a page pointer to be written beyond the end of the pagevec in nfs_read_data (or nfs_write_data). Fix this by making nfs_(read|write)data_alloc() calculate the size of the pagevec array, and initialise data->npages. Also get rid of the redundant argument to nfs_commit_alloc(). 
Signed-off-by: Trond Myklebust Cc: Chuck Lever Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/nfs/direct.c | 50 ++++++++++++++------------------------------------ fs/nfs/read.c | 24 +++++++++++++----------- fs/nfs/write.c | 37 +++++++++++++++---------------------- 3 files changed, 42 insertions(+), 69 deletions(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index fecd3b0..76ca1cb 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -100,25 +100,6 @@ static inline int put_dreq(struct nfs_direct_req *dreq) return atomic_dec_and_test(&dreq->io_count); } -/* - * "size" is never larger than rsize or wsize. - */ -static inline int nfs_direct_count_pages(unsigned long user_addr, size_t size) -{ - int page_count; - - page_count = (user_addr + size + PAGE_SIZE - 1) >> PAGE_SHIFT; - page_count -= user_addr >> PAGE_SHIFT; - BUG_ON(page_count < 0); - - return page_count; -} - -static inline unsigned int nfs_max_pages(unsigned int size) -{ - return (size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -} - /** * nfs_direct_IO - NFS address space operation for direct I/O * @rw: direction (read or write) @@ -276,28 +257,24 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int rpages = nfs_max_pages(rsize); unsigned int pgbase; int result; ssize_t started = 0; get_dreq(dreq); - pgbase = user_addr & ~PAGE_MASK; do { struct nfs_read_data *data; size_t bytes; + pgbase = user_addr & ~PAGE_MASK; + bytes = min(rsize,count); + result = -ENOMEM; - data = nfs_readdata_alloc(rpages); + data = nfs_readdata_alloc(pgbase + bytes); if (unlikely(!data)) break; - bytes = rsize; - if (count < rsize) - bytes = count; - - data->npages = nfs_direct_count_pages(user_addr, bytes); down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 1, 0, 
data->pagevec, NULL); @@ -344,8 +321,10 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo started += bytes; user_addr += bytes; pos += bytes; + /* FIXME: Remove this unnecessary math from final patch */ pgbase += bytes; pgbase &= ~PAGE_MASK; + BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); count -= bytes; } while (count != 0); @@ -524,7 +503,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode static void nfs_alloc_commit_data(struct nfs_direct_req *dreq) { - dreq->commit_data = nfs_commit_alloc(0); + dreq->commit_data = nfs_commit_alloc(); if (dreq->commit_data != NULL) dreq->commit_data->req = (struct nfs_page *) dreq; } @@ -605,28 +584,24 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l struct nfs_open_context *ctx = dreq->ctx; struct inode *inode = ctx->dentry->d_inode; size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int wpages = nfs_max_pages(wsize); unsigned int pgbase; int result; ssize_t started = 0; get_dreq(dreq); - pgbase = user_addr & ~PAGE_MASK; do { struct nfs_write_data *data; size_t bytes; + pgbase = user_addr & ~PAGE_MASK; + bytes = min(wsize,count); + result = -ENOMEM; - data = nfs_writedata_alloc(wpages); + data = nfs_writedata_alloc(pgbase + bytes); if (unlikely(!data)) break; - bytes = wsize; - if (count < wsize) - bytes = count; - - data->npages = nfs_direct_count_pages(user_addr, bytes); down_read(&current->mm->mmap_sem); result = get_user_pages(current, current->mm, user_addr, data->npages, 0, 0, data->pagevec, NULL); @@ -676,8 +651,11 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l started += bytes; user_addr += bytes; pos += bytes; + + /* FIXME: Remove this useless math from the final patch */ pgbase += bytes; pgbase &= ~PAGE_MASK; + BUG_ON(pgbase != (user_addr & ~PAGE_MASK)); count -= bytes; } while (count != 0); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index da9cf11..7a9ee00 100644 --- a/fs/nfs/read.c +++ 
b/fs/nfs/read.c @@ -43,13 +43,15 @@ static mempool_t *nfs_rdata_mempool; #define MIN_POOL_READ (32) -struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) +struct nfs_read_data *nfs_readdata_alloc(size_t len) { + unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->npages = pagecount; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -140,7 +142,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, int result; struct nfs_read_data *rdata; - rdata = nfs_readdata_alloc(1); + rdata = nfs_readdata_alloc(count); if (!rdata) return -ENOMEM; @@ -336,25 +338,25 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode) struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; struct nfs_read_data *data; - unsigned int rsize = NFS_SERVER(inode)->rsize; - unsigned int nbytes, offset; + size_t rsize = NFS_SERVER(inode)->rsize, nbytes; + unsigned int offset; int requests = 0; LIST_HEAD(list); nfs_list_remove_request(req); nbytes = req->wb_bytes; - for(;;) { - data = nfs_readdata_alloc(1); + do { + size_t len = min(nbytes,rsize); + + data = nfs_readdata_alloc(len); if (!data) goto out_bad; INIT_LIST_HEAD(&data->pages); list_add(&data->pages, &list); requests++; - if (nbytes <= rsize) - break; - nbytes -= rsize; - } + nbytes -= len; + } while(nbytes != 0); atomic_set(&req->wb_complete, requests); ClearPageError(page); @@ -402,7 +404,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode) if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) return nfs_pagein_multi(head, inode); - data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages); + data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize); if (!data) goto out_bad; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 5077499..8ab3cf1 100644 --- a/fs/nfs/write.c 
+++ b/fs/nfs/write.c @@ -90,22 +90,13 @@ static mempool_t *nfs_commit_mempool; static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); -struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount) +struct nfs_write_data *nfs_commit_alloc(void) { struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); - if (pagecount <= ARRAY_SIZE(p->page_array)) - p->pagevec = p->page_array; - else { - p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS); - if (!p->pagevec) { - mempool_free(p, nfs_commit_mempool); - p = NULL; - } - } } return p; } @@ -117,13 +108,15 @@ void nfs_commit_free(struct nfs_write_data *p) mempool_free(p, nfs_commit_mempool); } -struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) +struct nfs_write_data *nfs_writedata_alloc(size_t len) { + unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->pages); + p->npages = pagecount; if (pagecount <= ARRAY_SIZE(p->page_array)) p->pagevec = p->page_array; else { @@ -208,7 +201,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode, int result, written = 0; struct nfs_write_data *wdata; - wdata = nfs_writedata_alloc(1); + wdata = nfs_writedata_alloc(wsize); if (!wdata) return -ENOMEM; @@ -999,24 +992,24 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how) struct nfs_page *req = nfs_list_entry(head->next); struct page *page = req->wb_page; struct nfs_write_data *data; - unsigned int wsize = NFS_SERVER(inode)->wsize; - unsigned int nbytes, offset; + size_t wsize = NFS_SERVER(inode)->wsize, nbytes; + unsigned int offset; int requests = 0; LIST_HEAD(list); nfs_list_remove_request(req); nbytes = req->wb_bytes; - for (;;) { - data = nfs_writedata_alloc(1); + do { + size_t len = min(nbytes, wsize); + + data = 
nfs_writedata_alloc(len); if (!data) goto out_bad; list_add(&data->pages, &list); requests++; - if (nbytes <= wsize) - break; - nbytes -= wsize; - } + nbytes -= len; + } while (nbytes != 0); atomic_set(&req->wb_complete, requests); ClearPageError(page); @@ -1070,7 +1063,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how) struct nfs_write_data *data; unsigned int count; - data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages); + data = nfs_writedata_alloc(NFS_SERVER(inode)->wsize); if (!data) goto out_bad; @@ -1378,7 +1371,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how) struct nfs_write_data *data; struct nfs_page *req; - data = nfs_commit_alloc(NFS_SERVER(inode)->wpages); + data = nfs_commit_alloc(); if (!data) goto out_bad; -- cgit v1.1