-rw-r--r--  fs/9p/acl.c | 59
-rw-r--r--  fs/9p/acl.h | 8
-rw-r--r--  fs/9p/v9fs.c | 33
-rw-r--r--  fs/9p/v9fs_vfs.h | 3
-rw-r--r--  fs/9p/vfs_dir.c | 14
-rw-r--r--  fs/9p/vfs_file.c | 22
-rw-r--r--  fs/9p/vfs_inode.c | 91
-rw-r--r--  fs/9p/vfs_inode_dotl.c | 23
-rw-r--r--  fs/Kconfig | 23
-rw-r--r--  fs/Makefile | 6
-rw-r--r--  fs/adfs/inode.c | 2
-rw-r--r--  fs/affs/affs.h | 2
-rw-r--r--  fs/affs/amigaffs.c | 6
-rw-r--r--  fs/affs/file.c | 8
-rw-r--r--  fs/affs/inode.c | 8
-rw-r--r--  fs/affs/namei.c | 6
-rw-r--r--  fs/afs/afs_vl.h | 2
-rw-r--r--  fs/afs/fsclient.c | 2
-rw-r--r--  fs/afs/inode.c | 4
-rw-r--r--  fs/afs/internal.h | 4
-rw-r--r--  fs/afs/security.c | 6
-rw-r--r--  fs/afs/write.c | 18
-rw-r--r--  fs/autofs4/autofs_i.h | 26
-rw-r--r--  fs/autofs4/dev-ioctl.c | 11
-rw-r--r--  fs/autofs4/expire.c | 10
-rw-r--r--  fs/autofs4/inode.c | 2
-rw-r--r--  fs/autofs4/root.c | 2
-rw-r--r--  fs/autofs4/waitq.c | 4
-rw-r--r--  fs/befs/linuxvfs.c | 2
-rw-r--r--  fs/bfs/dir.c | 2
-rw-r--r--  fs/bfs/inode.c | 2
-rw-r--r--  fs/cachefiles/bind.c | 2
-rw-r--r--  fs/cachefiles/rdwr.c | 2
-rw-r--r--  fs/ceph/addr.c | 201
-rw-r--r--  fs/ceph/caps.c | 205
-rw-r--r--  fs/ceph/debugfs.c | 2
-rw-r--r--  fs/ceph/dir.c | 233
-rw-r--r--  fs/ceph/export.c | 24
-rw-r--r--  fs/ceph/file.c | 100
-rw-r--r--  fs/ceph/inode.c | 174
-rw-r--r--  fs/ceph/ioctl.c | 53
-rw-r--r--  fs/ceph/ioctl.h | 56
-rw-r--r--  fs/ceph/mds_client.c | 120
-rw-r--r--  fs/ceph/mds_client.h | 5
-rw-r--r--  fs/ceph/snap.c | 70
-rw-r--r--  fs/ceph/super.c | 99
-rw-r--r--  fs/ceph/super.h | 98
-rw-r--r--  fs/ceph/xattr.c | 50
-rw-r--r--  fs/coda/cache.c | 2
-rw-r--r--  fs/coda/coda_int.h | 2
-rw-r--r--  fs/coda/coda_linux.c | 2
-rw-r--r--  fs/coda/coda_linux.h | 7
-rw-r--r--  fs/coda/dir.c | 11
-rw-r--r--  fs/coda/file.c | 8
-rw-r--r--  fs/coda/pioctl.c | 4
-rw-r--r--  fs/configfs/dir.c | 16
-rw-r--r--  fs/configfs/inode.c | 5
-rw-r--r--  fs/configfs/item.c | 2
-rw-r--r--  fs/configfs/mount.c | 36
-rw-r--r--  fs/dlm/ast.c | 265
-rw-r--r--  fs/dlm/ast.h | 15
-rw-r--r--  fs/dlm/config.c | 75
-rw-r--r--  fs/dlm/config.h | 2
-rw-r--r--  fs/dlm/dlm_internal.h | 29
-rw-r--r--  fs/dlm/lock.c | 225
-rw-r--r--  fs/dlm/lockspace.c | 177
-rw-r--r--  fs/dlm/lowcomms.c | 9
-rw-r--r--  fs/dlm/memory.c | 22
-rw-r--r--  fs/dlm/memory.h | 2
-rw-r--r--  fs/dlm/plock.c | 10
-rw-r--r--  fs/dlm/recoverd.c | 12
-rw-r--r--  fs/dlm/user.c | 12
-rw-r--r--  fs/efs/inode.c | 2
-rw-r--r--  fs/efs/namei.c | 7
-rw-r--r--  fs/exofs/Kbuild | 6
-rw-r--r--  fs/exofs/exofs.h | 169
-rw-r--r--  fs/exofs/file.c | 10
-rw-r--r--  fs/exofs/inode.c | 357
-rw-r--r--  fs/exofs/ios.c | 803
-rw-r--r--  fs/exofs/namei.c | 7
-rw-r--r--  fs/exofs/pnfs.h | 45
-rw-r--r--  fs/exofs/super.c | 369
-rw-r--r--  fs/exportfs/expfs.c | 2
-rw-r--r--  fs/ext3/acl.c | 97
-rw-r--r--  fs/ext3/acl.h | 4
-rw-r--r--  fs/ext3/balloc.c | 55
-rw-r--r--  fs/ext3/dir.c | 167
-rw-r--r--  fs/ext3/file.c | 3
-rw-r--r--  fs/ext3/fsync.c | 17
-rw-r--r--  fs/ext3/hash.c | 4
-rw-r--r--  fs/ext3/ialloc.c | 51
-rw-r--r--  fs/ext3/inode.c | 207
-rw-r--r--  fs/ext3/ioctl.c | 28
-rw-r--r--  fs/ext3/namei.c | 34
-rw-r--r--  fs/ext3/super.c | 34
-rw-r--r--  fs/ext3/xattr_security.c | 36
-rw-r--r--  fs/freevxfs/vxfs_inode.c | 2
-rw-r--r--  fs/gfs2/acl.c | 86
-rw-r--r--  fs/gfs2/acl.h | 2
-rw-r--r--  fs/gfs2/aops.c | 8
-rw-r--r--  fs/gfs2/bmap.c | 213
-rw-r--r--  fs/gfs2/dir.c | 269
-rw-r--r--  fs/gfs2/dir.h | 1
-rw-r--r--  fs/gfs2/file.c | 304
-rw-r--r--  fs/gfs2/glock.c | 39
-rw-r--r--  fs/gfs2/glock.h | 8
-rw-r--r--  fs/gfs2/glops.c | 98
-rw-r--r--  fs/gfs2/glops.h | 2
-rw-r--r--  fs/gfs2/incore.h | 26
-rw-r--r--  fs/gfs2/inode.c | 185
-rw-r--r--  fs/gfs2/inode.h | 4
-rw-r--r--  fs/gfs2/log.c | 4
-rw-r--r--  fs/gfs2/lops.c | 66
-rw-r--r--  fs/gfs2/main.c | 3
-rw-r--r--  fs/gfs2/meta_io.c | 6
-rw-r--r--  fs/gfs2/ops_fstype.c | 10
-rw-r--r--  fs/gfs2/quota.c | 30
-rw-r--r--  fs/gfs2/rgrp.c | 625
-rw-r--r--  fs/gfs2/rgrp.h | 35
-rw-r--r--  fs/gfs2/super.c | 142
-rw-r--r--  fs/gfs2/trans.c | 5
-rw-r--r--  fs/gfs2/trans.h | 22
-rw-r--r--  fs/gfs2/xattr.c | 28
-rw-r--r--  fs/jffs2/acl.c | 51
-rw-r--r--  fs/jffs2/acl.h | 6
-rw-r--r--  fs/jffs2/compr.c | 128
-rw-r--r--  fs/jffs2/compr.h | 2
-rw-r--r--  fs/jffs2/compr_rtime.c | 4
-rw-r--r--  fs/jffs2/dir.c | 17
-rw-r--r--  fs/jffs2/file.c | 50
-rw-r--r--  fs/jffs2/fs.c | 12
-rw-r--r--  fs/jffs2/jffs2_fs_sb.h | 6
-rw-r--r--  fs/jffs2/nodelist.h | 2
-rw-r--r--  fs/jffs2/nodemgmt.c | 20
-rw-r--r--  fs/jffs2/os-linux.h | 6
-rw-r--r--  fs/jffs2/readinode.c | 2
-rw-r--r--  fs/jffs2/scan.c | 9
-rw-r--r--  fs/jffs2/security.c | 35
-rw-r--r--  fs/jffs2/super.c | 119
-rw-r--r--  fs/jffs2/symlink.c | 2
-rw-r--r--  fs/jffs2/wbuf.c | 17
-rw-r--r--  fs/logfs/dir.c | 12
-rw-r--r--  fs/logfs/file.c | 11
-rw-r--r--  fs/logfs/inode.c | 3
-rw-r--r--  fs/logfs/logfs.h | 3
-rw-r--r--  fs/logfs/readwrite.c | 2
-rw-r--r--  fs/logfs/super.c | 23
-rw-r--r--  fs/minix/bitmap.c | 55
-rw-r--r--  fs/minix/inode.c | 34
-rw-r--r--  fs/minix/minix.h | 11
-rw-r--r--  fs/ncpfs/dir.c | 11
-rw-r--r--  fs/ncpfs/file.c | 4
-rw-r--r--  fs/ncpfs/inode.c | 10
-rw-r--r--  fs/ncpfs/ioctl.c | 1
-rw-r--r--  fs/ncpfs/ncplib_kernel.h | 4
-rw-r--r--  fs/nfsctl.c | 100
-rw-r--r--  fs/ntfs/debug.h | 15
-rw-r--r--  fs/ntfs/dir.c | 10
-rw-r--r--  fs/ntfs/file.c | 13
-rw-r--r--  fs/ntfs/inode.c | 18
-rw-r--r--  fs/ntfs/inode.h | 2
-rw-r--r--  fs/omfs/dir.c | 2
-rw-r--r--  fs/omfs/inode.c | 3
-rw-r--r--  fs/openpromfs/inode.c | 4
-rw-r--r--  fs/partitions/check.c | 25
-rw-r--r--  fs/partitions/ldm.c | 16
-rw-r--r--  fs/pstore/inode.c | 52
-rw-r--r--  fs/pstore/internal.h | 4
-rw-r--r--  fs/pstore/platform.c | 139
-rw-r--r--  fs/qnx4/inode.c | 2
-rw-r--r--  fs/quota/dquot.c | 14
-rw-r--r--  fs/quota/quota.c | 9
-rw-r--r--  fs/ramfs/inode.c | 10
-rw-r--r--  fs/romfs/super.c | 2
-rw-r--r--  fs/squashfs/Kconfig | 42
-rw-r--r--  fs/squashfs/Makefile | 3
-rw-r--r--  fs/squashfs/decompressor.c | 6
-rw-r--r--  fs/squashfs/decompressor.h | 4
-rw-r--r--  fs/squashfs/inode.c | 18
-rw-r--r--  fs/squashfs/namei.c | 10
-rw-r--r--  fs/squashfs/squashfs.h | 3
-rw-r--r--  fs/squashfs/squashfs_fs.h | 7
-rw-r--r--  fs/squashfs/super.c | 2
-rw-r--r--  fs/sysv/inode.c | 2
-rw-r--r--  fs/xfs/Makefile | 121
-rw-r--r--  fs/xfs/linux-2.6/kmem.c | 132
-rw-r--r--  fs/xfs/linux-2.6/kmem.h | 124
-rw-r--r--  fs/xfs/linux-2.6/mrlock.h | 90
-rw-r--r--  fs/xfs/linux-2.6/time.h | 36
-rw-r--r--  fs/xfs/linux-2.6/xfs_acl.c | 464
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c | 1506
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.h | 68
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c | 1899
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.h | 351
-rw-r--r--  fs/xfs/linux-2.6/xfs_discard.c | 222
-rw-r--r--  fs/xfs/linux-2.6/xfs_discard.h | 10
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.c | 253
-rw-r--r--  fs/xfs/linux-2.6/xfs_export.h | 72
-rw-r--r--  fs/xfs/linux-2.6/xfs_file.c | 1114
-rw-r--r--  fs/xfs/linux-2.6/xfs_fs_subr.c | 96
-rw-r--r--  fs/xfs/linux-2.6/xfs_globals.c | 43
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.c | 1556
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl.h | 85
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.c | 672
-rw-r--r--  fs/xfs/linux-2.6/xfs_ioctl32.h | 237
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.c | 778
-rw-r--r--  fs/xfs/linux-2.6/xfs_iops.h | 30
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h | 307
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.c | 108
-rw-r--r--  fs/xfs/linux-2.6/xfs_message.h | 39
-rw-r--r--  fs/xfs/linux-2.6/xfs_quotaops.c | 139
-rw-r--r--  fs/xfs/linux-2.6/xfs_stats.c | 122
-rw-r--r--  fs/xfs/linux-2.6/xfs_stats.h | 223
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.c | 1720
-rw-r--r--  fs/xfs/linux-2.6/xfs_super.h | 87
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.c | 1132
-rw-r--r--  fs/xfs/linux-2.6/xfs_sync.h | 62
-rw-r--r--  fs/xfs/linux-2.6/xfs_sysctl.c | 252
-rw-r--r--  fs/xfs/linux-2.6/xfs_sysctl.h | 102
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.c | 56
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h | 1776
-rw-r--r--  fs/xfs/linux-2.6/xfs_vnode.h | 64
-rw-r--r--  fs/xfs/linux-2.6/xfs_xattr.c | 241
-rw-r--r--  fs/xfs/quota/xfs_dquot.c | 1496
-rw-r--r--  fs/xfs/quota/xfs_dquot.h | 143
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.c | 533
-rw-r--r--  fs/xfs/quota/xfs_dquot_item.h | 48
-rw-r--r--  fs/xfs/quota/xfs_qm.c | 2462
-rw-r--r--  fs/xfs/quota/xfs_qm.h | 172
-rw-r--r--  fs/xfs/quota/xfs_qm_bhv.c | 176
-rw-r--r--  fs/xfs/quota/xfs_qm_stats.c | 105
-rw-r--r--  fs/xfs/quota/xfs_qm_stats.h | 53
-rw-r--r--  fs/xfs/quota/xfs_qm_syscalls.c | 1259
-rw-r--r--  fs/xfs/quota/xfs_quota_priv.h | 53
-rw-r--r--  fs/xfs/quota/xfs_trans_dquot.c | 895
-rw-r--r--  fs/xfs/support/uuid.c | 63
-rw-r--r--  fs/xfs/support/uuid.h | 29
-rw-r--r--  fs/xfs/xfs.h | 4
-rw-r--r--  fs/xfs/xfs_acl.h | 7
-rw-r--r--  fs/xfs/xfs_ag.h | 6
-rw-r--r--  fs/xfs/xfs_alloc.c | 28
-rw-r--r--  fs/xfs/xfs_alloc_btree.c | 84
-rw-r--r--  fs/xfs/xfs_arch.h | 136
-rw-r--r--  fs/xfs/xfs_attr.c | 99
-rw-r--r--  fs/xfs/xfs_attr_leaf.c | 67
-rw-r--r--  fs/xfs/xfs_attr_leaf.h | 11
-rw-r--r--  fs/xfs/xfs_bmap.c | 2550
-rw-r--r--  fs/xfs/xfs_bmap.h | 318
-rw-r--r--  fs/xfs/xfs_bmap_btree.c | 106
-rw-r--r--  fs/xfs/xfs_btree.c | 55
-rw-r--r--  fs/xfs/xfs_btree.h | 40
-rw-r--r--  fs/xfs/xfs_btree_trace.c | 249
-rw-r--r--  fs/xfs/xfs_btree_trace.h | 99
-rw-r--r--  fs/xfs/xfs_buf_item.c | 110
-rw-r--r--  fs/xfs/xfs_da_btree.c | 360
-rw-r--r--  fs/xfs/xfs_da_btree.h | 13
-rw-r--r--  fs/xfs/xfs_dfrag.c | 6
-rw-r--r--  fs/xfs/xfs_dinode.h | 2
-rw-r--r--  fs/xfs/xfs_dir2.c | 156
-rw-r--r--  fs/xfs/xfs_dir2.h | 54
-rw-r--r--  fs/xfs/xfs_dir2_block.c | 253
-rw-r--r--  fs/xfs/xfs_dir2_block.h | 92
-rw-r--r--  fs/xfs/xfs_dir2_data.c | 327
-rw-r--r--  fs/xfs/xfs_dir2_data.h | 184
-rw-r--r--  fs/xfs/xfs_dir2_leaf.c | 423
-rw-r--r--  fs/xfs/xfs_dir2_leaf.h | 253
-rw-r--r--  fs/xfs/xfs_dir2_node.c | 203
-rw-r--r--  fs/xfs/xfs_dir2_node.h | 100
-rw-r--r--  fs/xfs/xfs_dir2_sf.c | 338
-rw-r--r--  fs/xfs/xfs_dir2_sf.h | 171
-rw-r--r--  fs/xfs/xfs_extfree_item.c | 4
-rw-r--r--  fs/xfs/xfs_filestream.c | 18
-rw-r--r--  fs/xfs/xfs_fs.h | 5
-rw-r--r--  fs/xfs/xfs_fsops.c | 60
-rw-r--r--  fs/xfs/xfs_ialloc.c | 32
-rw-r--r--  fs/xfs/xfs_ialloc_btree.c | 75
-rw-r--r--  fs/xfs/xfs_iget.c | 3
-rw-r--r--  fs/xfs/xfs_inode.c | 588
-rw-r--r--  fs/xfs/xfs_inode.h | 28
-rw-r--r--  fs/xfs/xfs_inode_item.c | 23
-rw-r--r--  fs/xfs/xfs_inum.h | 11
-rw-r--r--  fs/xfs/xfs_iomap.c | 39
-rw-r--r--  fs/xfs/xfs_log.c | 451
-rw-r--r--  fs/xfs/xfs_log.h | 2
-rw-r--r--  fs/xfs/xfs_log_recover.c | 123
-rw-r--r--  fs/xfs/xfs_mount.c | 84
-rw-r--r--  fs/xfs/xfs_mount.h | 2
-rw-r--r--  fs/xfs/xfs_rename.c | 12
-rw-r--r--  fs/xfs/xfs_rtalloc.c | 80
-rw-r--r--  fs/xfs/xfs_rtalloc.h | 2
-rw-r--r--  fs/xfs/xfs_rw.c | 27
-rw-r--r--  fs/xfs/xfs_rw.h | 2
-rw-r--r--  fs/xfs/xfs_sb.h | 2
-rw-r--r--  fs/xfs/xfs_trans.c | 13
-rw-r--r--  fs/xfs/xfs_trans.h | 14
-rw-r--r--  fs/xfs/xfs_trans_ail.c | 239
-rw-r--r--  fs/xfs/xfs_trans_buf.c | 168
-rw-r--r--  fs/xfs/xfs_trans_inode.c | 34
-rw-r--r--  fs/xfs/xfs_trans_priv.h | 5
-rw-r--r--  fs/xfs/xfs_vnodeops.c | 604
-rw-r--r--  fs/xfs/xfs_vnodeops.h | 6
-rw-r--r--  fs/yaffs2/Kconfig | 161
-rw-r--r--  fs/yaffs2/Makefile | 17
-rw-r--r--  fs/yaffs2/yaffs_allocator.c | 396
-rw-r--r--  fs/yaffs2/yaffs_allocator.h | 30
-rw-r--r--  fs/yaffs2/yaffs_attribs.c | 124
-rw-r--r--  fs/yaffs2/yaffs_attribs.h | 28
-rw-r--r--  fs/yaffs2/yaffs_bitmap.c | 98
-rw-r--r--  fs/yaffs2/yaffs_bitmap.h | 33
-rw-r--r--  fs/yaffs2/yaffs_checkptrw.c | 415
-rw-r--r--  fs/yaffs2/yaffs_checkptrw.h | 33
-rw-r--r--  fs/yaffs2/yaffs_ecc.c | 298
-rw-r--r--  fs/yaffs2/yaffs_ecc.h | 44
-rw-r--r--  fs/yaffs2/yaffs_getblockinfo.h | 35
-rw-r--r--  fs/yaffs2/yaffs_guts.c | 5164
-rw-r--r--  fs/yaffs2/yaffs_guts.h | 915
-rw-r--r--  fs/yaffs2/yaffs_linux.h | 41
-rw-r--r--  fs/yaffs2/yaffs_mtdif.c | 54
-rw-r--r--  fs/yaffs2/yaffs_mtdif.h | 23
-rw-r--r--  fs/yaffs2/yaffs_mtdif1.c | 330
-rw-r--r--  fs/yaffs2/yaffs_mtdif1.h | 29
-rw-r--r--  fs/yaffs2/yaffs_mtdif2.c | 225
-rw-r--r--  fs/yaffs2/yaffs_mtdif2.h | 29
-rw-r--r--  fs/yaffs2/yaffs_nameval.c | 201
-rw-r--r--  fs/yaffs2/yaffs_nameval.h | 28
-rw-r--r--  fs/yaffs2/yaffs_nand.c | 127
-rw-r--r--  fs/yaffs2/yaffs_nand.h | 38
-rw-r--r--  fs/yaffs2/yaffs_packedtags1.c | 53
-rw-r--r--  fs/yaffs2/yaffs_packedtags1.h | 39
-rw-r--r--  fs/yaffs2/yaffs_packedtags2.c | 196
-rw-r--r--  fs/yaffs2/yaffs_packedtags2.h | 47
-rw-r--r--  fs/yaffs2/yaffs_tagscompat.c | 422
-rw-r--r--  fs/yaffs2/yaffs_tagscompat.h | 36
-rw-r--r--  fs/yaffs2/yaffs_tagsvalidity.c | 27
-rw-r--r--  fs/yaffs2/yaffs_tagsvalidity.h | 23
-rw-r--r--  fs/yaffs2/yaffs_trace.h | 57
-rw-r--r--  fs/yaffs2/yaffs_verify.c | 535
-rw-r--r--  fs/yaffs2/yaffs_verify.h | 43
-rw-r--r--  fs/yaffs2/yaffs_vfs.c | 2792
-rw-r--r--  fs/yaffs2/yaffs_yaffs1.c | 433
-rw-r--r--  fs/yaffs2/yaffs_yaffs1.h | 22
-rw-r--r--  fs/yaffs2/yaffs_yaffs2.c | 1598
-rw-r--r--  fs/yaffs2/yaffs_yaffs2.h | 39
-rw-r--r--  fs/yaffs2/yportenv.h | 70
-rw-r--r--  kernel/sysctl_binary.c | 4
-rw-r--r--  kernel/sysctl_check.c | 2
346 files changed, 7704 insertions, 49987 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index 4a866cd..9a1d426 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -96,14 +96,10 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
return acl;
}
-int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
+struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type)
{
- struct posix_acl *acl;
struct v9fs_session_info *v9ses;
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
-
v9ses = v9fs_inode2v9ses(inode);
if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) ||
((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) {
@@ -111,18 +107,10 @@ int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags)
* On access = client and acl = on mode get the acl
* values from the server
*/
- return 0;
+ return NULL;
}
- acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
+ return v9fs_get_cached_acl(inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- int error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- return error;
- }
- return -EAGAIN;
}
static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
@@ -165,21 +153,18 @@ err_free_out:
int v9fs_acl_chmod(struct dentry *dentry)
{
int retval = 0;
- struct posix_acl *acl, *clone;
+ struct posix_acl *acl;
struct inode *inode = dentry->d_inode;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
if (acl) {
- clone = posix_acl_clone(acl, GFP_KERNEL);
+ retval = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ if (retval)
+ return retval;
+ retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, acl);
posix_acl_release(acl);
- if (!clone)
- return -ENOMEM;
- retval = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!retval)
- retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, clone);
- posix_acl_release(clone);
}
return retval;
}
@@ -197,11 +182,11 @@ int v9fs_set_create_acl(struct dentry *dentry,
return 0;
}
-int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+int v9fs_acl_mode(struct inode *dir, umode_t *modep,
struct posix_acl **dpacl, struct posix_acl **pacl)
{
int retval = 0;
- mode_t mode = *modep;
+ umode_t mode = *modep;
struct posix_acl *acl = NULL;
if (!S_ISLNK(mode)) {
@@ -212,30 +197,18 @@ int v9fs_acl_mode(struct inode *dir, mode_t *modep,
mode &= ~current_umask();
}
if (acl) {
- struct posix_acl *clone;
-
if (S_ISDIR(mode))
*dpacl = posix_acl_dup(acl);
- clone = posix_acl_clone(acl, GFP_NOFS);
- posix_acl_release(acl);
- if (!clone)
- return -ENOMEM;
-
- retval = posix_acl_create_masq(clone, &mode);
- if (retval < 0) {
- posix_acl_release(clone);
- goto cleanup;
- }
+ retval = posix_acl_create(&acl, GFP_NOFS, &mode);
+ if (retval < 0)
+ return retval;
if (retval > 0)
- *pacl = clone;
+ *pacl = acl;
else
- posix_acl_release(clone);
+ posix_acl_release(acl);
}
*modep = mode;
return 0;
-cleanup:
- return retval;
-
}
static int v9fs_remote_get_acl(struct dentry *dentry, const char *name,
@@ -346,7 +319,7 @@ static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
retval = posix_acl_equiv_mode(acl, &mode);
if (retval < 0)
goto err_out;
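
Note on the acl.c hunks above: the open-coded clone + posix_acl_create_masq()/posix_acl_chmod_masq() sequences are replaced by the posix_acl_create()/posix_acl_chmod() helpers, which take a pointer to the caller's ACL reference and update it in place. A minimal sketch of the create-time flow as the patch uses it (function name illustrative, not part of the patch):

	/* Illustrative caller of posix_acl_create() as used in
	 * v9fs_acl_mode() above. The helper consumes the reference
	 * passed in: on error *acl has already been released,
	 * otherwise *acl points at the masqueraded copy. */
	static int example_apply_create_acl(struct posix_acl **acl, umode_t *mode)
	{
		int ret = posix_acl_create(acl, GFP_NOFS, mode);

		if (ret < 0)
			return ret;		 /* reference already dropped */
		if (ret == 0)
			posix_acl_release(*acl); /* mode bits fully cover the ACL */
		return ret;			 /* > 0: caller stores *acl */
	}
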
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
index c47ea9c..5595564 100644
--- a/fs/9p/acl.h
+++ b/fs/9p/acl.h
@@ -16,14 +16,14 @@
#ifdef CONFIG_9P_FS_POSIX_ACL
extern int v9fs_get_acl(struct inode *, struct p9_fid *);
-extern int v9fs_check_acl(struct inode *inode, int mask, unsigned int flags);
+extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type);
extern int v9fs_acl_chmod(struct dentry *);
extern int v9fs_set_create_acl(struct dentry *,
struct posix_acl **, struct posix_acl **);
-extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+extern int v9fs_acl_mode(struct inode *dir, umode_t *modep,
struct posix_acl **dpacl, struct posix_acl **pacl);
#else
-#define v9fs_check_acl NULL
+#define v9fs_iop_get_acl NULL
static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
{
return 0;
@@ -38,7 +38,7 @@ static inline int v9fs_set_create_acl(struct dentry *dentry,
{
return 0;
}
-static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+static inline int v9fs_acl_mode(struct inode *dir, umode_t *modep,
struct posix_acl **dpacl,
struct posix_acl **pacl)
{
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index ef96618..2b78014 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -132,21 +132,19 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
options = tmp_options;
while ((p = strsep(&options, ",")) != NULL) {
- int token;
+ int token, r;
if (!*p)
continue;
token = match_token(p, tokens, args);
- if (token < Opt_uname) {
- int r = match_int(&args[0], &option);
+ switch (token) {
+ case Opt_debug:
+ r = match_int(&args[0], &option);
if (r < 0) {
P9_DPRINTK(P9_DEBUG_ERROR,
- "integer field, but no integer?\n");
+ "integer field, but no integer?\n");
ret = r;
continue;
}
- }
- switch (token) {
- case Opt_debug:
v9ses->debug = option;
#ifdef CONFIG_NET_9P_DEBUG
p9_debug_level = option;
@@ -154,12 +152,33 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
break;
case Opt_dfltuid:
+ r = match_int(&args[0], &option);
+ if (r < 0) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
+ ret = r;
+ continue;
+ }
v9ses->dfltuid = option;
break;
case Opt_dfltgid:
+ r = match_int(&args[0], &option);
+ if (r < 0) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
+ ret = r;
+ continue;
+ }
v9ses->dfltgid = option;
break;
case Opt_afid:
+ r = match_int(&args[0], &option);
+ if (r < 0) {
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "integer field, but no integer?\n");
+ ret = r;
+ continue;
+ }
v9ses->afid = option;
break;
case Opt_uname:
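
Note on the v9fs.c hunks above: the ordering-dependent pre-check (`token < Opt_uname`) is replaced by an explicit match_int() call inside each integer-valued case, so adding or reordering tokens can no longer silently break argument parsing. A reduced sketch of the per-case shape (kernel parser API from <linux/parser.h>; v9ses and the token table as in the file above):

	static void example_parse_one(struct v9fs_session_info *v9ses, char *p)
	{
		substring_t args[MAX_OPT_ARGS];
		int option;

		switch (match_token(p, tokens, args)) {
		case Opt_afid:
			/* each integer option parses its own argument */
			if (match_int(&args[0], &option) < 0)
				break;	/* malformed integer: skip option */
			v9ses->afid = option;
			break;
		/* ... remaining options follow the same shape ... */
		}
	}
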
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index f9a28ea..410ffd6 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -70,7 +70,8 @@ ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64);
void v9fs_blank_wstat(struct p9_wstat *wstat);
int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
-int v9fs_file_fsync_dotl(struct file *filp, int datasync);
+int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
+ int datasync);
ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *,
const char __user *, size_t, loff_t *, int);
int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 9c2bdda..598fff1 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -165,9 +165,8 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
while (rdir->head < rdir->tail) {
p9stat_init(&st);
- err = p9stat_read(rdir->buf + rdir->head,
- rdir->tail - rdir->head, &st,
- fid->clnt->proto_version);
+ err = p9stat_read(fid->clnt, rdir->buf + rdir->head,
+ rdir->tail - rdir->head, &st);
if (err) {
P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
err = -EIO;
@@ -231,7 +230,7 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
while (err == 0) {
if (rdir->tail == rdir->head) {
err = p9_client_readdir(fid, rdir->buf, buflen,
- filp->f_pos);
+ filp->f_pos);
if (err <= 0)
goto unlock_and_exit;
@@ -241,10 +240,9 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
while (rdir->head < rdir->tail) {
- err = p9dirent_read(rdir->buf + rdir->head,
- rdir->tail - rdir->head,
- &curdirent,
- fid->clnt->proto_version);
+ err = p9dirent_read(fid->clnt, rdir->buf + rdir->head,
+ rdir->tail - rdir->head,
+ &curdirent);
if (err < 0) {
P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
err = -EIO;
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 9d6e168..62857a8 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -539,32 +539,50 @@ out:
}
-static int v9fs_file_fsync(struct file *filp, int datasync)
+static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct p9_fid *fid;
+ struct inode *inode = filp->f_mapping->host;
struct p9_wstat wstat;
int retval;
+ retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (retval)
+ return retval;
+
+ mutex_lock(&inode->i_mutex);
P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync);
fid = filp->private_data;
v9fs_blank_wstat(&wstat);
retval = p9_client_wstat(fid, &wstat);
+ mutex_unlock(&inode->i_mutex);
+
return retval;
}
-int v9fs_file_fsync_dotl(struct file *filp, int datasync)
+int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct p9_fid *fid;
+ struct inode *inode = filp->f_mapping->host;
int retval;
+ retval = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (retval)
+ return retval;
+
+ mutex_lock(&inode->i_mutex);
P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
filp, datasync);
fid = filp->private_data;
retval = p9_client_fsync(fid, datasync);
+ mutex_unlock(&inode->i_mutex);
+
return retval;
}
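
Note on the fsync changes above: the 3.1 ->fsync() prototype gained a byte range, and the VFS no longer flushes data or takes i_mutex on the method's behalf. A sketch of the pattern both v9fs variants adopt (names illustrative):

	static int example_fsync(struct file *filp, loff_t start, loff_t end,
				 int datasync)
	{
		struct inode *inode = filp->f_mapping->host;
		int err;

		/* flush the dirty page range ourselves first */
		err = filemap_write_and_wait_range(inode->i_mapping, start, end);
		if (err)
			return err;

		mutex_lock(&inode->i_mutex);
		err = 0;	/* fs-specific metadata commit goes here */
		mutex_unlock(&inode->i_mutex);
		return err;
	}
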
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index c72e20c..bf1df72 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -278,10 +278,8 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
case S_IFSOCK:
if (v9fs_proto_dotl(v9ses)) {
inode->i_op = &v9fs_file_inode_operations_dotl;
- inode->i_fop = &v9fs_file_operations_dotl;
} else if (v9fs_proto_dotu(v9ses)) {
inode->i_op = &v9fs_file_inode_operations;
- inode->i_fop = &v9fs_file_operations;
} else {
P9_DPRINTK(P9_DEBUG_ERROR,
"special files without extended mode\n");
@@ -529,8 +527,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb,
unlock_new_inode(inode);
return inode;
error:
- unlock_new_inode(inode);
- iput(inode);
+ iget_failed(inode);
return ERR_PTR(retval);
}
@@ -553,40 +550,66 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
}
/**
+ * v9fs_at_to_dotl_flags- convert Linux specific AT flags to
+ * plan 9 AT flag.
+ * @flags: flags to convert
+ */
+static int v9fs_at_to_dotl_flags(int flags)
+{
+ int rflags = 0;
+ if (flags & AT_REMOVEDIR)
+ rflags |= P9_DOTL_AT_REMOVEDIR;
+ return rflags;
+}
+
+/**
* v9fs_remove - helper function to remove files and directories
* @dir: directory inode that is being deleted
- * @file: dentry that is being deleted
+ * @dentry: dentry that is being deleted
* @rmdir: removing a directory
*
*/
-static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
+static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
{
- int retval;
- struct p9_fid *v9fid;
- struct inode *file_inode;
-
- P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %d\n", dir, file,
- rmdir);
+ struct inode *inode;
+ int retval = -EOPNOTSUPP;
+ struct p9_fid *v9fid, *dfid;
+ struct v9fs_session_info *v9ses;
- file_inode = file->d_inode;
- v9fid = v9fs_fid_clone(file);
- if (IS_ERR(v9fid))
- return PTR_ERR(v9fid);
+ P9_DPRINTK(P9_DEBUG_VFS, "inode: %p dentry: %p rmdir: %x\n",
+ dir, dentry, flags);
- retval = p9_client_remove(v9fid);
+ v9ses = v9fs_inode2v9ses(dir);
+ inode = dentry->d_inode;
+ dfid = v9fs_fid_lookup(dentry->d_parent);
+ if (IS_ERR(dfid)) {
+ retval = PTR_ERR(dfid);
+ P9_DPRINTK(P9_DEBUG_VFS, "fid lookup failed %d\n", retval);
+ return retval;
+ }
+ if (v9fs_proto_dotl(v9ses))
+ retval = p9_client_unlinkat(dfid, dentry->d_name.name,
+ v9fs_at_to_dotl_flags(flags));
+ if (retval == -EOPNOTSUPP) {
+ /* Try the one based on path */
+ v9fid = v9fs_fid_clone(dentry);
+ if (IS_ERR(v9fid))
+ return PTR_ERR(v9fid);
+ retval = p9_client_remove(v9fid);
+ }
if (!retval) {
/*
* directories on unlink should have zero
* link count
*/
- if (rmdir) {
- clear_nlink(file_inode);
+ if (flags & AT_REMOVEDIR) {
+ clear_nlink(inode);
drop_nlink(dir);
} else
- drop_nlink(file_inode);
+ drop_nlink(inode);
- v9fs_invalidate_inode_attr(file_inode);
+ v9fs_invalidate_inode_attr(inode);
v9fs_invalidate_inode_attr(dir);
}
return retval;
@@ -694,8 +717,8 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
fid = NULL;
v9ses = v9fs_inode2v9ses(dir);
perm = unixmode2p9mode(v9ses, mode);
- if (nd && nd->flags & LOOKUP_OPEN)
- flags = nd->intent.open.flags - 1;
+ if (nd)
+ flags = nd->intent.open.flags;
else
flags = O_RDWR;
@@ -710,7 +733,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
v9fs_invalidate_inode_attr(dir);
/* if we are opening a file, assign the open fid to the file */
- if (nd && nd->flags & LOOKUP_OPEN) {
+ if (nd) {
v9inode = V9FS_I(dentry->d_inode);
mutex_lock(&v9inode->v_mutex);
if (v9ses->cache && !v9inode->writeback_fid &&
@@ -889,7 +912,7 @@ int v9fs_vfs_unlink(struct inode *i, struct dentry *d)
int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
{
- return v9fs_remove(i, d, 1);
+ return v9fs_remove(i, d, AT_REMOVEDIR);
}
/**
@@ -937,9 +960,12 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
down_write(&v9ses->rename_sem);
if (v9fs_proto_dotl(v9ses)) {
- retval = p9_client_rename(oldfid, newdirfid,
- (char *) new_dentry->d_name.name);
- if (retval != -ENOSYS)
+ retval = p9_client_renameat(olddirfid, old_dentry->d_name.name,
+ newdirfid, new_dentry->d_name.name);
+ if (retval == -EOPNOTSUPP)
+ retval = p9_client_rename(oldfid, newdirfid,
+ new_dentry->d_name.name);
+ if (retval != -EOPNOTSUPP)
goto clunk_newdir;
}
if (old_dentry->d_parent != new_dentry->d_parent) {
@@ -964,11 +990,6 @@ clunk_newdir:
clear_nlink(new_inode);
else
drop_nlink(new_inode);
- /*
- * Work around vfs rename rehash bug with
- * FS_RENAME_DOES_D_MOVE
- */
- v9fs_invalidate_inode_attr(new_inode);
}
if (S_ISDIR(old_inode->i_mode)) {
if (!new_inode)
@@ -1116,7 +1137,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
struct v9fs_session_info *v9ses = sb->s_fs_info;
struct v9fs_inode *v9inode = V9FS_I(inode);
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
inode->i_atime.tv_sec = stat->atime;
inode->i_mtime.tv_sec = stat->mtime;
@@ -1142,7 +1163,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
/* HARDLINKCOUNT %u */
sscanf(ext, "%13s %u", tag_name, &i_nlink);
if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
- inode->i_nlink = i_nlink;
+ set_nlink(inode, i_nlink);
}
}
mode = stat->mode & S_IALLUGO;
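
Note on the i_nlink conversions above (and in the affs/afs/bfs hunks below): direct stores to inode->i_nlink are replaced by set_nlink()/clear_nlink()/drop_nlink() so the VFS can sanity-check link-count transitions. The unlink-time pattern, mirroring v9fs_remove() above (function name illustrative):

	static void example_unlink_update(struct inode *dir, struct inode *inode,
					  bool is_dir)
	{
		if (is_dir) {
			clear_nlink(inode);	/* directories go straight to 0 */
			drop_nlink(dir);	/* the ".." entry goes away */
		} else {
			drop_nlink(inode);
		}
	}
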
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index c873172..dbbc83f 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -81,7 +81,7 @@ static struct dentry *v9fs_dentry_from_dir_inode(struct inode *inode)
spin_lock(&inode->i_lock);
/* Directory should have only one entry. */
BUG_ON(S_ISDIR(inode->i_mode) && !list_is_singular(&inode->i_dentry));
- dentry = list_entry(inode->i_dentry.next, struct dentry, d_alias);
+ dentry = list_entry(inode->i_dentry.next, struct dentry, d_u.d_alias);
spin_unlock(&inode->i_lock);
return dentry;
}
@@ -169,8 +169,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb,
unlock_new_inode(inode);
return inode;
error:
- unlock_new_inode(inode);
- iput(inode);
+ iget_failed(inode);
return ERR_PTR(retval);
}
@@ -259,7 +258,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
int err = 0;
gid_t gid;
int flags;
- mode_t mode;
+ umode_t mode;
char *name = NULL;
struct file *filp;
struct p9_qid qid;
@@ -271,8 +270,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
struct posix_acl *pacl = NULL, *dacl = NULL;
v9ses = v9fs_inode2v9ses(dir);
- if (nd && nd->flags & LOOKUP_OPEN)
- flags = nd->intent.open.flags - 1;
+ if (nd)
+ flags = nd->intent.open.flags;
else {
/*
* create call without LOOKUP_OPEN is due
@@ -402,7 +401,7 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
struct p9_fid *fid = NULL, *dfid = NULL;
gid_t gid;
char *name;
- mode_t mode;
+ umode_t mode;
struct inode *inode;
struct p9_qid qid;
struct dentry *dir_dentry;
@@ -606,7 +605,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
inode->i_ctime.tv_nsec = stat->st_ctime_nsec;
inode->i_uid = stat->st_uid;
inode->i_gid = stat->st_gid;
- inode->i_nlink = stat->st_nlink;
+ set_nlink(inode, stat->st_nlink);
mode = stat->st_mode & S_IALLUGO;
mode |= inode->i_mode & ~S_IALLUGO;
@@ -632,7 +631,7 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode)
if (stat->st_result_mask & P9_STATS_GID)
inode->i_gid = stat->st_gid;
if (stat->st_result_mask & P9_STATS_NLINK)
- inode->i_nlink = stat->st_nlink;
+ set_nlink(inode, stat->st_nlink);
if (stat->st_result_mask & P9_STATS_MODE) {
inode->i_mode = stat->st_mode;
if ((S_ISBLK(inode->i_mode)) ||
@@ -805,7 +804,7 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
int err;
gid_t gid;
char *name;
- mode_t mode;
+ umode_t mode;
struct v9fs_session_info *v9ses;
struct p9_fid *fid = NULL, *dfid = NULL;
struct inode *inode;
@@ -977,7 +976,7 @@ const struct inode_operations v9fs_dir_inode_operations_dotl = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = v9fs_listxattr,
- .check_acl = v9fs_check_acl,
+ .get_acl = v9fs_iop_get_acl,
};
const struct inode_operations v9fs_file_inode_operations_dotl = {
@@ -987,7 +986,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = v9fs_listxattr,
- .check_acl = v9fs_check_acl,
+ .get_acl = v9fs_iop_get_acl,
};
const struct inode_operations v9fs_symlink_inode_operations_dotl = {
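
Note on the inode_operations hunks above: the ->check_acl() method is replaced by ->get_acl(). The filesystem now only returns a referenced ACL (or NULL, or an ERR_PTR); the VFS caches it and runs posix_acl_permission() itself. A sketch of the contract (condition illustrative):

	static struct posix_acl *example_get_acl(struct inode *inode, int type)
	{
		if (!IS_POSIXACL(inode))
			return NULL;	/* fall back to mode-bit checks */
		/* fs-specific fetch from cache or server goes here */
		return NULL;
	}
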
diff --git a/fs/Kconfig b/fs/Kconfig
index 88701cc..6ad58a5 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -109,7 +109,7 @@ source "fs/proc/Kconfig"
source "fs/sysfs/Kconfig"
config TMPFS
- bool "Virtual memory file system support (former shm fs)"
+ bool "Tmpfs virtual memory file system support (former shm fs)"
depends on SHMEM
help
Tmpfs is a file system which keeps all files in virtual memory.
@@ -127,14 +127,21 @@ config TMPFS_POSIX_ACL
select TMPFS_XATTR
select GENERIC_ACL
help
- POSIX Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
+ POSIX Access Control Lists (ACLs) support additional access rights
+ for users and groups beyond the standard owner/group/world scheme,
+ and this option selects support for ACLs specifically for tmpfs
+ filesystems.
+
+ If you've selected TMPFS, it's possible that you'll also need
+ this option as there are a number of Linux distros that require
+ POSIX ACL support under /dev for certain features to work properly.
+ For example, some distros need this feature for ALSA-related /dev
+ files for sound to work properly. In short, if you're not sure,
+ say Y.
To learn more about Access Control Lists, visit the POSIX ACLs for
Linux website <http://acl.bestbits.at/>.
- If you don't know what Access Control Lists are, say N.
-
config TMPFS_XATTR
bool "Tmpfs extended attributes"
depends on TMPFS
@@ -192,10 +199,6 @@ source "fs/hfsplus/Kconfig"
source "fs/befs/Kconfig"
source "fs/bfs/Kconfig"
source "fs/efs/Kconfig"
-
-# Patched by YAFFS
-source "fs/yaffs2/Kconfig"
-
source "fs/jffs2/Kconfig"
# UBIFS File system configuration
source "fs/ubifs/Kconfig"
@@ -215,6 +218,8 @@ source "fs/exofs/Kconfig"
endif # MISC_FILESYSTEMS
+source "fs/exofs/Kconfig.ore"
+
menuconfig NETWORK_FILESYSTEMS
bool "Network File Systems"
default y
diff --git a/fs/Makefile b/fs/Makefile
index 2999b4d..d2c3353 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -29,7 +29,6 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
obj-$(CONFIG_AIO) += aio.o
obj-$(CONFIG_FILE_LOCKING) += locks.o
obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
-obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
@@ -121,9 +120,6 @@ obj-$(CONFIG_DEBUG_FS) += debugfs/
obj-$(CONFIG_OCFS2_FS) += ocfs2/
obj-$(CONFIG_BTRFS_FS) += btrfs/
obj-$(CONFIG_GFS2_FS) += gfs2/
-obj-$(CONFIG_EXOFS_FS) += exofs/
+obj-y += exofs/ # Multiple modules
obj-$(CONFIG_CEPH_FS) += ceph/
obj-$(CONFIG_PSTORE) += pstore/
-
-# Patched by YAFFS
-obj-$(CONFIG_YAFFS_FS) += yaffs2/
diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c
index d5250c5..1dab6a1 100644
--- a/fs/adfs/inode.c
+++ b/fs/adfs/inode.c
@@ -247,7 +247,7 @@ adfs_iget(struct super_block *sb, struct object_info *obj)
inode->i_gid = ADFS_SB(sb)->s_gid;
inode->i_ino = obj->file_id;
inode->i_size = obj->size;
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
inode->i_blocks = (inode->i_size + sb->s_blocksize - 1) >>
sb->s_blocksize_bits;
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index 0e95f73..c2b9c79 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -182,7 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent
void affs_free_prealloc(struct inode *inode);
extern void affs_truncate(struct inode *);
-int affs_file_fsync(struct file *, int);
+int affs_file_fsync(struct file *, loff_t, loff_t, int);
/* dir.c */
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index 3a4557e..43c05d8 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -132,7 +132,7 @@ affs_fix_dcache(struct dentry *dentry, u32 entry_ino)
head = &inode->i_dentry;
next = head->next;
while (next != head) {
- dentry = list_entry(next, struct dentry, d_alias);
+ dentry = list_entry(next, struct dentry, d_u.d_alias);
if (entry_ino == (u32)(long)dentry->d_fsdata) {
dentry->d_fsdata = data;
break;
@@ -215,7 +215,7 @@ affs_remove_link(struct dentry *dentry)
break;
default:
if (!AFFS_TAIL(sb, bh)->link_chain)
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
}
affs_free_block(sb, link_ino);
goto done;
@@ -316,7 +316,7 @@ affs_remove_header(struct dentry *dentry)
if (inode->i_nlink > 1)
retval = affs_remove_link(dentry);
else
- inode->i_nlink = 0;
+ clear_nlink(inode);
affs_unlock_link(inode);
inode->i_ctime = CURRENT_TIME_SEC;
mark_inode_dirty(inode);
diff --git a/fs/affs/file.c b/fs/affs/file.c
index acf321b..2f4c935 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -923,14 +923,20 @@ affs_truncate(struct inode *inode)
affs_free_prealloc(inode);
}
-int affs_file_fsync(struct file *filp, int datasync)
+int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
int ret, err;
+ err = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (err)
+ return err;
+
+ mutex_lock(&inode->i_mutex);
ret = write_inode_now(inode, 0);
err = sync_blockdev(inode->i_sb->s_bdev);
if (!ret)
ret = err;
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 5d82890..88a4b0b 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -54,7 +54,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
prot = be32_to_cpu(tail->protect);
inode->i_size = 0;
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
inode->i_mode = 0;
AFFS_I(inode)->i_extcnt = 1;
AFFS_I(inode)->i_ext_last = ~1;
@@ -137,7 +137,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
sbi->s_hashsize + 1;
}
if (tail->link_chain)
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
inode->i_mapping->a_ops = (sbi->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
inode->i_op = &affs_file_inode_operations;
inode->i_fop = &affs_file_operations;
@@ -304,7 +304,7 @@ affs_new_inode(struct inode *dir)
inode->i_uid = current_fsuid();
inode->i_gid = current_fsgid();
inode->i_ino = block;
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
atomic_set(&AFFS_I(inode)->i_opencnt, 0);
AFFS_I(inode)->i_blkcnt = 0;
@@ -387,7 +387,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
AFFS_TAIL(sb, inode_bh)->link_chain = cpu_to_be32(block);
affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
mark_buffer_dirty_inode(inode_bh, inode);
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
ihold(inode);
}
affs_fix_checksum(sb, bh);
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index e3e9efc..780a11d 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -277,7 +277,7 @@ affs_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata
inode->i_mapping->a_ops = (AFFS_SB(sb)->s_flags & SF_OFS) ? &affs_aops_ofs : &affs_aops;
error = affs_add_entry(dir, inode, dentry, ST_FILE);
if (error) {
- inode->i_nlink = 0;
+ clear_nlink(inode);
iput(inode);
return error;
}
@@ -305,7 +305,7 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
error = affs_add_entry(dir, inode, dentry, ST_USERDIR);
if (error) {
- inode->i_nlink = 0;
+ clear_nlink(inode);
mark_inode_dirty(inode);
iput(inode);
return error;
@@ -392,7 +392,7 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
return 0;
err:
- inode->i_nlink = 0;
+ clear_nlink(inode);
mark_inode_dirty(inode);
iput(inode);
return error;
diff --git a/fs/afs/afs_vl.h b/fs/afs/afs_vl.h
index 8bbefe0..800f607 100644
--- a/fs/afs/afs_vl.h
+++ b/fs/afs/afs_vl.h
@@ -49,7 +49,7 @@ enum AFSVL_Errors {
AFSVL_BADVOLOPER = 363542, /* Bad volume operation code */
AFSVL_BADRELLOCKTYPE = 363543, /* Bad release lock type */
AFSVL_RERELEASE = 363544, /* Status report: last release was aborted */
- AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server °ag */
+ AFSVL_BADSERVERFLAG = 363545, /* Invalid replication site server flag */
AFSVL_PERM = 363546, /* No permission access */
AFSVL_NOMEM = 363547, /* malloc/realloc failed to alloc enough memory */
};
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 346e328..2f213d1 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -90,7 +90,7 @@ static void xdr_decode_AFSFetchStatus(const __be32 **_bp,
vnode->vfs_inode.i_uid = status->owner;
vnode->vfs_inode.i_gid = status->group;
vnode->vfs_inode.i_generation = vnode->fid.unique;
- vnode->vfs_inode.i_nlink = status->nlink;
+ set_nlink(&vnode->vfs_inode, status->nlink);
mode = vnode->vfs_inode.i_mode;
mode &= ~S_IALLUGO;
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 0fdab6e..d890ae3 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -67,7 +67,7 @@ static int afs_inode_map_status(struct afs_vnode *vnode, struct key *key)
fscache_attr_changed(vnode->cache);
#endif
- inode->i_nlink = vnode->status.nlink;
+ set_nlink(inode, vnode->status.nlink);
inode->i_uid = vnode->status.owner;
inode->i_gid = 0;
inode->i_size = vnode->status.size;
@@ -174,7 +174,7 @@ struct inode *afs_iget_autocell(struct inode *dir, const char *dev_name,
inode->i_size = 0;
inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
inode->i_op = &afs_autocell_inode_operations;
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
inode->i_uid = 0;
inode->i_gid = 0;
inode->i_ctime.tv_sec = get_seconds();
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 1f3624d..a306bb6 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -627,7 +627,7 @@ extern void afs_clear_permits(struct afs_vnode *);
extern void afs_cache_permit(struct afs_vnode *, struct key *, long);
extern void afs_zap_permits(struct rcu_head *);
extern struct key *afs_request_key(struct afs_cell *);
-extern int afs_permission(struct inode *, int, unsigned int);
+extern int afs_permission(struct inode *, int);
/*
* server.c
@@ -750,7 +750,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *);
extern ssize_t afs_file_write(struct kiocb *, const struct iovec *,
unsigned long, loff_t);
extern int afs_writeback_all(struct afs_vnode *);
-extern int afs_fsync(struct file *, int);
+extern int afs_fsync(struct file *, loff_t, loff_t, int);
/*****************************************************************************/
diff --git a/fs/afs/security.c b/fs/afs/security.c
index f44b9d3..8d01042 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -285,14 +285,14 @@ static int afs_check_permit(struct afs_vnode *vnode, struct key *key,
* - AFS ACLs are attached to directories only, and a file is controlled by its
* parent directory's ACL
*/
-int afs_permission(struct inode *inode, int mask, unsigned int flags)
+int afs_permission(struct inode *inode, int mask)
{
struct afs_vnode *vnode = AFS_FS_I(inode);
afs_access_t uninitialized_var(access);
struct key *key;
int ret;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
_enter("{{%x:%u},%lx},%x,",
@@ -350,7 +350,7 @@ int afs_permission(struct inode *inode, int mask, unsigned int flags)
}
key_put(key);
- ret = generic_permission(inode, mask, flags, NULL);
+ ret = generic_permission(inode, mask);
_leave(" = %d", ret);
return ret;
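
Note on the afs_permission() change above: ->permission() lost its flags argument, and RCU-walk is now signalled by MAY_NOT_BLOCK in the mask. A method that may sleep bails out with -ECHILD so the VFS retries in ref-walk mode, as sketched here (name illustrative):

	static int example_permission(struct inode *inode, int mask)
	{
		if (mask & MAY_NOT_BLOCK)
			return -ECHILD;	/* cannot sleep under RCU walk */
		return generic_permission(inode, mask);
	}
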
diff --git a/fs/afs/write.c b/fs/afs/write.c
index b806285..9aa52d9 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -681,9 +681,10 @@ int afs_writeback_all(struct afs_vnode *vnode)
* - the return status from this call provides a reliable indication of
* whether any write errors occurred for this process.
*/
-int afs_fsync(struct file *file, int datasync)
+int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
+ struct inode *inode = file->f_mapping->host;
struct afs_writeback *wb, *xwb;
struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode);
int ret;
@@ -692,12 +693,19 @@ int afs_fsync(struct file *file, int datasync)
vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name,
datasync);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
/* use a writeback record as a marker in the queue - when this reaches
* the front of the queue, all the outstanding writes are either
* completed or rejected */
wb = kzalloc(sizeof(*wb), GFP_KERNEL);
- if (!wb)
- return -ENOMEM;
+ if (!wb) {
+ ret = -ENOMEM;
+ goto out;
+ }
wb->vnode = vnode;
wb->first = 0;
wb->last = -1;
@@ -720,7 +728,7 @@ int afs_fsync(struct file *file, int datasync)
if (ret < 0) {
afs_put_writeback(wb);
_leave(" = %d [wb]", ret);
- return ret;
+ goto out;
}
/* wait for the preceding writes to actually complete */
@@ -729,6 +737,8 @@ int afs_fsync(struct file *file, int datasync)
vnode->writebacks.next == &wb->link);
afs_put_writeback(wb);
_leave(" = %d", ret);
+out:
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 756d328..650d520 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -39,27 +39,17 @@
/* #define DEBUG */
-#ifdef DEBUG
-#define DPRINTK(fmt, args...) \
-do { \
- printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \
- current->pid, __func__, ##args); \
-} while (0)
-#else
-#define DPRINTK(fmt, args...) do {} while (0)
-#endif
-
-#define AUTOFS_WARN(fmt, args...) \
-do { \
+#define DPRINTK(fmt, ...) \
+ pr_debug("pid %d: %s: " fmt "\n", \
+ current->pid, __func__, ##__VA_ARGS__)
+
+#define AUTOFS_WARN(fmt, ...) \
printk(KERN_WARNING "pid %d: %s: " fmt "\n", \
- current->pid, __func__, ##args); \
-} while (0)
+ current->pid, __func__, ##__VA_ARGS__)
-#define AUTOFS_ERROR(fmt, args...) \
-do { \
+#define AUTOFS_ERROR(fmt, ...) \
printk(KERN_ERR "pid %d: %s: " fmt "\n", \
- current->pid, __func__, ##args); \
-} while (0)
+ current->pid, __func__, ##__VA_ARGS__)
/* Unified info structure. This is pointed to by both the dentry and
inode structures. Each file in the filesystem has an instance of this
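
Note on the macro rewrite above: the GNU `args...` + do{}while(0) wrappers become standard C99 variadic macros, with DPRINTK mapped onto pr_debug() so it participates in dynamic debug instead of a hand-rolled #ifdef DEBUG. The same shape in isolation (macro names illustrative):

	#include <linux/printk.h>
	#include <linux/sched.h>	/* current */

	/* ##__VA_ARGS__ swallows the trailing comma when no args are given */
	#define EX_DEBUG(fmt, ...) \
		pr_debug("pid %d: %s: " fmt "\n", \
			 current->pid, __func__, ##__VA_ARGS__)
	#define EX_WARN(fmt, ...) \
		printk(KERN_WARNING "pid %d: %s: " fmt "\n", \
		       current->pid, __func__, ##__VA_ARGS__)
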
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index de54271..62d7a6d 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -95,7 +95,7 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
*/
static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
{
- struct autofs_dev_ioctl tmp;
+ struct autofs_dev_ioctl tmp, *res;
if (copy_from_user(&tmp, in, sizeof(tmp)))
return ERR_PTR(-EFAULT);
@@ -103,7 +103,14 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
if (tmp.size < sizeof(tmp))
return ERR_PTR(-EINVAL);
- return memdup_user(in, tmp.size);
+ if (tmp.size > (PATH_MAX + sizeof(tmp)))
+ return ERR_PTR(-ENAMETOOLONG);
+
+ res = memdup_user(in, tmp.size);
+ if (!IS_ERR(res))
+ res->size = tmp.size;
+
+ return res;
}
static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
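
Note on the copy_dev_ioctl() change above: the user-controlled size field is now bounded before memdup_user() (otherwise userspace could request an arbitrarily large allocation), and the validated size is stamped back into the kernel copy so a racing userspace rewrite of the field cannot be trusted. The fix in isolation:

	static struct autofs_dev_ioctl *
	example_copy(struct autofs_dev_ioctl __user *in)
	{
		struct autofs_dev_ioctl tmp, *res;

		if (copy_from_user(&tmp, in, sizeof(tmp)))
			return ERR_PTR(-EFAULT);
		if (tmp.size < sizeof(tmp))
			return ERR_PTR(-EINVAL);
		if (tmp.size > PATH_MAX + sizeof(tmp))
			return ERR_PTR(-ENAMETOOLONG);	/* clamp user size */

		res = memdup_user(in, tmp.size);
		if (!IS_ERR(res))
			res->size = tmp.size;	/* don't trust the raced copy */
		return res;
	}
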
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 2c69d12..7fc0371 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -100,7 +100,7 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev,
p = prev;
spin_lock(&p->d_lock);
again:
- next = p->d_u.d_child.next;
+ next = p->d_child.next;
start:
if (next == &root->d_subdirs) {
spin_unlock(&p->d_lock);
@@ -109,7 +109,7 @@ start:
return NULL;
}
- q = list_entry(next, struct dentry, d_u.d_child);
+ q = list_entry(next, struct dentry, d_child);
spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
/* Negative dentry - try next */
@@ -165,13 +165,13 @@ again:
goto relock;
}
spin_unlock(&p->d_lock);
- next = p->d_u.d_child.next;
+ next = p->d_child.next;
p = parent;
if (next != &parent->d_subdirs)
break;
}
}
- ret = list_entry(next, struct dentry, d_u.d_child);
+ ret = list_entry(next, struct dentry, d_child);
spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
/* Negative dentry - try next */
@@ -455,7 +455,7 @@ found:
spin_lock(&sbi->lookup_lock);
spin_lock(&expired->d_parent->d_lock);
spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
+ list_move(&expired->d_parent->d_subdirs, &expired->d_child);
spin_unlock(&expired->d_lock);
spin_unlock(&expired->d_parent->d_lock);
spin_unlock(&sbi->lookup_lock);
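
Note on the expire.c hunks above: a dentry's children hang off parent->d_subdirs, linked through each child's d_child member (named d_u.d_child before the rename this tree applies). A sketch of the list relationship (real callers, like the expire code, hold the parent's d_lock while walking):

	static struct dentry *example_first_child(struct dentry *parent)
	{
		if (list_empty(&parent->d_subdirs))
			return NULL;
		return list_entry(parent->d_subdirs.next, struct dentry, d_child);
	}
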
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 7c26678..7b5293e 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -342,7 +342,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, mode_t mode)
inode->i_ino = get_next_ino();
if (S_ISDIR(mode)) {
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
inode->i_op = &autofs4_dir_inode_operations;
inode->i_fop = &autofs4_dir_operations;
} else if (S_ISLNK(mode)) {
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 790fa63..2e936c6 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -651,7 +651,7 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
/* only consider parents below dentrys in the root */
if (IS_ROOT(parent->d_parent))
return;
- d_child = &dentry->d_u.d_child;
+ d_child = &dentry->d_child;
/* Set parent managed if it's becoming empty */
if (d_child->next == &parent->d_subdirs &&
d_child->prev == &parent->d_subdirs)
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 813ea10..e1fbdee 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -90,7 +90,7 @@ static int autofs4_write(struct file *file, const void *addr, int bytes)
return (bytes > 0);
}
-
+
static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
struct autofs_wait_queue *wq,
int type)
@@ -104,7 +104,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
size_t pktsz;
DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
- wq->wait_queue_token, wq->name.len, wq->name.name, type);
+ (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
memset(&pkt,0,sizeof pkt); /* For security reasons */
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 720d885..8342ca6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -357,7 +357,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino)
inode->i_gid = befs_sb->mount_opts.use_gid ?
befs_sb->mount_opts.gid : (gid_t) fs32_to_cpu(sb, raw_inode->gid);
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
/*
* BEFS's time is 64 bits, but current VFS is 32 bits...
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index b14cebf..9cc0740 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -199,7 +199,7 @@ static int bfs_unlink(struct inode *dir, struct dentry *dentry)
printf("unlinking non-existent file %s:%lu (nlink=%d)\n",
inode->i_sb->s_id, inode->i_ino,
inode->i_nlink);
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
}
de->ino = 0;
mark_buffer_dirty_inode(bh, dir);
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index a8e37f8..697af5b 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -78,7 +78,7 @@ struct inode *bfs_iget(struct super_block *sb, unsigned long ino)
BFS_I(inode)->i_dsk_ino = le16_to_cpu(di->i_ino);
inode->i_uid = le32_to_cpu(di->i_uid);
inode->i_gid = le32_to_cpu(di->i_gid);
- inode->i_nlink = le32_to_cpu(di->i_nlink);
+ set_nlink(inode, le32_to_cpu(di->i_nlink));
inode->i_size = BFS_FILESIZE(di);
inode->i_blocks = BFS_FILEBLOCKS(di);
inode->i_atime.tv_sec = le32_to_cpu(di->i_atime);
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index a2603e7..622f469 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -129,8 +129,6 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
!root->d_inode->i_op->mkdir ||
!root->d_inode->i_op->setxattr ||
!root->d_inode->i_op->getxattr ||
- !root->d_sb ||
- !root->d_sb->s_op ||
!root->d_sb->s_op->statfs ||
!root->d_sb->s_op->sync_fs)
goto error_unsupported;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index 0e3c092..b4d2438 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -918,7 +918,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
* own time */
dget(object->backer);
mntget(cache->mnt);
- file = dentry_open(object->backer, cache->mnt, O_RDWR,
+ file = dentry_open(object->backer, cache->mnt, O_RDWR | O_LARGEFILE,
cache->cache_cred);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 5a3953d..173b1d2 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -87,7 +87,7 @@ static int ceph_set_page_dirty(struct page *page)
snapc = ceph_get_snap_context(ci->i_snap_realm->cached_context);
/* dirty the head */
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_head_snapc == NULL)
ci->i_head_snapc = ceph_get_snap_context(snapc);
++ci->i_wrbuffer_ref_head;
@@ -100,7 +100,7 @@ static int ceph_set_page_dirty(struct page *page)
ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
snapc, snapc->seq, snapc->num_snaps);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
/* now adjust page */
spin_lock_irq(&mapping->tree_lock);
@@ -228,102 +228,155 @@ static int ceph_readpage(struct file *filp, struct page *page)
}
/*
- * Build a vector of contiguous pages from the provided page list.
+ * Finish an async read(ahead) op.
*/
-static struct page **page_vector_from_list(struct list_head *page_list,
- unsigned *nr_pages)
+static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
{
- struct page **pages;
- struct page *page;
- int next_index, contig_pages = 0;
+ struct inode *inode = req->r_inode;
+ struct ceph_osd_reply_head *replyhead;
+ int rc, bytes;
+ int i;
- /* build page vector */
- pages = kmalloc(sizeof(*pages) * *nr_pages, GFP_NOFS);
- if (!pages)
- return ERR_PTR(-ENOMEM);
+ /* parse reply */
+ replyhead = msg->front.iov_base;
+ WARN_ON(le32_to_cpu(replyhead->num_ops) == 0);
+ rc = le32_to_cpu(replyhead->result);
+ bytes = le32_to_cpu(msg->hdr.data_len);
- BUG_ON(list_empty(page_list));
- next_index = list_entry(page_list->prev, struct page, lru)->index;
- list_for_each_entry_reverse(page, page_list, lru) {
- if (page->index == next_index) {
- dout("readpages page %d %p\n", contig_pages, page);
- pages[contig_pages] = page;
- contig_pages++;
- next_index++;
- } else {
- break;
+ dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
+
+ /* unlock all pages, zeroing any data we didn't read */
+ for (i = 0; i < req->r_num_pages; i++, bytes -= PAGE_CACHE_SIZE) {
+ struct page *page = req->r_pages[i];
+
+ if (bytes < (int)PAGE_CACHE_SIZE) {
+ /* zero (remainder of) page */
+ int s = bytes < 0 ? 0 : bytes;
+ zero_user_segment(page, s, PAGE_CACHE_SIZE);
}
+ dout("finish_read %p uptodate %p idx %lu\n", inode, page,
+ page->index);
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ unlock_page(page);
+ page_cache_release(page);
}
- *nr_pages = contig_pages;
- return pages;
+ kfree(req->r_pages);
}
/*
- * Read multiple pages. Leave pages we don't read + unlock in page_list;
- * the caller (VM) cleans them up.
+ * start an async read(ahead) operation. return nr_pages we submitted
+ * a read for on success, or negative error code.
*/
-static int ceph_readpages(struct file *file, struct address_space *mapping,
- struct list_head *page_list, unsigned nr_pages)
+static int start_read(struct inode *inode, struct list_head *page_list, int max)
{
- struct inode *inode = file->f_dentry->d_inode;
- struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
- int rc = 0;
- struct page **pages;
- loff_t offset;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct page *page = list_entry(page_list->prev, struct page, lru);
+ struct ceph_osd_request *req;
+ u64 off;
u64 len;
+ int i;
+ struct page **pages;
+ pgoff_t next_index;
+ int nr_pages = 0;
+ int ret;
- dout("readpages %p file %p nr_pages %d\n",
- inode, file, nr_pages);
-
- pages = page_vector_from_list(page_list, &nr_pages);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
+ off = page->index << PAGE_CACHE_SHIFT;
- /* guess read extent */
- offset = pages[0]->index << PAGE_CACHE_SHIFT;
+ /* count pages */
+ next_index = page->index;
+ list_for_each_entry_reverse(page, page_list, lru) {
+ if (page->index != next_index)
+ break;
+ nr_pages++;
+ next_index++;
+ if (max && nr_pages == max)
+ break;
+ }
len = nr_pages << PAGE_CACHE_SHIFT;
- rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
- offset, &len,
- ci->i_truncate_seq, ci->i_truncate_size,
- pages, nr_pages, 0);
- if (rc == -ENOENT)
- rc = 0;
- if (rc < 0)
- goto out;
-
- for (; !list_empty(page_list) && len > 0;
- rc -= PAGE_CACHE_SIZE, len -= PAGE_CACHE_SIZE) {
- struct page *page =
- list_entry(page_list->prev, struct page, lru);
+ dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
+ off, len);
+
+ req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode),
+ off, &len,
+ CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
+ NULL, 0,
+ ci->i_truncate_seq, ci->i_truncate_size,
+ NULL, false, 1, 0);
+ if (!req)
+ return -ENOMEM;
+ /* build page vector */
+ nr_pages = len >> PAGE_CACHE_SHIFT;
+ pages = kmalloc(sizeof(*pages) * nr_pages, GFP_NOFS);
+ ret = -ENOMEM;
+ if (!pages)
+ goto out;
+ for (i = 0; i < nr_pages; ++i) {
+ page = list_entry(page_list->prev, struct page, lru);
+ BUG_ON(PageLocked(page));
list_del(&page->lru);
-
- if (rc < (int)PAGE_CACHE_SIZE) {
- /* zero (remainder of) page */
- int s = rc < 0 ? 0 : rc;
- zero_user_segment(page, s, PAGE_CACHE_SIZE);
- }
-
- if (add_to_page_cache_lru(page, mapping, page->index,
+
+ dout("start_read %p adding %p idx %lu\n", inode, page,
+ page->index);
+ if (add_to_page_cache_lru(page, &inode->i_data, page->index,
GFP_NOFS)) {
page_cache_release(page);
- dout("readpages %p add_to_page_cache failed %p\n",
+ dout("start_read %p add_to_page_cache failed %p\n",
inode, page);
- continue;
+ nr_pages = i;
+ goto out_pages;
}
- dout("readpages %p adding %p idx %lu\n", inode, page,
- page->index);
- flush_dcache_page(page);
- SetPageUptodate(page);
- unlock_page(page);
- page_cache_release(page);
+ pages[i] = page;
}
- rc = 0;
+ req->r_pages = pages;
+ req->r_num_pages = nr_pages;
+ req->r_callback = finish_read;
+ req->r_inode = inode;
+
+ dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
+ ret = ceph_osdc_start_request(osdc, req, false);
+ if (ret < 0)
+ goto out_pages;
+ ceph_osdc_put_request(req);
+ return nr_pages;
+out_pages:
+ ceph_release_page_vector(pages, nr_pages);
+out:
+ ceph_osdc_put_request(req);
+ return ret;
+}
+
+
+/*
+ * Read multiple pages. Leave pages we don't read + unlock in page_list;
+ * the caller (VM) cleans them up.
+ */
+static int ceph_readpages(struct file *file, struct address_space *mapping,
+ struct list_head *page_list, unsigned nr_pages)
+{
+ struct inode *inode = file->f_dentry->d_inode;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ int rc = 0;
+ int max = 0;
+
+ if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
+ max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
+ >> PAGE_SHIFT;
+
+ dout("readpages %p file %p nr_pages %d max %d\n", inode, file, nr_pages,
+ max);
+ while (!list_empty(page_list)) {
+ rc = start_read(inode, page_list, max);
+ if (rc < 0)
+ goto out;
+ BUG_ON(rc == 0);
+ }
out:
- kfree(pages);
+ dout("readpages %p file %p ret %d\n", inode, file, rc);
return rc;
}
@@ -338,7 +391,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
struct ceph_snap_context *snapc = NULL;
struct ceph_cap_snap *capsnap = NULL;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
capsnap->context, capsnap->dirty_pages);
@@ -354,7 +407,7 @@ static struct ceph_snap_context *get_oldest_context(struct inode *inode,
dout(" head snapc %p has %d dirty pages\n",
snapc, ci->i_wrbuffer_ref_head);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return snapc;
}
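
Note on the ceph/addr.c rework above: ceph_readpages() now loops over start_read(), which counts the run of index-contiguous pages at the tail of page_list (the VM supplies them in reverse index order), issues one async OSD read for that run, and lets finish_read() unlock and mark the pages. The contiguous-run count in isolation, mirroring start_read() above (function name illustrative):

	static int example_count_contig(struct list_head *page_list, int max)
	{
		struct page *page = list_entry(page_list->prev, struct page, lru);
		pgoff_t next_index = page->index;
		int nr = 0;

		/* walk backwards until the index sequence breaks or the
		 * rsize-derived cap is hit */
		list_for_each_entry_reverse(page, page_list, lru) {
			if (page->index != next_index)
				break;
			nr++;
			next_index++;
			if (max && nr == max)
				break;
		}
		return nr;
	}
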
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index f605753..8b53193 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -309,7 +309,7 @@ void ceph_reservation_status(struct ceph_fs_client *fsc,
/*
* Find ceph_cap for given mds, if any.
*
- * Called with i_lock held.
+ * Called with i_ceph_lock held.
*/
static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{
@@ -332,9 +332,9 @@ struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{
struct ceph_cap *cap;
- spin_lock(&ci->vfs_inode.i_lock);
+ spin_lock(&ci->i_ceph_lock);
cap = __get_cap_for_mds(ci, mds);
- spin_unlock(&ci->vfs_inode.i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return cap;
}
@@ -361,15 +361,16 @@ static int __ceph_get_cap_mds(struct ceph_inode_info *ci)
int ceph_get_cap_mds(struct inode *inode)
{
+ struct ceph_inode_info *ci = ceph_inode(inode);
int mds;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
mds = __ceph_get_cap_mds(ceph_inode(inode));
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return mds;
}
/*
- * Called under i_lock.
+ * Called under i_ceph_lock.
*/
static void __insert_cap_node(struct ceph_inode_info *ci,
struct ceph_cap *new)
@@ -415,7 +416,7 @@ static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
*
* If I_FLUSH is set, leave the inode at the front of the list.
*
- * Caller holds i_lock
+ * Caller holds i_ceph_lock
* -> we take mdsc->cap_delay_lock
*/
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
@@ -457,7 +458,7 @@ static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
/*
* Cancel delayed work on cap.
*
- * Caller must hold i_lock.
+ * Caller must hold i_ceph_lock.
*/
static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
struct ceph_inode_info *ci)
@@ -487,17 +488,15 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
ci->i_rdcache_gen++;
/*
- * if we are newly issued FILE_SHARED, clear I_COMPLETE; we
+ * if we are newly issued FILE_SHARED, clear D_COMPLETE; we
* don't know what happened to this directory while we didn't
* have the cap.
*/
if ((issued & CEPH_CAP_FILE_SHARED) &&
(had & CEPH_CAP_FILE_SHARED) == 0) {
ci->i_shared_gen++;
- if (S_ISDIR(ci->vfs_inode.i_mode)) {
- dout(" marking %p NOT complete\n", &ci->vfs_inode);
- ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
- }
+ if (S_ISDIR(ci->vfs_inode.i_mode))
+ ceph_dir_clear_complete(&ci->vfs_inode);
}
}
@@ -534,14 +533,14 @@ int ceph_add_cap(struct inode *inode,
wanted |= ceph_caps_for_mode(fmode);
retry:
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
cap = __get_cap_for_mds(ci, mds);
if (!cap) {
if (new_cap) {
cap = new_cap;
new_cap = NULL;
} else {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
new_cap = get_cap(mdsc, caps_reservation);
if (new_cap == NULL)
return -ENOMEM;
@@ -627,7 +626,7 @@ retry:
if (fmode >= 0)
__ceph_get_fmode(ci, fmode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
wake_up_all(&ci->i_cap_wq);
return 0;
}
@@ -794,7 +793,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
struct rb_node *p;
int ret = 0;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
if (__cap_is_valid(cap) &&
@@ -803,7 +802,7 @@ int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
break;
}
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("ceph_caps_revoking %p %s = %d\n", inode,
ceph_cap_string(mask), ret);
return ret;
@@ -857,7 +856,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
}
/*
- * called under i_lock
+ * called under i_ceph_lock
*/
static int __ceph_is_any_caps(struct ceph_inode_info *ci)
{
@@ -867,7 +866,7 @@ static int __ceph_is_any_caps(struct ceph_inode_info *ci)
/*
* Remove a cap. Take steps to deal with a racing iterate_session_caps.
*
- * caller should hold i_lock.
+ * caller should hold i_ceph_lock.
* caller will not hold session s_mutex if called from destroy_inode.
*/
void __ceph_remove_cap(struct ceph_cap *cap)
@@ -945,7 +944,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
seq, issue_seq, mseq, follows, size, max_size,
xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
if (!msg)
return -ENOMEM;
@@ -1030,7 +1029,7 @@ static void __queue_cap_release(struct ceph_mds_session *session,
/*
* Queue cap releases when an inode is dropped from our cache. Since
- * inode is about to be destroyed, there is no need for i_lock.
+ * inode is about to be destroyed, there is no need for i_ceph_lock.
*/
void ceph_queue_caps_release(struct inode *inode)
{
@@ -1051,7 +1050,7 @@ void ceph_queue_caps_release(struct inode *inode)
/*
* Send a cap msg on the given inode. Update our caps state, then
- * drop i_lock and send the message.
+ * drop i_ceph_lock and send the message.
*
* Make note of max_size reported/requested from mds, revoked caps
* that have now been implemented.
@@ -1063,13 +1062,13 @@ void ceph_queue_caps_release(struct inode *inode)
* Return non-zero if delayed release, or we experienced an error
* such that the caller should requeue + retry later.
*
- * called with i_lock, then drops it.
+ * called with i_ceph_lock, then drops it.
* caller should hold snap_rwsem (read), s_mutex.
*/
static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
int op, int used, int want, int retain, int flushing,
unsigned *pflush_tid)
- __releases(cap->ci->vfs_inode->i_lock)
+ __releases(cap->ci->i_ceph_lock)
{
struct ceph_inode_info *ci = cap->ci;
struct inode *inode = &ci->vfs_inode;
@@ -1172,7 +1171,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
xattr_version = ci->i_xattrs.version;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
@@ -1200,13 +1199,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
* Unless @again is true, skip cap_snaps that were already sent to
* the MDS (i.e., during this session).
*
- * Called under i_lock. Takes s_mutex as needed.
+ * Called under i_ceph_lock. Takes s_mutex as needed.
*/
void __ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession,
int again)
- __releases(ci->vfs_inode->i_lock)
- __acquires(ci->vfs_inode->i_lock)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
{
struct inode *inode = &ci->vfs_inode;
int mds;
@@ -1263,7 +1262,7 @@ retry:
session = NULL;
}
if (!session) {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
mutex_lock(&mdsc->mutex);
session = __ceph_lookup_mds_session(mdsc, mds);
mutex_unlock(&mdsc->mutex);
@@ -1277,7 +1276,7 @@ retry:
* deletion or migration. retry, and we'll
* get a better @mds value next time.
*/
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
goto retry;
}
@@ -1287,7 +1286,7 @@ retry:
list_del_init(&capsnap->flushing_item);
list_add_tail(&capsnap->flushing_item,
&session->s_cap_snaps_flushing);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
inode, capsnap, capsnap->follows, capsnap->flush_tid);
@@ -1304,7 +1303,7 @@ retry:
next_follows = capsnap->follows + 1;
ceph_put_cap_snap(capsnap);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
goto retry;
}
@@ -1324,11 +1323,9 @@ out:
static void ceph_flush_snaps(struct ceph_inode_info *ci)
{
- struct inode *inode = &ci->vfs_inode;
-
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
__ceph_flush_snaps(ci, NULL, 0);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
/*
@@ -1375,7 +1372,7 @@ int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask)
* Add dirty inode to the flushing list. Assigned a seq number so we
* can wait for caps to flush without starving.
*
- * Called under i_lock.
+ * Called under i_ceph_lock.
*/
static int __mark_caps_flushing(struct inode *inode,
struct ceph_mds_session *session)
@@ -1423,9 +1420,9 @@ static int try_nonblocking_invalidate(struct inode *inode)
struct ceph_inode_info *ci = ceph_inode(inode);
u32 invalidating_gen = ci->i_rdcache_gen;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
invalidate_mapping_pages(&inode->i_data, 0, -1);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (inode->i_data.nrpages == 0 &&
invalidating_gen == ci->i_rdcache_gen) {
@@ -1472,7 +1469,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
if (mdsc->stopping)
is_delayed = 1;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_FLUSH)
flags |= CHECK_CAPS_FLUSH;
@@ -1482,7 +1479,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
__ceph_flush_snaps(ci, &session, 0);
goto retry_locked;
retry:
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
retry_locked:
file_wanted = __ceph_caps_file_wanted(ci);
used = __ceph_caps_used(ci);
@@ -1636,7 +1633,7 @@ ack:
if (mutex_trylock(&session->s_mutex) == 0) {
dout("inverting session/ino locks on %p\n",
session);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (took_snap_rwsem) {
up_read(&mdsc->snap_rwsem);
took_snap_rwsem = 0;
@@ -1650,7 +1647,7 @@ ack:
if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
dout("inverting snap/in locks on %p\n",
inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
down_read(&mdsc->snap_rwsem);
took_snap_rwsem = 1;
goto retry;
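Both retry sites above follow the same locking discipline: i_ceph_lock is
a spinlock that nests inside the session mutex and snap_rwsem, so the
sleeping locks are only trylock'ed while the spinlock is held, and on
failure the spinlock is dropped before blocking. A self-contained sketch
of the pattern, with a hypothetical helper not taken from the patch:

	static void scan_with_outer_lock(struct ceph_inode_info *ci,
					 struct mutex *outer)
	{
		int have_outer = 0;
	retry:
		spin_lock(&ci->i_ceph_lock);
		if (!have_outer) {
			if (!mutex_trylock(outer)) {
				/* never sleep while holding a spinlock */
				spin_unlock(&ci->i_ceph_lock);
				mutex_lock(outer);  /* block in the legal order */
				have_outer = 1;
				goto retry;	    /* rescan under both locks */
			}
			have_outer = 1;
		}
		/* ... work that needs both locks ... */
		spin_unlock(&ci->i_ceph_lock);
		mutex_unlock(outer);
	}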
@@ -1666,10 +1663,10 @@ ack:
mds = cap->mds; /* remember mds, so we don't repeat */
sent++;
- /* __send_cap drops i_lock */
+ /* __send_cap drops i_ceph_lock */
delayed += __send_cap(mdsc, cap, CEPH_CAP_OP_UPDATE, used, want,
retain, flushing, NULL);
- goto retry; /* retake i_lock and restart our cap scan. */
+ goto retry; /* retake i_ceph_lock and restart our cap scan. */
}
/*
@@ -1683,7 +1680,7 @@ ack:
else if (!is_delayed || force_requeue)
__cap_delay_requeue(mdsc, ci);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (queue_invalidate)
ceph_queue_invalidate(inode);
@@ -1706,7 +1703,7 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session,
int flushing = 0;
retry:
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_ceph_flags & CEPH_I_NOFLUSH) {
dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode);
goto out;
@@ -1718,7 +1715,7 @@ retry:
int delayed;
if (!session) {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
session = cap->session;
mutex_lock(&session->s_mutex);
goto retry;
@@ -1729,18 +1726,18 @@ retry:
flushing = __mark_caps_flushing(inode, session);
- /* __send_cap drops i_lock */
+ /* __send_cap drops i_ceph_lock */
delayed = __send_cap(mdsc, cap, CEPH_CAP_OP_FLUSH, used, want,
cap->issued | cap->implemented, flushing,
flush_tid);
if (!delayed)
goto out_unlocked;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
__cap_delay_requeue(mdsc, ci);
}
out:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
out_unlocked:
if (session && unlock_session)
mutex_unlock(&session->s_mutex);
@@ -1755,7 +1752,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
struct ceph_inode_info *ci = ceph_inode(inode);
int i, ret = 1;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
for (i = 0; i < CEPH_CAP_BITS; i++)
if ((ci->i_flushing_caps & (1 << i)) &&
ci->i_cap_flush_tid[i] <= tid) {
@@ -1763,7 +1760,7 @@ static int caps_are_flushed(struct inode *inode, unsigned tid)
ret = 0;
break;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return ret;
}
@@ -1811,7 +1808,7 @@ out:
spin_unlock(&ci->i_unsafe_lock);
}
-int ceph_fsync(struct file *file, int datasync)
+int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1822,9 +1819,10 @@ int ceph_fsync(struct file *file, int datasync)
dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
sync_write_wait(inode);
- ret = filemap_write_and_wait(inode->i_mapping);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret < 0)
return ret;
+ mutex_lock(&inode->i_mutex);
dirty = try_flush_caps(inode, NULL, &flush_tid);
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
@@ -1841,6 +1839,7 @@ int ceph_fsync(struct file *file, int datasync)
}
dout("fsync %p%s done\n", inode, datasync ? " datasync" : "");
+ mutex_unlock(&inode->i_mutex);
return ret;
}
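The widened prototype tracks the VFS change that made ->fsync ranged and
stopped calling it under i_mutex, which is why the function now writes
data first and then takes the mutex itself for the metadata flush. The
shape every converted ->fsync ends up with, sketched for a hypothetical
filesystem (not the ceph code):

	int example_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
	{
		struct inode *inode = file->f_mapping->host;
		int ret;

		/* flush data without i_mutex */
		ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
		if (ret < 0)
			return ret;

		mutex_lock(&inode->i_mutex);	/* the VFS no longer holds it */
		/* flush metadata, wait for commits */
		mutex_unlock(&inode->i_mutex);
		return ret;
	}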
@@ -1868,10 +1867,10 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
struct ceph_mds_client *mdsc =
ceph_sb_to_client(inode->i_sb)->mdsc;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (__ceph_caps_dirty(ci))
__cap_delay_requeue_front(mdsc, ci);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
return err;
}
@@ -1894,7 +1893,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
struct inode *inode = &ci->vfs_inode;
struct ceph_cap *cap;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
cap = ci->i_auth_cap;
if (cap && cap->session == session) {
dout("kick_flushing_caps %p cap %p capsnap %p\n", inode,
@@ -1904,7 +1903,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc,
pr_err("%p auth cap %p not mds%d ???\n", inode,
cap, session->s_mds);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
}
@@ -1921,7 +1920,7 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_cap *cap;
int delayed = 0;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
cap = ci->i_auth_cap;
if (cap && cap->session == session) {
dout("kick_flushing_caps %p cap %p %s\n", inode,
@@ -1932,14 +1931,14 @@ void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
cap->issued | cap->implemented,
ci->i_flushing_caps, NULL);
if (delayed) {
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
__cap_delay_requeue(mdsc, ci);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
} else {
pr_err("%p auth cap %p not mds%d ???\n", inode,
cap, session->s_mds);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
}
}
@@ -1952,7 +1951,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
struct ceph_cap *cap;
int delayed = 0;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
cap = ci->i_auth_cap;
dout("kick_flushing_inode_caps %p flushing %s flush_seq %lld\n", inode,
ceph_cap_string(ci->i_flushing_caps), ci->i_cap_flush_seq);
@@ -1964,12 +1963,12 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
cap->issued | cap->implemented,
ci->i_flushing_caps, NULL);
if (delayed) {
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
__cap_delay_requeue(mdsc, ci);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
} else {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
}
@@ -1978,7 +1977,7 @@ static void kick_flushing_inode_caps(struct ceph_mds_client *mdsc,
* Take references to capabilities we hold, so that we don't release
* them to the MDS prematurely.
*
- * Protected by i_lock.
+ * Protected by i_ceph_lock.
*/
static void __take_cap_refs(struct ceph_inode_info *ci, int got)
{
@@ -2016,7 +2015,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
dout("get_cap_refs %p need %s want %s\n", inode,
ceph_cap_string(need), ceph_cap_string(want));
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
/* make sure file is actually open */
file_wanted = __ceph_caps_file_wanted(ci);
@@ -2077,7 +2076,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
ceph_cap_string(have), ceph_cap_string(need));
}
out:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("get_cap_refs %p ret %d got %s\n", inode,
ret, ceph_cap_string(*got));
return ret;
@@ -2094,7 +2093,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
int check = 0;
/* do we need to explicitly request a larger max_size? */
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if ((endoff >= ci->i_max_size ||
endoff > (inode->i_size << 1)) &&
endoff > ci->i_wanted_max_size) {
@@ -2103,7 +2102,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
ci->i_wanted_max_size = endoff;
check = 1;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (check)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
}
@@ -2140,9 +2139,9 @@ retry:
*/
void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
{
- spin_lock(&ci->vfs_inode.i_lock);
+ spin_lock(&ci->i_ceph_lock);
__take_cap_refs(ci, caps);
- spin_unlock(&ci->vfs_inode.i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
/*
@@ -2160,7 +2159,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
int last = 0, put = 0, flushsnaps = 0, wake = 0;
struct ceph_cap_snap *capsnap;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (had & CEPH_CAP_PIN)
--ci->i_pin_ref;
if (had & CEPH_CAP_FILE_RD)
@@ -2193,7 +2192,7 @@ void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
}
}
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
last ? " last" : "", put ? " put" : "");
@@ -2225,7 +2224,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
int found = 0;
struct ceph_cap_snap *capsnap = NULL;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
ci->i_wrbuffer_ref -= nr;
last = !ci->i_wrbuffer_ref;
@@ -2274,7 +2273,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
}
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (last) {
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -2291,7 +2290,7 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
* Handle a cap GRANT message from the MDS. (Note that a GRANT may
* actually be a revocation if it specifies a smaller cap set.)
*
- * caller holds s_mutex and i_lock, we drop both.
+ * caller holds s_mutex and i_ceph_lock, we drop both.
*
* return value:
* 0 - ok
@@ -2302,7 +2301,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
struct ceph_mds_session *session,
struct ceph_cap *cap,
struct ceph_buffer *xattr_buf)
- __releases(inode->i_lock)
+ __releases(ci->i_ceph_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
@@ -2361,7 +2360,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
}
if ((issued & CEPH_CAP_LINK_EXCL) == 0)
- inode->i_nlink = le32_to_cpu(grant->nlink);
+ set_nlink(inode, le32_to_cpu(grant->nlink));
if ((issued & CEPH_CAP_XATTR_EXCL) == 0 && grant->xattr_len) {
int len = le32_to_cpu(grant->xattr_len);
@@ -2453,7 +2452,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
}
BUG_ON(cap->issued & ~cap->implemented);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (writeback)
/*
* queue inode for writeback: we can't actually call
@@ -2483,7 +2482,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
struct ceph_mds_caps *m,
struct ceph_mds_session *session,
struct ceph_cap *cap)
- __releases(inode->i_lock)
+ __releases(ci->i_ceph_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
@@ -2539,7 +2538,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
wake_up_all(&ci->i_cap_wq);
out:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (drop)
iput(inode);
}
@@ -2562,7 +2561,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
inode, ci, session->s_mds, follows);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
if (capsnap->follows == follows) {
if (capsnap->flush_tid != flush_tid) {
@@ -2585,7 +2584,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
capsnap, capsnap->follows);
}
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (drop)
iput(inode);
}
@@ -2598,7 +2597,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
static void handle_cap_trunc(struct inode *inode,
struct ceph_mds_caps *trunc,
struct ceph_mds_session *session)
- __releases(inode->i_lock)
+ __releases(ci->i_ceph_lock)
{
struct ceph_inode_info *ci = ceph_inode(inode);
int mds = session->s_mds;
@@ -2617,7 +2616,7 @@ static void handle_cap_trunc(struct inode *inode,
inode, mds, seq, truncate_size, truncate_seq);
queue_trunc = ceph_fill_file_size(inode, issued,
truncate_seq, truncate_size, size);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (queue_trunc)
ceph_queue_vmtruncate(inode);
@@ -2646,7 +2645,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
dout("handle_cap_export inode %p ci %p mds%d mseq %d\n",
inode, ci, mds, mseq);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
/* make sure we haven't seen a higher mseq */
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
@@ -2690,7 +2689,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
}
/* else, we already released it */
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
/*
@@ -2745,9 +2744,9 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
up_read(&mdsc->snap_rwsem);
/* make sure we re-request max_size, if necessary */
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
ci->i_requested_max_size = 0;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
/*
@@ -2762,6 +2761,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
struct ceph_mds_client *mdsc = session->s_mdsc;
struct super_block *sb = mdsc->fsc->sb;
struct inode *inode;
+ struct ceph_inode_info *ci;
struct ceph_cap *cap;
struct ceph_mds_caps *h;
int mds = session->s_mds;
@@ -2815,6 +2815,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
/* lookup ino */
inode = ceph_find_inode(sb, vino);
+ ci = ceph_inode(inode);
dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
vino.snap, inode);
if (!inode) {
@@ -2844,16 +2845,16 @@ void ceph_handle_caps(struct ceph_mds_session *session,
}
/* the rest require a cap */
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
cap = __get_cap_for_mds(ceph_inode(inode), mds);
if (!cap) {
dout(" no cap on %p ino %llx.%llx from mds%d\n",
inode, ceph_ino(inode), ceph_snap(inode), mds);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
goto flush_cap_releases;
}
- /* note that each of these drops i_lock for us */
+ /* note that each of these drops i_ceph_lock for us */
switch (op) {
case CEPH_CAP_OP_REVOKE:
case CEPH_CAP_OP_GRANT:
@@ -2869,7 +2870,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
break;
default:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
ceph_cap_op_name(op));
}
@@ -2962,13 +2963,13 @@ void ceph_put_fmode(struct ceph_inode_info *ci, int fmode)
struct inode *inode = &ci->vfs_inode;
int last = 0;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dout("put_fmode %p fmode %d %d -> %d\n", inode, fmode,
ci->i_nr_by_mode[fmode], ci->i_nr_by_mode[fmode]-1);
BUG_ON(ci->i_nr_by_mode[fmode] == 0);
if (--ci->i_nr_by_mode[fmode] == 0)
last++;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (last && ci->i_vino.snap == CEPH_NOSNAP)
ceph_check_caps(ci, 0, NULL);
@@ -2991,7 +2992,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
int used, dirty;
int ret = 0;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
used = __ceph_caps_used(ci);
dirty = __ceph_caps_dirty(ci);
@@ -3046,7 +3047,7 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
inode, cap, ceph_cap_string(cap->issued));
}
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return ret;
}
@@ -3061,7 +3062,7 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
/*
* force a record for the directory caps if we have a dentry lease.
- * this is racy (can't take i_lock and d_lock together), but it
+ * this is racy (can't take i_ceph_lock and d_lock together), but it
* doesn't have to be perfect; the mds will revoke anything we don't
* release.
*/
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 0dba691..fb962ef 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -102,7 +102,7 @@ static int mdsc_show(struct seq_file *s, void *p)
path = NULL;
spin_lock(&req->r_old_dentry->d_lock);
seq_printf(s, " #%llx/%.*s (%s)",
- ceph_ino(req->r_old_dentry->d_parent->d_inode),
+ ceph_ino(req->r_old_dentry_dir),
req->r_old_dentry->d_name.len,
req->r_old_dentry->d_name.name,
path ? path : "");
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index ef8f08c..7903e62 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,14 +40,6 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_fsdata)
return 0;
- if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
- ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
- d_set_d_op(dentry, &ceph_dentry_ops);
- else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
- d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
- else
- d_set_d_op(dentry, &ceph_snap_dentry_ops);
-
di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO);
if (!di)
return -ENOMEM; /* oh well */
@@ -58,16 +50,42 @@ int ceph_init_dentry(struct dentry *dentry)
kmem_cache_free(ceph_dentry_cachep, di);
goto out_unlock;
}
+
+ if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
+ ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+ d_set_d_op(dentry, &ceph_dentry_ops);
+ else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
+ d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
+ else
+ d_set_d_op(dentry, &ceph_snap_dentry_ops);
+
di->dentry = dentry;
di->lease_session = NULL;
- dentry->d_fsdata = di;
dentry->d_time = jiffies;
+ /* avoid reordering d_fsdata setup so that the check above is safe */
+ smp_mb();
+ dentry->d_fsdata = di;
ceph_dentry_lru_add(dentry);
out_unlock:
spin_unlock(&dentry->d_lock);
return 0;
}
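The barrier added above is the usual lockless publish pattern: the fast
path at the top of ceph_init_dentry reads dentry->d_fsdata without any
lock, so every store to di must be visible before the pointer itself is.
Boiled down to a generic sketch (illustrative only; on later kernels
smp_store_release() or rcu_assign_pointer() would express the same
ordering):

	struct foo { int a; int b; };
	static struct foo *global_foo;	/* read locklessly elsewhere */

	static void publish_foo(void)
	{
		struct foo *p = kmalloc(sizeof(*p), GFP_NOFS);

		if (!p)
			return;
		p->a = 1;	/* fully initialise the object ...   */
		p->b = 2;
		smp_mb();	/* ... and order those stores ...    */
		global_foo = p;	/* ... before publishing the pointer */
	}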
+struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
+{
+ struct inode *inode = NULL;
+
+ if (!dentry)
+ return NULL;
+
+ spin_lock(&dentry->d_lock);
+ if (dentry->d_parent) {
+ inode = dentry->d_parent->d_inode;
+ ihold(inode);
+ }
+ spin_unlock(&dentry->d_lock);
+ return inode;
+}
/*
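The new helper exists because bare dentry->d_parent->d_inode chains race
with rename: d_lock stabilises d_parent and ihold() pins the inode for
the caller, who therefore owns a reference. Typical use, mirroring the
ceph_d_revalidate change later in this patch:

	struct inode *dir = ceph_get_dentry_parent_inode(dentry);

	if (dir) {
		/* use dir safely, even across a concurrent rename */
		iput(dir);
	}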
@@ -86,11 +104,11 @@ static unsigned fpos_off(loff_t p)
/*
* When possible, we try to satisfy a readdir by peeking at the
* dcache. We make this work by carefully ordering dentries on
- * d_u.d_child when we initially get results back from the MDS, and
+ * d_child when we initially get results back from the MDS, and
* falling back to a "normal" sync readdir if any dentries in the dir
* are dropped.
*
- * I_COMPLETE tells indicates we have all dentries in the dir. It is
+ * D_COMPLETE indicates we have all dentries in the dir. It is
* defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by
* the MDS if/when the directory is modified).
*/
@@ -122,18 +140,18 @@ static int __dcache_readdir(struct file *filp,
p = parent->d_subdirs.prev;
dout(" initial p %p/%p\n", p->prev, p->next);
} else {
- p = last->d_u.d_child.prev;
+ p = last->d_child.prev;
}
more:
- dentry = list_entry(p, struct dentry, d_u.d_child);
+ dentry = list_entry(p, struct dentry, d_child);
di = ceph_dentry(dentry);
while (1) {
dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
d_unhashed(dentry) ? "!hashed" : "hashed",
parent->d_subdirs.prev, parent->d_subdirs.next);
if (p == &parent->d_subdirs) {
- fi->at_end = 1;
+ fi->flags |= CEPH_F_ATEND;
goto out_unlock;
}
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -148,7 +166,7 @@ more:
!dentry->d_inode ? " null" : "");
spin_unlock(&dentry->d_lock);
p = p->prev;
- dentry = list_entry(p, struct dentry, d_u.d_child);
+ dentry = list_entry(p, struct dentry, d_child);
di = ceph_dentry(dentry);
}
@@ -181,8 +199,8 @@ more:
filp->f_pos++;
/* make sure a dentry wasn't dropped while we didn't have parent lock */
- if (!ceph_i_test(dir, CEPH_I_COMPLETE)) {
- dout(" lost I_COMPLETE on %p; falling back to mds\n", dir);
+ if (!ceph_dir_test_complete(dir)) {
+ dout(" lost D_COMPLETE on %p; falling back to mds\n", dir);
err = -EAGAIN;
goto out;
}
@@ -234,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
const int max_bytes = fsc->mount_options->max_readdir_bytes;
dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off);
- if (fi->at_end)
+ if (fi->flags & CEPH_F_ATEND)
return 0;
/* always start with . and .. */
@@ -252,7 +270,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
off = 1;
}
if (filp->f_pos == 1) {
- ino_t ino = filp->f_dentry->d_parent->d_inode->i_ino;
+ ino_t ino = parent_ino(filp->f_dentry);
dout("readdir off 1 -> '..'\n");
if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1),
ceph_translate_ino(inode->i_sb, ino),
@@ -263,18 +281,18 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
/* can we use the dcache? */
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if ((filp->f_pos == 2 || fi->dentry) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
- (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
+ ceph_dir_test_complete(inode) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
err = __dcache_readdir(filp, dirent, filldir);
if (err != -EAGAIN)
return err;
} else {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
if (fi->dentry) {
err = note_last_dentry(fi, fi->dentry->d_name.name,
@@ -333,7 +351,7 @@ more:
if (!req->r_did_prepopulate) {
dout("readdir !did_prepopulate");
- fi->dir_release_count--; /* preclude I_COMPLETE */
+ fi->dir_release_count--; /* preclude D_COMPLETE */
}
/* note next offset and last dentry name */
@@ -403,20 +421,19 @@ more:
dout("readdir next frag is %x\n", frag);
goto more;
}
- fi->at_end = 1;
+ fi->flags |= CEPH_F_ATEND;
/*
* if dir_release_count still matches the dir, no dentries
* were released during the whole readdir, and we should have
* the complete dir contents in our cache.
*/
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_release_count == fi->dir_release_count) {
- dout(" marking %p complete\n", inode);
- /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
+ ceph_dir_set_complete(inode);
ci->i_max_offset = filp->f_pos;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("readdir %p filp %p done.\n", inode, filp);
return 0;
@@ -435,7 +452,7 @@ static void reset_readdir(struct ceph_file_info *fi)
dput(fi->dentry);
fi->dentry = NULL;
}
- fi->at_end = 0;
+ fi->flags &= ~CEPH_F_ATEND;
}
static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
@@ -446,19 +463,24 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
loff_t retval;
mutex_lock(&inode->i_mutex);
+ retval = -EINVAL;
switch (origin) {
case SEEK_END:
offset += inode->i_size + 2; /* FIXME */
break;
case SEEK_CUR:
offset += file->f_pos;
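+ /* fall through */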
+ case SEEK_SET:
+ break;
+ default:
+ goto out;
}
- retval = -EINVAL;
+
if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) {
if (offset != file->f_pos) {
file->f_pos = offset;
file->f_version = 0;
- fi->at_end = 0;
+ fi->flags &= ~CEPH_F_ATEND;
}
retval = offset;
@@ -477,26 +499,19 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin)
if (offset > old_offset)
fi->dir_release_count--;
}
+out:
mutex_unlock(&inode->i_mutex);
return retval;
}
/*
- * Process result of a lookup/open request.
- *
- * Mainly, make sure we return the final req->r_dentry (if it already
- * existed) in place of the original VFS-provided dentry when they
- * differ.
- *
- * Gracefully handle the case where the MDS replies with -ENOENT and
- * no trace (which it may do, at its discretion, e.g., if it doesn't
- * care to issue a lease on the negative dentry).
+ * Handle lookups for the hidden .snap directory.
*/
-struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
- struct dentry *dentry, int err)
+int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
- struct inode *parent = dentry->d_parent->d_inode;
+ struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */
/* .snap dir? */
if (err == -ENOENT &&
@@ -510,7 +525,23 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
d_add(dentry, inode);
err = 0;
}
+ return err;
+}
+
+/*
+ * Figure out final result of a lookup/open request.
+ *
+ * Mainly, make sure we return the final req->r_dentry (if it already
+ * existed) in place of the original VFS-provided dentry when they
+ * differ.
+ *
+ * Gracefully handle the case where the MDS replies with -ENOENT and
+ * no trace (which it may do, at its discretion, e.g., if it doesn't
+ * care to issue a lease on the negative dentry).
+ */
+struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
+{
if (err == -ENOENT) {
/* no trace? */
err = 0;
@@ -566,7 +597,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
/* open (but not create!) intent? */
if (nd &&
(nd->flags & LOOKUP_OPEN) &&
- (nd->flags & LOOKUP_CONTINUE) == 0 && /* only open last component */
!(nd->intent.open.flags & O_CREAT)) {
int mode = nd->intent.open.create_mode & ~current->fs->umask;
return ceph_lookup_open(dir, dentry, nd, mode, 1);
@@ -577,21 +607,21 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct ceph_inode_info *ci = ceph_inode(dir);
struct ceph_dentry_info *di = ceph_dentry(dentry);
- spin_lock(&dir->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags);
if (strncmp(dentry->d_name.name,
fsc->mount_options->snapdir_name,
dentry->d_name.len) &&
!is_root_ceph_dentry(dir, dentry) &&
- (ci->i_ceph_flags & CEPH_I_COMPLETE) &&
+ ceph_dir_test_complete(dir) &&
(__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1))) {
- spin_unlock(&dir->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout(" dir %p complete, -ENOENT\n", dir);
d_add(dentry, NULL);
di->lease_shared_gen = ci->i_shared_gen;
return NULL;
}
- spin_unlock(&dir->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
op = ceph_snap(dir) == CEPH_SNAPDIR ?
@@ -605,6 +635,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
req->r_locked_dir = dir;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ err = ceph_handle_snapdir(req, dentry, err);
dentry = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req); /* will dput(dentry) */
dout("lookup result=%p\n", dentry);
@@ -784,6 +815,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */
+ req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
req->r_locked_dir = dir;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -809,12 +841,12 @@ static int drop_caps_for_unlink(struct inode *inode)
struct ceph_inode_info *ci = ceph_inode(inode);
int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (inode->i_nlink == 1) {
drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
ci->i_ceph_flags |= CEPH_I_NODELAY;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return drop;
}
@@ -882,6 +914,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_dentry = dget(new_dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
+ req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry);
req->r_locked_dir = new_dir;
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
@@ -900,7 +933,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
*/
/* d_move screws up d_subdirs order */
- ceph_i_clear(new_dir, CEPH_I_COMPLETE);
+ ceph_dir_clear_complete(new_dir);
d_move(old_dentry, new_dentry);
@@ -982,10 +1015,10 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
struct ceph_dentry_info *di = ceph_dentry(dentry);
int valid = 0;
- spin_lock(&dir->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_shared_gen == di->lease_shared_gen)
valid = __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1);
- spin_unlock(&dir->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("dir_lease_is_valid dir %p v%u dentry %p v%u = %d\n",
dir, (unsigned)ci->i_shared_gen, dentry,
(unsigned)di->lease_shared_gen, valid);
@@ -997,36 +1030,38 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
*/
static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd)
{
+ int valid = 0;
struct inode *dir;
if (nd && nd->flags & LOOKUP_RCU)
return -ECHILD;
- dir = dentry->d_parent->d_inode;
-
dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode,
ceph_dentry(dentry)->offset);
+ dir = ceph_get_dentry_parent_inode(dentry);
+
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry,
dentry->d_name.len, dentry->d_name.name, dentry->d_inode);
- goto out_touch;
+ valid = 1;
+ } else if (dentry->d_inode &&
+ ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) {
+ valid = 1;
+ } else if (dentry_lease_is_valid(dentry) ||
+ dir_lease_is_valid(dir, dentry)) {
+ valid = 1;
}
- if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR)
- goto out_touch;
- if (dentry_lease_is_valid(dentry) ||
- dir_lease_is_valid(dir, dentry))
- goto out_touch;
-
- dout("d_revalidate %p invalid\n", dentry);
- d_drop(dentry);
- return 0;
-out_touch:
- ceph_dentry_lru_touch(dentry);
- return 1;
+ dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
+ if (valid)
+ ceph_dentry_lru_touch(dentry);
+ else
+ d_drop(dentry);
+ iput(dir);
+ return valid;
}
/*
@@ -1056,7 +1091,52 @@ static int ceph_snapdir_d_revalidate(struct dentry *dentry,
return 1;
}
+/*
+ * Set/clear/test dir complete flag on the dir's dentry.
+ */
+void ceph_dir_set_complete(struct inode *inode)
+{
+ /* not yet implemented */
+}
+
+void ceph_dir_clear_complete(struct inode *inode)
+{
+ /* not yet implemented */
+}
+
+bool ceph_dir_test_complete(struct inode *inode)
+{
+ /* not yet implemented */
+ return false;
+}
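The three stubs above are transitional; the flag they will manage is
already consumed by ceph_d_prune below, which clears CEPH_D_COMPLETE in
the parent dentry's ceph_dentry_info. A hypothetical sketch of where the
implementation is headed under that assumption (d_find_alias() chosen
here purely for illustration):

	void ceph_dir_set_complete(struct inode *inode)
	{
		struct dentry *dentry = d_find_alias(inode);

		if (dentry) {
			set_bit(CEPH_D_COMPLETE, &ceph_dentry(dentry)->flags);
			dput(dentry);
		}
	}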
+
+/*
+ * When the VFS prunes a dentry from the cache, we need to clear the
+ * complete flag on the parent directory.
+ *
+ * Called under dentry->d_lock.
+ */
+static void ceph_d_prune(struct dentry *dentry)
+{
+ struct ceph_dentry_info *di;
+
+ dout("ceph_d_prune %p\n", dentry);
+
+ /* do we have a valid parent? */
+ if (!dentry->d_parent || IS_ROOT(dentry))
+ return;
+
+ /* if we are not hashed, we don't affect D_COMPLETE */
+ if (d_unhashed(dentry))
+ return;
+
+ /*
+ * we hold d_lock, so d_parent is stable, and d_fsdata is never
+ * cleared until d_release
+ */
+ di = ceph_dentry(dentry->d_parent);
+ clear_bit(CEPH_D_COMPLETE, &di->flags);
+}
/*
* read() on a dir. This weird interface hack only works if mounted
@@ -1113,7 +1193,8 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
* an fsync() on a dir will wait for any uncommitted directory
* operations to commit.
*/
-static int ceph_dir_fsync(struct file *file, int datasync)
+static int ceph_dir_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
struct inode *inode = file->f_path.dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1123,6 +1204,11 @@ static int ceph_dir_fsync(struct file *file, int datasync)
int ret = 0;
dout("dir_fsync %p\n", inode);
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
+
spin_lock(&ci->i_unsafe_lock);
if (list_empty(head))
goto out;
@@ -1156,6 +1242,8 @@ static int ceph_dir_fsync(struct file *file, int datasync)
} while (req->r_tid < last_tid);
out:
spin_unlock(&ci->i_unsafe_lock);
+ mutex_unlock(&inode->i_mutex);
+
return ret;
}
@@ -1215,9 +1303,8 @@ void ceph_dentry_lru_del(struct dentry *dn)
* Return name hash for a given dentry. This is dependent on
* the parent directory's hash function.
*/
-unsigned ceph_dentry_hash(struct dentry *dn)
+unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn)
{
- struct inode *dir = dn->d_parent->d_inode;
struct ceph_inode_info *dci = ceph_inode(dir);
switch (dci->i_dir_layout.dl_dir_hash) {
@@ -1263,6 +1350,7 @@ const struct inode_operations ceph_dir_iops = {
const struct dentry_operations ceph_dentry_ops = {
.d_revalidate = ceph_d_revalidate,
.d_release = ceph_d_release,
+ .d_prune = ceph_d_prune,
};
const struct dentry_operations ceph_snapdir_dentry_ops = {
@@ -1272,4 +1360,5 @@ const struct dentry_operations ceph_snapdir_dentry_ops = {
const struct dentry_operations ceph_snap_dentry_ops = {
.d_release = ceph_d_release,
+ .d_prune = ceph_d_prune,
};
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index a080779..b001030 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -46,7 +46,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
int type;
struct ceph_nfs_fh *fh = (void *)rawfh;
struct ceph_nfs_confh *cfh = (void *)rawfh;
- struct dentry *parent = dentry->d_parent;
+ struct dentry *parent;
struct inode *inode = dentry->d_inode;
int connected_handle_length = sizeof(*cfh)/4;
int handle_length = sizeof(*fh)/4;
@@ -55,26 +55,33 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
if (ceph_snap(inode) != CEPH_NOSNAP)
return -EINVAL;
+ spin_lock(&dentry->d_lock);
+ parent = dget(dentry->d_parent);
+ spin_unlock(&dentry->d_lock);
+
if (*max_len >= connected_handle_length) {
dout("encode_fh %p connectable\n", dentry);
cfh->ino = ceph_ino(dentry->d_inode);
cfh->parent_ino = ceph_ino(parent->d_inode);
- cfh->parent_name_hash = ceph_dentry_hash(parent);
+ cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode,
+ dentry);
*max_len = connected_handle_length;
type = 2;
} else if (*max_len >= handle_length) {
if (connectable) {
*max_len = connected_handle_length;
- return 255;
+ type = 255;
+ } else {
+ dout("encode_fh %p\n", dentry);
+ fh->ino = ceph_ino(dentry->d_inode);
+ *max_len = handle_length;
+ type = 1;
}
- dout("encode_fh %p\n", dentry);
- fh->ino = ceph_ino(dentry->d_inode);
- *max_len = handle_length;
- type = 1;
} else {
*max_len = handle_length;
- return 255;
+ type = 255;
}
+ dput(parent);
return type;
}
@@ -126,7 +133,6 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
return dentry;
}
err = ceph_init_dentry(dentry);
-
if (err < 0) {
iput(inode);
return ERR_PTR(err);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4698a5c..ed72428 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -122,7 +122,7 @@ int ceph_open(struct inode *inode, struct file *file)
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
struct ceph_file_info *cf = file->private_data;
- struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
+ struct inode *parent_inode = NULL;
int err;
int flags, fmode, wanted;
@@ -147,9 +147,9 @@ int ceph_open(struct inode *inode, struct file *file)
/* trivially open snapdir */
if (ceph_snap(inode) == CEPH_SNAPDIR) {
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
__ceph_get_fmode(ci, fmode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return ceph_init_file(inode, file, fmode);
}
@@ -158,7 +158,7 @@ int ceph_open(struct inode *inode, struct file *file)
* write) or any MDS (for read). Update wanted set
* asynchronously.
*/
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (__ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
int mds_wanted = __ceph_caps_mds_wanted(ci);
@@ -168,7 +168,7 @@ int ceph_open(struct inode *inode, struct file *file)
inode, fmode, ceph_cap_string(wanted),
ceph_cap_string(issued));
__ceph_get_fmode(ci, fmode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
/* adjust wanted? */
if ((issued & wanted) != wanted &&
@@ -180,10 +180,10 @@ int ceph_open(struct inode *inode, struct file *file)
} else if (ceph_snap(inode) != CEPH_NOSNAP &&
(ci->i_snap_caps & wanted) == wanted) {
__ceph_get_fmode(ci, fmode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return ceph_init_file(inode, file, fmode);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
req = prepare_open_request(inode->i_sb, flags, 0);
@@ -194,7 +194,10 @@ int ceph_open(struct inode *inode, struct file *file)
req->r_inode = inode;
ihold(inode);
req->r_num_caps = 1;
+ if (flags & (O_CREAT|O_TRUNC))
+ parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
if (!err)
err = ceph_init_file(inode, file, req->r_fmode);
ceph_mdsc_put_request(req);
@@ -222,11 +225,11 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
- struct file *file = nd->intent.open.file;
- struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry);
+ struct file *file;
struct ceph_mds_request *req;
+ struct dentry *ret;
int err;
- int flags = nd->intent.open.flags - 1; /* silly vfs! */
+ int flags = nd->intent.open.flags;
dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
@@ -242,16 +245,24 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry,
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
}
req->r_locked_dir = dir; /* caller holds dir->i_mutex */
- err = ceph_mdsc_do_request(mdsc, parent_inode, req);
- dentry = ceph_finish_lookup(req, dentry, err);
- if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
+ err = ceph_mdsc_do_request(mdsc,
+ (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
+ req);
+ err = ceph_handle_snapdir(req, dentry, err);
+ if (err)
+ goto out;
+ if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
- if (!err)
- err = ceph_init_file(req->r_dentry->d_inode, file,
- req->r_fmode);
+ if (err)
+ goto out;
+ file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open);
+ if (IS_ERR(file))
+ err = PTR_ERR(file);
+out:
+ ret = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req);
- dout("ceph_lookup_open result=%p\n", dentry);
- return dentry;
+ dout("ceph_lookup_open result=%p\n", ret);
+ return ret;
}
int ceph_release(struct inode *inode, struct file *file)
@@ -643,7 +654,8 @@ again:
if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
- (inode->i_sb->s_flags & MS_SYNCHRONOUS))
+ (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
+ (fi->flags & CEPH_F_SYNC))
/* hmm, this isn't really async... */
ret = ceph_sync_read(filp, base, len, ppos, &checkeof);
else
@@ -712,7 +724,7 @@ retry_snap:
want = CEPH_CAP_FILE_BUFFER;
ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
if (ret < 0)
- goto out;
+ goto out_put;
dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
@@ -720,12 +732,23 @@ retry_snap:
if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
(iocb->ki_filp->f_flags & O_DIRECT) ||
- (inode->i_sb->s_flags & MS_SYNCHRONOUS)) {
+ (inode->i_sb->s_flags & MS_SYNCHRONOUS) ||
+ (fi->flags & CEPH_F_SYNC)) {
ret = ceph_sync_write(file, iov->iov_base, iov->iov_len,
&iocb->ki_pos);
} else {
- ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+ /*
+ * buffered write; drop Fw early to avoid slow
+ * revocation if we get stuck on balance_dirty_pages
+ */
+ int dirty;
+
+ spin_lock(&ci->i_ceph_lock);
+ dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
+ spin_unlock(&ci->i_ceph_lock);
+ ceph_put_cap_refs(ci, got);
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
if ((ret >= 0 || ret == -EIOCBQUEUED) &&
((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host)
|| ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) {
@@ -733,22 +756,28 @@ retry_snap:
if (err < 0)
ret = err;
}
+
+ if (dirty)
+ __mark_inode_dirty(inode, dirty);
+ goto out;
}
+
if (ret >= 0) {
int dirty;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
}
-out:
+out_put:
dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len,
ceph_cap_string(got));
ceph_put_cap_refs(ci, got);
+out:
if (ret == -EOLDSNAPC) {
dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n",
inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len);
@@ -768,13 +797,17 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
mutex_lock(&inode->i_mutex);
__ceph_do_pending_vmtruncate(inode);
- switch (origin) {
- case SEEK_END:
+
+ if (origin == SEEK_END || origin == SEEK_DATA || origin == SEEK_HOLE) {
ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE);
if (ret < 0) {
offset = ret;
goto out;
}
+ }
+
+ switch (origin) {
+ case SEEK_END:
offset += inode->i_size;
break;
case SEEK_CUR:
@@ -790,6 +823,19 @@ static loff_t ceph_llseek(struct file *file, loff_t offset, int origin)
}
offset += file->f_pos;
break;
+ case SEEK_DATA:
+ if (offset >= inode->i_size) {
+ ret = -ENXIO;
+ goto out;
+ }
+ break;
+ case SEEK_HOLE:
+ if (offset >= inode->i_size) {
+ ret = -ENXIO;
+ goto out;
+ }
+ offset = inode->i_size;
+ break;
}
if (offset < 0 || offset > inode->i_sb->s_maxbytes) {
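Because this client keeps no block-allocation map, the two new whence
modes degenerate cleanly: every in-file byte counts as data, so SEEK_DATA
returns any offset below i_size unchanged, SEEK_HOLE jumps to i_size, and
both fail with -ENXIO at or beyond EOF. From userspace (illustrative;
fd is an open file with nonzero size):

	off_t data = lseek(fd, 0, SEEK_DATA);	/* returns 0: all bytes are data */
	off_t hole = lseek(fd, 0, SEEK_HOLE);	/* returns the file size */
	/* any starting offset >= the size fails with errno == ENXIO */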
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index d8858e9..8e889b7 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -9,7 +9,6 @@
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/vmalloc.h>
-#include <linux/pagevec.h>
#include "super.h"
#include "mds_client.h"
@@ -298,6 +297,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
dout("alloc_inode %p\n", &ci->vfs_inode);
+ spin_lock_init(&ci->i_ceph_lock);
+
ci->i_version = 0;
ci->i_time_warp_seq = 0;
ci->i_ceph_flags = 0;
@@ -560,7 +561,8 @@ static int fill_inode(struct inode *inode,
struct ceph_mds_reply_inode *info = iinfo->in;
struct ceph_inode_info *ci = ceph_inode(inode);
int i;
- int issued, implemented;
+ int issued = 0, implemented;
+ int updating_inode = 0;
struct timespec mtime, atime, ctime;
u32 nsplits;
struct ceph_buffer *xattr_blob = NULL;
@@ -583,7 +585,7 @@ static int fill_inode(struct inode *inode,
iinfo->xattr_len);
}
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
/*
* provided version will be odd if inode value is projected,
@@ -599,7 +601,8 @@ static int fill_inode(struct inode *inode,
if (le64_to_cpu(info->version) > 0 &&
(ci->i_version & ~1) >= le64_to_cpu(info->version))
goto no_change;
-
+
+ updating_inode = 1;
issued = __ceph_caps_issued(ci, &implemented);
issued |= implemented | __ceph_caps_dirty(ci);
@@ -617,7 +620,7 @@ static int fill_inode(struct inode *inode,
}
if ((issued & CEPH_CAP_LINK_EXCL) == 0)
- inode->i_nlink = le32_to_cpu(info->nlink);
+ set_nlink(inode, le32_to_cpu(info->nlink));
/* be careful with mtime, atime, size */
ceph_decode_timespec(&atime, &info->atime);
@@ -679,7 +682,7 @@ static int fill_inode(struct inode *inode,
char *sym;
BUG_ON(symlen != inode->i_size);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
err = -ENOMEM;
sym = kmalloc(symlen+1, GFP_NOFS);
@@ -688,7 +691,7 @@ static int fill_inode(struct inode *inode,
memcpy(sym, iinfo->symlink, symlen);
sym[symlen] = 0;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (!ci->i_symlink)
ci->i_symlink = sym;
else
@@ -707,17 +710,6 @@ static int fill_inode(struct inode *inode,
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ceph_decode_timespec(&ci->i_rctime, &info->rctime);
-
- /* set dir completion flag? */
- if (ci->i_files == 0 && ci->i_subdirs == 0 &&
- ceph_snap(inode) == CEPH_NOSNAP &&
- (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
- (issued & CEPH_CAP_FILE_EXCL) == 0 &&
- (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
- dout(" marking %p complete (empty)\n", inode);
- /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */
- ci->i_max_offset = 2;
- }
break;
default:
pr_err("fill_inode %llx.%llx BAD mode 0%o\n",
@@ -725,7 +717,7 @@ static int fill_inode(struct inode *inode,
}
no_change:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
/* queue truncate if we saw i_size decrease */
if (queue_trunc)
@@ -760,13 +752,13 @@ no_change:
info->cap.flags,
caps_reservation);
} else {
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dout(" %p got snap_caps %s\n", inode,
ceph_cap_string(le32_to_cpu(info->cap.caps)));
ci->i_snap_caps |= le32_to_cpu(info->cap.caps);
if (cap_fmode >= 0)
__ceph_get_fmode(ci, cap_fmode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
} else if (cap_fmode >= 0) {
pr_warning("mds issued no caps on %llx.%llx\n",
@@ -774,6 +766,19 @@ no_change:
__ceph_get_fmode(ci, cap_fmode);
}
+ /* set dir completion flag? */
+ if (S_ISDIR(inode->i_mode) &&
+ updating_inode && /* didn't jump to no_change */
+ ci->i_files == 0 && ci->i_subdirs == 0 &&
+ ceph_snap(inode) == CEPH_NOSNAP &&
+ (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) &&
+ (issued & CEPH_CAP_FILE_EXCL) == 0 &&
+ !ceph_dir_test_complete(inode)) {
+ dout(" marking %p complete (empty)\n", inode);
+ ceph_dir_set_complete(inode);
+ ci->i_max_offset = 2;
+ }
+
/* update delegation info? */
if (dirinfo)
ceph_fill_dirfrag(inode, dirinfo);
@@ -805,14 +810,14 @@ static void update_dentry_lease(struct dentry *dentry,
return;
spin_lock(&dentry->d_lock);
- dout("update_dentry_lease %p mask %d duration %lu ms ttl %lu\n",
- dentry, le16_to_cpu(lease->mask), duration, ttl);
+ dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
+ dentry, duration, ttl);
/* make lease_rdcache_gen match directory */
dir = dentry->d_parent->d_inode;
di->lease_shared_gen = ceph_inode(dir)->i_shared_gen;
- if (lease->mask == 0)
+ if (duration == 0)
goto out_unlock;
if (di->lease_gen == session->s_cap_gen &&
@@ -839,30 +844,33 @@ out_unlock:
/*
* Set dentry's directory position based on the current dir's max, and
* order it in d_subdirs, so that dcache_readdir behaves.
+ *
+ * Always called under directory's i_mutex.
*/
static void ceph_set_dentry_offset(struct dentry *dn)
{
struct dentry *dir = dn->d_parent;
- struct inode *inode = dn->d_parent->d_inode;
+ struct inode *inode = dir->d_inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_dentry_info *di;
BUG_ON(!inode);
di = ceph_dentry(dn);
- spin_lock(&inode->i_lock);
- if ((ceph_inode(inode)->i_ceph_flags & CEPH_I_COMPLETE) == 0) {
- spin_unlock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
+ if (!ceph_dir_test_complete(inode)) {
+ spin_unlock(&ci->i_ceph_lock);
return;
}
di->offset = ceph_inode(inode)->i_max_offset++;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
spin_lock(&dir->d_lock);
spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&dn->d_u.d_child, &dir->d_subdirs);
+ list_move(&dn->d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
- dn->d_u.d_child.prev, dn->d_u.d_child.next);
+ dn->d_child.prev, dn->d_child.next);
spin_unlock(&dn->d_lock);
spin_unlock(&dir->d_lock);
}
@@ -1022,9 +1030,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
/* do we have a dn lease? */
have_lease = have_dir_cap ||
- (le16_to_cpu(rinfo->dlease->mask) &
- CEPH_LOCK_DN);
-
+ le32_to_cpu(rinfo->dlease->duration_ms);
if (!have_lease)
dout("fill_trace no dentry lease or dir cap\n");
@@ -1053,7 +1059,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
* d_move() puts the renamed dentry at the end of
* d_subdirs. We need to assign it an appropriate
* directory offset so we can behave when holding
- * I_COMPLETE.
+ * D_COMPLETE.
*/
ceph_set_dentry_offset(req->r_old_dentry);
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
@@ -1250,7 +1256,7 @@ retry_lookup:
/* reorder parent's d_subdirs */
spin_lock(&parent->d_lock);
spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&dn->d_u.d_child, &parent->d_subdirs);
+ list_move(&dn->d_child, &parent->d_subdirs);
spin_unlock(&dn->d_lock);
spin_unlock(&parent->d_lock);
}
@@ -1305,7 +1311,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
struct ceph_inode_info *ci = ceph_inode(inode);
int ret = 0;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
inode->i_size = size;
inode->i_blocks = (size + (1 << 9) - 1) >> 9;
@@ -1315,7 +1321,7 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
(ci->i_reported_size << 1) < ci->i_max_size)
ret = 1;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return ret;
}
@@ -1325,12 +1331,13 @@ int ceph_inode_set_size(struct inode *inode, loff_t size)
*/
void ceph_queue_writeback(struct inode *inode)
{
+ ihold(inode);
if (queue_work(ceph_inode_to_client(inode)->wb_wq,
&ceph_inode(inode)->i_wb_work)) {
dout("ceph_queue_writeback %p\n", inode);
- ihold(inode);
} else {
dout("ceph_queue_writeback %p failed\n", inode);
+ iput(inode);
}
}
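This hunk (and the matching ones in ceph_queue_invalidate() and ceph_queue_vmtruncate() below) closes a reference race: the old code called ihold() only after queue_work() succeeded, so the worker could in principle run and iput() a reference that had not yet been taken. A minimal sketch of the corrected pattern, using only the kernel APIs visible in the patch:

	/* Take the inode reference before queueing so it already exists by
	 * the time the worker can run; if the work was already pending,
	 * queue_work() returns false and we drop the extra reference. */
	static void queue_inode_work(struct inode *inode,
				     struct workqueue_struct *wq,
				     struct work_struct *work)
	{
		ihold(inode);		/* reference owned by the worker */
		if (!queue_work(wq, work))
			iput(inode);	/* not queued; give it back */
	}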
@@ -1350,55 +1357,13 @@ static void ceph_writeback_work(struct work_struct *work)
*/
void ceph_queue_invalidate(struct inode *inode)
{
+ ihold(inode);
if (queue_work(ceph_inode_to_client(inode)->pg_inv_wq,
&ceph_inode(inode)->i_pg_inv_work)) {
dout("ceph_queue_invalidate %p\n", inode);
- ihold(inode);
} else {
dout("ceph_queue_invalidate %p failed\n", inode);
- }
-}
-
-/*
- * invalidate any pages that are not dirty or under writeback. this
- * includes pages that are clean and mapped.
- */
-static void ceph_invalidate_nondirty_pages(struct address_space *mapping)
-{
- struct pagevec pvec;
- pgoff_t next = 0;
- int i;
-
- pagevec_init(&pvec, 0);
- while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
- for (i = 0; i < pagevec_count(&pvec); i++) {
- struct page *page = pvec.pages[i];
- pgoff_t index;
- int skip_page =
- (PageDirty(page) || PageWriteback(page));
-
- if (!skip_page)
- skip_page = !trylock_page(page);
-
- /*
- * We really shouldn't be looking at the ->index of an
- * unlocked page. But we're not allowed to lock these
- * pages. So we rely upon nobody altering the ->index
- * of this (pinned-by-us) page.
- */
- index = page->index;
- if (index > next)
- next = index;
- next++;
-
- if (skip_page)
- continue;
-
- generic_error_remove_page(mapping, page);
- unlock_page(page);
- }
- pagevec_release(&pvec);
- cond_resched();
+ iput(inode);
}
}
@@ -1414,20 +1379,20 @@ static void ceph_invalidate_work(struct work_struct *work)
u32 orig_gen;
int check = 0;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dout("invalidate_pages %p gen %d revoking %d\n", inode,
ci->i_rdcache_gen, ci->i_rdcache_revoking);
if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
/* nevermind! */
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
goto out;
}
orig_gen = ci->i_rdcache_gen;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
- ceph_invalidate_nondirty_pages(inode->i_mapping);
+ truncate_inode_pages(&inode->i_data, 0);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (orig_gen == ci->i_rdcache_gen &&
orig_gen == ci->i_rdcache_revoking) {
dout("invalidate_pages %p gen %d successful\n", inode,
@@ -1439,7 +1404,7 @@ static void ceph_invalidate_work(struct work_struct *work)
inode, orig_gen, ci->i_rdcache_gen,
ci->i_rdcache_revoking);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (check)
ceph_check_caps(ci, 0, NULL);
@@ -1474,13 +1439,14 @@ void ceph_queue_vmtruncate(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ ihold(inode);
if (queue_work(ceph_sb_to_client(inode->i_sb)->trunc_wq,
&ci->i_vmtruncate_work)) {
dout("ceph_queue_vmtruncate %p\n", inode);
- ihold(inode);
} else {
dout("ceph_queue_vmtruncate %p failed, pending=%d\n",
inode, ci->i_truncate_pending);
+ iput(inode);
}
}
@@ -1497,10 +1463,10 @@ void __ceph_do_pending_vmtruncate(struct inode *inode)
int wrbuffer_refs, wake = 0;
retry:
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_truncate_pending == 0) {
dout("__do_pending_vmtruncate %p none pending\n", inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return;
}
@@ -1511,7 +1477,7 @@ retry:
if (ci->i_wrbuffer_ref_head < ci->i_wrbuffer_ref) {
dout("__do_pending_vmtruncate %p flushing snaps first\n",
inode);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
filemap_write_and_wait_range(&inode->i_data, 0,
inode->i_sb->s_maxbytes);
goto retry;
@@ -1521,15 +1487,15 @@ retry:
wrbuffer_refs = ci->i_wrbuffer_ref;
dout("__do_pending_vmtruncate %p (%d) to %lld\n", inode,
ci->i_truncate_pending, to);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
truncate_inode_pages(inode->i_mapping, to);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
ci->i_truncate_pending--;
if (ci->i_truncate_pending == 0)
wake = 1;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (wrbuffer_refs == 0)
ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
@@ -1560,7 +1526,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct inode *parent_inode = dentry->d_parent->d_inode;
+ struct inode *parent_inode;
const unsigned int ia_valid = attr->ia_valid;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc;
@@ -1584,7 +1550,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
if (IS_ERR(req))
return PTR_ERR(req);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
issued = __ceph_caps_issued(ci, NULL);
dout("setattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -1732,7 +1698,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
}
release &= issued;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (inode_dirty_flags)
__mark_inode_dirty(inode, inode_dirty_flags);
@@ -1743,7 +1709,9 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
req->r_inode_drop = release;
req->r_args.setattr.mask = cpu_to_le32(mask);
req->r_num_caps = 1;
+ parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
}
dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err,
ceph_cap_string(dirtied), mask);
@@ -1752,7 +1720,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
__ceph_do_pending_vmtruncate(inode);
return err;
out:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
ceph_mdsc_put_request(req);
return err;
}
@@ -1795,17 +1763,17 @@ int ceph_do_getattr(struct inode *inode, int mask)
* Check inode permissions. We verify we have a valid value for
* the AUTH cap, then call the generic handler.
*/
-int ceph_permission(struct inode *inode, int mask, unsigned int flags)
+int ceph_permission(struct inode *inode, int mask)
{
int err;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED);
if (!err)
- err = generic_permission(inode, mask, flags, NULL);
+ err = generic_permission(inode, mask);
return err;
}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index ef0b5f4..790914a59 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -38,21 +38,43 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg)
static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
{
struct inode *inode = file->f_dentry->d_inode;
- struct inode *parent_inode = file->f_dentry->d_parent->d_inode;
+ struct inode *parent_inode;
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
struct ceph_mds_request *req;
struct ceph_ioctl_layout l;
+ struct ceph_inode_info *ci = ceph_inode(file->f_dentry->d_inode);
+ struct ceph_ioctl_layout nl;
int err, i;
- /* copy and validate */
if (copy_from_user(&l, arg, sizeof(l)))
return -EFAULT;
- if ((l.object_size & ~PAGE_MASK) ||
- (l.stripe_unit & ~PAGE_MASK) ||
- !l.stripe_unit ||
- (l.object_size &&
- (unsigned)l.object_size % (unsigned)l.stripe_unit))
+ /* validate changed params against current layout */
+ err = ceph_do_getattr(file->f_dentry->d_inode, CEPH_STAT_CAP_LAYOUT);
+ if (!err) {
+ nl.stripe_unit = ceph_file_layout_su(ci->i_layout);
+ nl.stripe_count = ceph_file_layout_stripe_count(ci->i_layout);
+ nl.object_size = ceph_file_layout_object_size(ci->i_layout);
+ nl.data_pool = le32_to_cpu(ci->i_layout.fl_pg_pool);
+ nl.preferred_osd =
+ (s32)le32_to_cpu(ci->i_layout.fl_pg_preferred);
+ } else
+ return err;
+
+ if (l.stripe_count)
+ nl.stripe_count = l.stripe_count;
+ if (l.stripe_unit)
+ nl.stripe_unit = l.stripe_unit;
+ if (l.object_size)
+ nl.object_size = l.object_size;
+ if (l.data_pool)
+ nl.data_pool = l.data_pool;
+ if (l.preferred_osd)
+ nl.preferred_osd = l.preferred_osd;
+
+ if ((nl.object_size & ~PAGE_MASK) ||
+ (nl.stripe_unit & ~PAGE_MASK) ||
+ ((unsigned)nl.object_size % (unsigned)nl.stripe_unit))
return -EINVAL;
/* make sure it's a valid data pool */
@@ -87,7 +109,9 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg)
req->r_args.setlayout.layout.fl_pg_preferred =
cpu_to_le32(l.preferred_osd);
+ parent_inode = ceph_get_dentry_parent_inode(file->f_dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
ceph_mdsc_put_request(req);
return err;
}
@@ -217,11 +241,11 @@ static long ceph_ioctl_lazyio(struct file *file)
struct ceph_inode_info *ci = ceph_inode(inode);
if ((fi->fmode & CEPH_FILE_MODE_LAZY) == 0) {
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
ci->i_nr_by_mode[fi->fmode]--;
fi->fmode |= CEPH_FILE_MODE_LAZY;
ci->i_nr_by_mode[fi->fmode]++;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout("ioctl_layzio: file %p marked lazy\n", file);
ceph_check_caps(ci, 0, NULL);
@@ -231,6 +255,14 @@ static long ceph_ioctl_lazyio(struct file *file)
return 0;
}
+static long ceph_ioctl_syncio(struct file *file)
+{
+ struct ceph_file_info *fi = file->private_data;
+
+ fi->flags |= CEPH_F_SYNC;
+ return 0;
+}
+
long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg);
@@ -249,6 +281,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
case CEPH_IOC_LAZYIO:
return ceph_ioctl_lazyio(file);
+
+ case CEPH_IOC_SYNCIO:
+ return ceph_ioctl_syncio(file);
}
return -ENOTTY;
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index 52e8fd7..be4a604 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -6,7 +6,31 @@
#define CEPH_IOCTL_MAGIC 0x97
-/* just use u64 to align sanely on all archs */
+/*
+ * CEPH_IOC_GET_LAYOUT - get file layout or dir layout policy
+ * CEPH_IOC_SET_LAYOUT - set file layout
+ * CEPH_IOC_SET_LAYOUT_POLICY - set dir layout policy
+ *
+ * The file layout specifies how file data is striped over objects in
+ * the distributed object store, which object pool they belong to (if
+ * it differs from the default), and an optional 'preferred osd' to
+ * store them on.
+ *
+ * Files get a new layout based on the policy set on the containing
+ * directory or one of its ancestors. The GET_LAYOUT ioctl will let
+ * you examine the layout for a file or the policy on a directory.
+ *
+ * SET_LAYOUT will let you set a layout on a newly created file. This
+ * only works immediately after the file is created and before any
+ * data is written to it.
+ *
+ * SET_LAYOUT_POLICY will let you set a layout policy (default layout)
+ * on a directory that will apply to any new files created in that
+ * directory (or any child directory that doesn't specify a layout of
+ * its own).
+ */
+
+/* use u64 to align sanely on all archs */
struct ceph_ioctl_layout {
__u64 stripe_unit, stripe_count, object_size;
__u64 data_pool;
@@ -21,6 +45,8 @@ struct ceph_ioctl_layout {
struct ceph_ioctl_layout)
/*
+ * CEPH_IOC_GET_DATALOC - get location of file data in the cluster
+ *
* Extract identity, address of the OSD and object storing a given
* file offset.
*/
@@ -39,6 +65,34 @@ struct ceph_ioctl_dataloc {
#define CEPH_IOC_GET_DATALOC _IOWR(CEPH_IOCTL_MAGIC, 3, \
struct ceph_ioctl_dataloc)
+/*
+ * CEPH_IOC_LAZYIO - relax consistency
+ *
+ * Normally Ceph switches to synchronous IO when multiple clients have
+ * the file open (and one or more for write). Reads and writes bypass the
+ * page cache and go directly to the OSD. Setting this flag on a file
+ * descriptor will allow buffered IO for this file in cases where the
+ * application knows it won't interfere with other nodes (or doesn't
+ * care).
+ */
#define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4)
+/*
+ * CEPH_IOC_SYNCIO - force synchronous IO
+ *
+ * This ioctl sets a file flag that forces the synchronous IO that
+ * bypasses the page cache, even if it is not necessary. This is
+ * essentially the opposite behavior of IOC_LAZYIO. This forces the
+ * same read/write path as a file opened by multiple clients when one
+ * or more of those clients has it open for write.
+ *
+ * Note that this type of sync IO takes a different path than a file
+ * opened with O_SYNC/O_DSYNC (writes hit the page cache and are
+ * immediately flushed on page boundaries). It is very similar to
+ * O_DIRECT (writes bypass the page cache) except that O_DIRECT writes
+ * are not copied (user page must remain stable) and O_DIRECT writes
+ * have alignment restrictions (on the buffer and file offset).
+ */
+#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5)
+
#endif
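For reference, a hedged userspace sketch of exercising the new ioctl; the header path and error handling are illustrative assumptions:

	/* Force synchronous IO on an open CephFS file via CEPH_IOC_SYNCIO. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include "ioctl.h"	/* assumed local copy of fs/ceph/ioctl.h */

	int main(int argc, char **argv)
	{
		int fd = open(argv[1], O_RDWR);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		if (ioctl(fd, CEPH_IOC_SYNCIO) < 0) {
			perror("CEPH_IOC_SYNCIO");
			return 1;
		}
		return 0;	/* later reads/writes bypass the page cache */
	}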
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 0c1d917..b24e2d3 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -483,22 +483,26 @@ void ceph_mdsc_release_request(struct kref *kref)
destroy_reply_info(&req->r_reply_info);
}
if (req->r_inode) {
- ceph_put_cap_refs(ceph_inode(req->r_inode),
- CEPH_CAP_PIN);
+ ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
iput(req->r_inode);
}
if (req->r_locked_dir)
- ceph_put_cap_refs(ceph_inode(req->r_locked_dir),
- CEPH_CAP_PIN);
+ ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
if (req->r_target_inode)
iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
if (req->r_old_dentry) {
- ceph_put_cap_refs(
- ceph_inode(req->r_old_dentry->d_parent->d_inode),
- CEPH_CAP_PIN);
+ /*
+ * track (and drop pins for) r_old_dentry_dir
+ * separately, since r_old_dentry's d_parent may have
+ * changed between the dir mutex being dropped and
+ * this request being freed.
+ */
+ ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
+ CEPH_CAP_PIN);
dput(req->r_old_dentry);
+ iput(req->r_old_dentry_dir);
}
kfree(req->r_path1);
kfree(req->r_path2);
@@ -604,6 +608,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
req->r_unsafe_dir = NULL;
}
+ complete_all(&req->r_safe_completion);
+
ceph_mdsc_put_request(req);
}
@@ -615,8 +621,14 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
*
* Called under mdsc->mutex.
*/
-struct dentry *get_nonsnap_parent(struct dentry *dentry)
+static struct dentry *get_nonsnap_parent(struct dentry *dentry)
{
+ /*
+ * we don't need to worry about protecting the d_parent access
+ * here because we never rename inside the snapped namespace
+ * except to resplice to another snapdir, and either the old or new
+ * result is a valid result.
+ */
while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
dentry = dentry->d_parent;
return dentry;
@@ -652,7 +664,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
if (req->r_inode) {
inode = req->r_inode;
} else if (req->r_dentry) {
- struct inode *dir = req->r_dentry->d_parent->d_inode;
+ /* ignore race with rename; old or new d_parent is okay */
+ struct dentry *parent = req->r_dentry->d_parent;
+ struct inode *dir = parent->d_inode;
if (dir->i_sb != mdsc->fsc->sb) {
/* not this fs! */
@@ -660,8 +674,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else if (ceph_snap(dir) != CEPH_NOSNAP) {
/* direct snapped/virtual snapdir requests
* based on parent dir inode */
- struct dentry *dn =
- get_nonsnap_parent(req->r_dentry->d_parent);
+ struct dentry *dn = get_nonsnap_parent(parent);
inode = dn->d_inode;
dout("__choose_mds using nonsnap parent %p\n", inode);
} else if (req->r_dentry->d_inode) {
@@ -670,7 +683,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
} else {
/* dir + name */
inode = dir;
- hash = ceph_dentry_hash(req->r_dentry);
+ hash = ceph_dentry_hash(dir, req->r_dentry);
is_hash = true;
}
}
@@ -721,21 +734,21 @@ static int __choose_mds(struct ceph_mds_client *mdsc,
}
}
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
cap = NULL;
if (mode == USE_AUTH_MDS)
cap = ci->i_auth_cap;
if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
if (!cap) {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
goto random;
}
mds = cap->session->s_mds;
dout("choose_mds %p %llx.%llx mds%d (%scap %p)\n",
inode, ceph_vinop(inode), mds,
cap == ci->i_auth_cap ? "auth " : "", cap);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return mds;
random:
@@ -753,7 +766,8 @@ static struct ceph_msg *create_session_msg(u32 op, u64 seq)
struct ceph_msg *msg;
struct ceph_mds_session_head *h;
- msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS,
+ false);
if (!msg) {
pr_err("create_session_msg ENOMEM creating msg\n");
return NULL;
@@ -939,7 +953,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
dout("removing cap %p, ci is %p, inode is %p\n",
cap, ci, &ci->vfs_inode);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
__ceph_remove_cap(cap);
if (!__ceph_is_any_real_caps(ci)) {
struct ceph_mds_client *mdsc =
@@ -972,7 +986,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
}
spin_unlock(&mdsc->cap_dirty_lock);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
while (drop--)
iput(inode);
return 0;
@@ -1003,10 +1017,10 @@ static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
wake_up_all(&ci->i_cap_wq);
if (arg) {
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
ci->i_wanted_max_size = 0;
ci->i_requested_max_size = 0;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
return 0;
}
@@ -1139,7 +1153,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
if (session->s_trim_caps <= 0)
return -1;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
mine = cap->issued | cap->implemented;
used = __ceph_caps_used(ci);
oissued = __ceph_caps_issued_other(ci, cap);
@@ -1158,7 +1172,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
__ceph_remove_cap(cap);
} else {
/* try to drop referring dentries */
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
d_prune_aliases(inode);
dout("trim_caps_cb %p cap %p pruned, count now %d\n",
inode, cap, atomic_read(&inode->i_count));
@@ -1166,7 +1180,7 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
}
out:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return 0;
}
@@ -1229,7 +1243,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc,
while (session->s_num_cap_releases < session->s_nr_caps + extra) {
spin_unlock(&session->s_cap_lock);
msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE, PAGE_CACHE_SIZE,
- GFP_NOFS);
+ GFP_NOFS, false);
if (!msg)
goto out_unlocked;
dout("add_cap_releases %p msg %p now %d\n", session, msg,
@@ -1284,7 +1298,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
i_flushing_item);
struct inode *inode = &ci->vfs_inode;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_cap_flush_seq <= want_flush_seq) {
dout("check_cap_flush still flushing %p "
"seq %lld <= %lld to mds%d\n", inode,
@@ -1292,7 +1306,7 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
session->s_mds);
ret = 0;
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
@@ -1483,6 +1497,7 @@ retry:
pos, temp);
} else if (stop_on_nosnap && inode &&
ceph_snap(inode) == CEPH_NOSNAP) {
+ spin_unlock(&temp->d_lock);
break;
} else {
pos -= temp->d_name.len;
@@ -1584,7 +1599,7 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
r = build_dentry_path(rdentry, ppath, pathlen, ino, freepath);
dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
*ppath);
- } else if (rpath) {
+ } else if (rpath || rino) {
*ino = rino;
*ppath = rpath;
*pathlen = strlen(rpath);
@@ -1641,7 +1656,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
if (req->r_old_dentry_drop)
len += req->r_old_dentry->d_name.len;
- msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_REQUEST, len, GFP_NOFS, false);
if (!msg) {
msg = ERR_PTR(-ENOMEM);
goto out_free2;
@@ -1802,8 +1817,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = -EAGAIN;
- if (req->r_err || req->r_got_result)
+ if (req->r_err || req->r_got_result) {
+ if (req->r_aborted)
+ __unregister_request(mdsc, req);
goto out;
+ }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -1931,9 +1949,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
if (req->r_locked_dir)
ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
if (req->r_old_dentry)
- ceph_get_cap_refs(
- ceph_inode(req->r_old_dentry->d_parent->d_inode),
- CEPH_CAP_PIN);
+ ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
+ CEPH_CAP_PIN);
/* issue */
mutex_lock(&mdsc->mutex);
@@ -1991,7 +2008,7 @@ out:
}
/*
- * Invalidate dir I_COMPLETE, dentry lease state on an aborted MDS
+ * Invalidate dir D_COMPLETE, dentry lease state on an aborted MDS
* namespace request.
*/
void ceph_invalidate_dir_request(struct ceph_mds_request *req)
@@ -1999,11 +2016,11 @@ void ceph_invalidate_dir_request(struct ceph_mds_request *req)
struct inode *inode = req->r_locked_dir;
struct ceph_inode_info *ci = ceph_inode(inode);
- dout("invalidate_dir_request %p (I_COMPLETE, lease(s))\n", inode);
- spin_lock(&inode->i_lock);
- ci->i_ceph_flags &= ~CEPH_I_COMPLETE;
+ dout("invalidate_dir_request %p (D_COMPLETE, lease(s))\n", inode);
+ spin_lock(&ci->i_ceph_lock);
+ ceph_dir_clear_complete(inode);
ci->i_release_count++;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (req->r_dentry)
ceph_invalidate_dentry_lease(req->r_dentry);
@@ -2112,7 +2129,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -2411,7 +2427,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
if (err)
goto out_free;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
cap->seq = 0; /* reset cap seq */
cap->issue_seq = 0; /* and issue_seq */
@@ -2434,7 +2450,7 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap,
rec.v1.pathbase = cpu_to_le64(pathbase);
reclen = sizeof(rec.v1);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (recon_state->flock) {
int num_fcntl_locks, num_flock_locks;
@@ -2508,7 +2524,7 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
goto fail_nopagelist;
ceph_pagelist_init(pagelist);
- reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS);
+ reply = ceph_msg_new(CEPH_MSG_CLIENT_RECONNECT, 0, GFP_NOFS, false);
if (!reply)
goto fail_nomsg;
@@ -2714,7 +2730,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
struct ceph_mds_lease *h = msg->front.iov_base;
u32 seq;
struct ceph_vino vino;
- int mask;
struct qstr dname;
int release = 0;
@@ -2725,7 +2740,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
goto bad;
vino.ino = le64_to_cpu(h->ino);
vino.snap = CEPH_NOSNAP;
- mask = le16_to_cpu(h->mask);
seq = le32_to_cpu(h->seq);
dname.name = (void *)h + sizeof(*h) + sizeof(u32);
dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32);
@@ -2737,8 +2751,8 @@ static void handle_lease(struct ceph_mds_client *mdsc,
/* lookup inode */
inode = ceph_find_inode(sb, vino);
- dout("handle_lease %s, mask %d, ino %llx %p %.*s\n",
- ceph_lease_op_name(h->action), mask, vino.ino, inode,
+ dout("handle_lease %s, ino %llx %p %.*s\n",
+ ceph_lease_op_name(h->action), vino.ino, inode,
dname.len, dname.name);
if (inode == NULL) {
dout("handle_lease no inode %llx\n", vino.ino);
@@ -2823,12 +2837,11 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
dnamelen = dentry->d_name.len;
len += dnamelen;
- msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS);
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false);
if (!msg)
return;
lease = msg->front.iov_base;
lease->action = action;
- lease->mask = cpu_to_le16(1);
lease->ino = cpu_to_le64(ceph_vino(inode).ino);
lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap);
lease->seq = cpu_to_le32(seq);
@@ -2850,7 +2863,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
* Pass @inode always, @dentry is optional.
*/
void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
- struct dentry *dentry, int mask)
+ struct dentry *dentry)
{
struct ceph_dentry_info *di;
struct ceph_mds_session *session;
@@ -2858,7 +2871,6 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
BUG_ON(inode == NULL);
BUG_ON(dentry == NULL);
- BUG_ON(mask == 0);
/* is dentry lease valid? */
spin_lock(&dentry->d_lock);
@@ -2868,8 +2880,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
di->lease_gen != di->lease_session->s_cap_gen ||
!time_before(jiffies, dentry->d_time)) {
dout("lease_release inode %p dentry %p -- "
- "no lease on %d\n",
- inode, dentry, mask);
+ "no lease\n",
+ inode, dentry);
spin_unlock(&dentry->d_lock);
return;
}
@@ -2880,8 +2892,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode,
__ceph_mdsc_drop_dentry_lease(dentry);
spin_unlock(&dentry->d_lock);
- dout("lease_release inode %p dentry %p mask %d to mds%d\n",
- inode, dentry, mask, session->s_mds);
+ dout("lease_release inode %p dentry %p to mds%d\n",
+ inode, dentry, session->s_mds);
ceph_mdsc_lease_send_msg(session, inode, dentry,
CEPH_MDS_LEASE_RELEASE, seq);
ceph_put_mds_session(session);
@@ -3147,7 +3159,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
/*
* true if all sessions are closed, or we force unmount
*/
-bool done_closing_sessions(struct ceph_mds_client *mdsc)
+static bool done_closing_sessions(struct ceph_mds_client *mdsc)
{
int i, n = 0;
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 7d8a0d6..a50ca0e 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -20,7 +20,7 @@
*
* mdsc->snap_rwsem
*
- * inode->i_lock
+ * ci->i_ceph_lock
* mdsc->snap_flush_lock
* mdsc->cap_delay_lock
*
@@ -171,6 +171,7 @@ struct ceph_mds_request {
struct inode *r_inode; /* arg1 */
struct dentry *r_dentry; /* arg1 */
struct dentry *r_old_dentry; /* arg2: rename from or link from */
+ struct inode *r_old_dentry_dir; /* arg2: old dentry's parent dir */
char *r_path1, *r_path2;
struct ceph_vino r_ino1, r_ino2;
@@ -333,7 +334,7 @@ extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc);
extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc,
struct inode *inode,
- struct dentry *dn, int mask);
+ struct dentry *dn);
extern void ceph_invalidate_dir_request(struct ceph_mds_request *req);
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 54b14de..d5df940 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b)
return 0;
}
+
+static struct ceph_snap_context *empty_snapc;
+
/*
* build the snap context for a given realm.
*/
@@ -329,9 +332,15 @@ static int build_snap_context(struct ceph_snap_realm *realm)
return 0;
}
+ if (num == 0 && realm->seq == empty_snapc->seq) {
+ ceph_get_snap_context(empty_snapc);
+ snapc = empty_snapc;
+ goto done;
+ }
+
/* alloc new snap context */
err = -ENOMEM;
- if (num > ULONG_MAX / sizeof(u64) - sizeof(*snapc))
+ if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
goto fail;
snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS);
if (!snapc)
@@ -364,6 +373,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
dout("build_snap_context %llx %p: %p seq %lld (%d snaps)\n",
realm->ino, realm, snapc, snapc->seq, snapc->num_snaps);
+done:
if (realm->cached_context)
ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
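The revised bound in this hunk is the standard overflow-safe form for a flexible-array allocation; a hedged restatement with a hypothetical helper name:

	/* sizeof(*snapc) + num * sizeof(u64) fits in size_t exactly when
	 * num <= (SIZE_MAX - sizeof(*snapc)) / sizeof(u64). The old test
	 * subtracted a byte count (sizeof(*snapc)) from an element count
	 * (ULONG_MAX / sizeof(u64)), which is not the same bound. */
	static inline bool snapc_num_snaps_ok(size_t num, size_t header)
	{
		return num <= (SIZE_MAX - header) / sizeof(u64);
	}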
@@ -446,9 +456,18 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
return;
}
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
used = __ceph_caps_used(ci);
dirty = __ceph_caps_dirty(ci);
+
+ /*
+ * If there is a write in progress, treat that as a dirty Fw,
+ * even though it hasn't completed yet; by the time we finish
+ * up this capsnap it will be.
+ */
+ if (used & CEPH_CAP_FILE_WR)
+ dirty |= CEPH_CAP_FILE_WR;
+
if (__ceph_have_pending_cap_snap(ci)) {
/* there is no point in queuing multiple "pending" cap_snaps,
as no new writes are allowed to start when pending, so any
@@ -456,13 +475,22 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap);
- } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) ||
- (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
- CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) {
+ } else if (ci->i_snap_realm->cached_context == empty_snapc) {
+ dout("queue_cap_snap %p empty snapc\n", inode);
+ kfree(capsnap);
+ } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
+ CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
struct ceph_snap_context *snapc = ci->i_head_snapc;
- dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode,
- capsnap, snapc);
+ /*
+ * if we are a sync write, we may need to go to the snaprealm
+ * to get the current snapc.
+ */
+ if (!snapc)
+ snapc = ci->i_snap_realm->cached_context;
+
+ dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n",
+ inode, capsnap, snapc, ceph_cap_string(dirty));
ihold(inode);
atomic_set(&capsnap->nref, 1);
@@ -513,7 +541,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
kfree(capsnap);
}
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
/*
@@ -522,7 +550,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
*
* If capsnap can now be flushed, add to snap_flush list, and return 1.
*
- * Caller must hold i_lock.
+ * Caller must hold i_ceph_lock.
*/
int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap)
@@ -724,9 +752,9 @@ static void flush_snaps(struct ceph_mds_client *mdsc)
inode = &ci->vfs_inode;
ihold(inode);
spin_unlock(&mdsc->snap_flush_lock);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
__ceph_flush_snaps(ci, &session, 0);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
iput(inode);
spin_lock(&mdsc->snap_flush_lock);
}
@@ -832,7 +860,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
continue;
ci = ceph_inode(inode);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (!ci->i_snap_realm)
goto skip_inode;
/*
@@ -861,7 +889,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
oldrealm = ci->i_snap_realm;
ci->i_snap_realm = realm;
spin_unlock(&realm->inodes_with_caps_lock);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
ceph_get_snap_realm(mdsc, realm);
ceph_put_snap_realm(mdsc, oldrealm);
@@ -870,7 +898,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc,
continue;
skip_inode:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
iput(inode);
}
@@ -912,5 +940,17 @@ out:
return;
}
+int __init ceph_snap_init(void)
+{
+ empty_snapc = kzalloc(sizeof(struct ceph_snap_context), GFP_NOFS);
+ if (!empty_snapc)
+ return -ENOMEM;
+ atomic_set(&empty_snapc->nref, 1);
+ empty_snapc->seq = 1;
+ return 0;
+}
-
+void ceph_snap_exit(void)
+{
+ ceph_put_snap_context(empty_snapc);
+}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 1775022..8bfafe5 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -79,8 +79,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
buf->f_frsize = 1 << CEPH_BLOCK_SHIFT;
buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
- buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >>
- (CEPH_BLOCK_SHIFT-10);
+ buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
buf->f_files = le64_to_cpu(st.num_objects);
@@ -120,6 +119,7 @@ static int ceph_sync_fs(struct super_block *sb, int wait)
enum {
Opt_wsize,
Opt_rsize,
+ Opt_rasize,
Opt_caps_wanted_delay_min,
Opt_caps_wanted_delay_max,
Opt_cap_release_safety,
@@ -142,6 +142,7 @@ enum {
static match_table_t fsopt_tokens = {
{Opt_wsize, "wsize=%d"},
{Opt_rsize, "rsize=%d"},
+ {Opt_rasize, "rasize=%d"},
{Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
{Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
{Opt_cap_release_safety, "cap_release_safety=%d"},
@@ -202,6 +203,9 @@ static int parse_fsopt_token(char *c, void *private)
case Opt_rsize:
fsopt->rsize = intval;
break;
+ case Opt_rasize:
+ fsopt->rasize = intval;
+ break;
case Opt_caps_wanted_delay_min:
fsopt->caps_wanted_delay_min = intval;
break;
@@ -295,28 +299,29 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name);
- fsopt->sb_flags = flags;
- fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
+ fsopt->sb_flags = flags;
+ fsopt->flags = CEPH_MOUNT_OPT_DEFAULT;
- fsopt->rsize = CEPH_RSIZE_DEFAULT;
- fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
+ fsopt->rsize = CEPH_RSIZE_DEFAULT;
+ fsopt->rasize = CEPH_RASIZE_DEFAULT;
+ fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
fsopt->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
fsopt->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
- fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
- fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
- fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
- fsopt->congestion_kb = default_congestion_kb();
-
- /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
- err = -EINVAL;
- if (!dev_name)
- goto out;
- *path = strstr(dev_name, ":/");
- if (*path == NULL) {
- pr_err("device name is missing path (no :/ in %s)\n",
- dev_name);
- goto out;
- }
+ fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT;
+ fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT;
+ fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT;
+ fsopt->congestion_kb = default_congestion_kb();
+
+ /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
+ err = -EINVAL;
+ if (!dev_name)
+ goto out;
+ *path = strstr(dev_name, ":/");
+ if (*path == NULL) {
+ pr_err("device name is missing path (no :/ in %s)\n",
+ dev_name);
+ goto out;
+ }
dev_name_end = *path;
dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name);
@@ -356,8 +361,10 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
if (opt->flags & CEPH_OPT_NOCRC)
seq_puts(m, ",nocrc");
- if (opt->name)
- seq_printf(m, ",name=%s", opt->name);
+ if (opt->name) {
+ seq_puts(m, ",name=");
+ seq_escape(m, opt->name, ", \t\n\\");
+ }
if (opt->key)
seq_puts(m, ",secret=<hidden>");
@@ -382,6 +389,8 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
seq_printf(m, ",wsize=%d", fsopt->wsize);
if (fsopt->rsize != CEPH_RSIZE_DEFAULT)
seq_printf(m, ",rsize=%d", fsopt->rsize);
+ if (fsopt->rasize != CEPH_RASIZE_DEFAULT)
+ seq_printf(m, ",rasize=%d", fsopt->rasize);
if (fsopt->congestion_kb != default_congestion_kb())
seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb);
if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT)
@@ -398,7 +407,7 @@ static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
- seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+ seq_show_option(m, "snapdirname", fsopt->snapdir_name);
return 0;
}
@@ -424,24 +433,27 @@ static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg)
/*
* create a new fs client
*/
-struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
+static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
struct ceph_options *opt)
{
struct ceph_fs_client *fsc;
+ const unsigned supported_features =
+ CEPH_FEATURE_FLOCK |
+ CEPH_FEATURE_DIRLAYOUTHASH;
+ const unsigned required_features = 0;
int err = -ENOMEM;
fsc = kzalloc(sizeof(*fsc), GFP_KERNEL);
if (!fsc)
return ERR_PTR(-ENOMEM);
- fsc->client = ceph_create_client(opt, fsc);
+ fsc->client = ceph_create_client(opt, fsc, supported_features,
+ required_features);
if (IS_ERR(fsc->client)) {
err = PTR_ERR(fsc->client);
goto fail;
}
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
- fsc->client->supported_features |= CEPH_FEATURE_FLOCK |
- CEPH_FEATURE_DIRLAYOUTHASH;
fsc->client->monc.want_mdsmap = 1;
fsc->mount_options = fsopt;
@@ -497,7 +509,7 @@ fail:
return ERR_PTR(err);
}
-void destroy_fs_client(struct ceph_fs_client *fsc)
+static void destroy_fs_client(struct ceph_fs_client *fsc)
{
dout("destroy_fs_client %p\n", fsc);
@@ -633,10 +645,12 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
if (err == 0) {
dout("open_root_inode success\n");
if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
- fsc->sb->s_root == NULL)
+ fsc->sb->s_root == NULL) {
root = d_alloc_root(req->r_target_inode);
- else
+ ceph_init_dentry(root);
+ } else {
root = d_obtain_alias(req->r_target_inode);
+ }
req->r_target_inode = NULL;
dout("open_root_inode success, root dentry is %p\n", root);
} else {
@@ -780,11 +794,15 @@ static int ceph_register_bdi(struct super_block *sb,
{
int err;
- /* set ra_pages based on rsize mount option? */
- if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
+ /* set ra_pages based on rasize mount option? */
+ if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
fsc->backing_dev_info.ra_pages =
- (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
+ (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
>> PAGE_SHIFT;
+ else
+ fsc->backing_dev_info.ra_pages =
+ default_backing_dev_info.ra_pages;
+
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d",
atomic_long_inc_return(&bdi_seq));
if (!err)
@@ -815,8 +833,8 @@ static struct dentry *ceph_mount(struct file_system_type *fs_type,
fsc = create_fs_client(fsopt, opt);
if (IS_ERR(fsc)) {
res = ERR_CAST(fsc);
- kfree(fsopt);
- kfree(opt);
+ destroy_mount_options(fsopt);
+ ceph_destroy_options(opt);
goto out_final;
}
@@ -895,14 +913,20 @@ static int __init init_ceph(void)
if (ret)
goto out;
- ret = register_filesystem(&ceph_fs_type);
+ ret = ceph_snap_init();
if (ret)
goto out_icache;
+ ret = register_filesystem(&ceph_fs_type);
+ if (ret)
+ goto out_snap;
+
pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
return 0;
+out_snap:
+ ceph_snap_exit();
out_icache:
destroy_caches();
out:
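The new out_snap label keeps the unwind ladder in strict reverse order of initialization; a hedged distillation of the pattern (init_caches() is inferred from the destroy_caches() shown here):

	static int __init example_init(void)
	{
		int ret;

		ret = init_caches();			/* step 1 */
		if (ret)
			goto out;
		ret = ceph_snap_init();			/* step 2 */
		if (ret)
			goto out_caches;
		ret = register_filesystem(&ceph_fs_type); /* step 3 */
		if (ret)
			goto out_snap;
		return 0;

	out_snap:
		ceph_snap_exit();			/* undo step 2 */
	out_caches:
		destroy_caches();			/* undo step 1 */
	out:
		return ret;
	}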
@@ -913,6 +937,7 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
+ ceph_snap_exit();
destroy_caches();
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 9091926..242df58 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -36,7 +36,8 @@
#define ceph_test_mount_opt(fsc, opt) \
(!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt))
-#define CEPH_RSIZE_DEFAULT (512*1024) /* readahead */
+#define CEPH_RSIZE_DEFAULT 0 /* max read size */
+#define CEPH_RASIZE_DEFAULT (8192*1024) /* readahead */
#define CEPH_MAX_READDIR_DEFAULT 1024
#define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024)
#define CEPH_SNAPDIRNAME_DEFAULT ".snap"
@@ -45,8 +46,9 @@ struct ceph_mount_options {
int flags;
int sb_flags;
- int wsize;
- int rsize; /* max readahead */
+ int wsize; /* max write size */
+ int rsize; /* max read size */
+ int rasize; /* max readahead */
int congestion_kb; /* max writeback in flight */
int caps_wanted_delay_min, caps_wanted_delay_max;
int cap_release_safety;
@@ -201,6 +203,7 @@ struct ceph_inode_xattr {
* Ceph dentry state
*/
struct ceph_dentry_info {
+ unsigned long flags;
struct ceph_mds_session *lease_session;
u32 lease_gen, lease_shared_gen;
u32 lease_seq;
@@ -211,6 +214,18 @@ struct ceph_dentry_info {
u64 offset;
};
+/*
+ * dentry flags
+ *
+ * The locking for D_COMPLETE is a bit odd:
+ * - we can clear it at almost any time (see ceph_d_prune)
+ * - it is only meaningful if:
+ * - we hold dir inode i_ceph_lock
+ * - we hold dir FILE_SHARED caps
+ * - the dentry D_COMPLETE is set
+ */
+#define CEPH_D_COMPLETE 1 /* if set, d_u.d_subdirs is complete directory */
+
struct ceph_inode_xattrs_info {
/*
* (still encoded) xattr blob. we avoid the overhead of parsing
@@ -235,6 +250,8 @@ struct ceph_inode_xattrs_info {
struct ceph_inode_info {
struct ceph_vino i_vino; /* ceph ino + snap */
+ spinlock_t i_ceph_lock;
+
u64 i_version;
u32 i_time_warp_seq;
@@ -249,14 +266,14 @@ struct ceph_inode_info {
struct timespec i_rctime;
u64 i_rbytes, i_rfiles, i_rsubdirs;
u64 i_files, i_subdirs;
- u64 i_max_offset; /* largest readdir offset, set with I_COMPLETE */
+ u64 i_max_offset; /* largest readdir offset, set with D_COMPLETE */
struct rb_root i_fragtree;
struct mutex i_fragtree_mutex;
struct ceph_inode_xattrs_info i_xattrs;
- /* capabilities. protected _both_ by i_lock and cap->session's
+ /* capabilities. protected _both_ by i_ceph_lock and cap->session's
* s_mutex. */
struct rb_root i_caps; /* cap list */
struct ceph_cap *i_auth_cap; /* authoritative cap, if any */
@@ -344,9 +361,10 @@ static inline struct ceph_vino ceph_vino(struct inode *inode)
* x86_64+ino32 64 32
* x86_64 64 64
*/
-static inline u32 ceph_ino_to_ino32(ino_t ino)
+static inline u32 ceph_ino_to_ino32(__u64 vino)
{
- ino ^= ino >> (sizeof(ino) * 8 - 32);
+ u32 ino = vino & 0xffffffff;
+ ino ^= vino >> 32;
if (!ino)
ino = 1;
return ino;
@@ -357,11 +375,11 @@ static inline u32 ceph_ino_to_ino32(ino_t ino)
*/
static inline ino_t ceph_vino_to_ino(struct ceph_vino vino)
{
- ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */
#if BITS_PER_LONG == 32
- ino = ceph_ino_to_ino32(ino);
+ return ceph_ino_to_ino32(vino.ino);
+#else
+ return (ino_t)vino.ino;
#endif
- return ino;
}
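A worked example of the folding above, with an arbitrary 64-bit value:

	/* vino.ino = 0x0000000100000003
	 *   low 32 bits  = 0x00000003
	 *   high 32 bits = 0x00000001
	 *   ino = 0x00000003 ^ 0x00000001 = 0x00000002
	 * If the XOR of the two halves is 0, the result is forced to 1 so
	 * a 32-bit inode number is never zero. */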
/*
@@ -413,7 +431,6 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
/*
* Ceph inode.
*/
-#define CEPH_I_COMPLETE 1 /* we have complete directory cached */
#define CEPH_I_NODELAY 4 /* do not delay cap release */
#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
@@ -422,18 +439,18 @@ static inline void ceph_i_clear(struct inode *inode, unsigned mask)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
ci->i_ceph_flags &= ~mask;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
static inline void ceph_i_set(struct inode *inode, unsigned mask)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
ci->i_ceph_flags |= mask;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
}
static inline bool ceph_i_test(struct inode *inode, unsigned mask)
@@ -441,9 +458,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask)
struct ceph_inode_info *ci = ceph_inode(inode);
bool r;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
r = (ci->i_ceph_flags & mask) == mask;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return r;
}
@@ -471,6 +488,13 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off)
}
/*
+ * set/clear directory D_COMPLETE flag
+ */
+void ceph_dir_set_complete(struct inode *inode);
+void ceph_dir_clear_complete(struct inode *inode);
+bool ceph_dir_test_complete(struct inode *inode);
+
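These three helpers live in dir.c; a hedged sketch of the likely shape of the test helper (the d_find_alias() lookup and exact checks are assumptions, not quoted from the patch):

	/* D_COMPLETE is bit CEPH_D_COMPLETE in ceph_dentry_info->flags on
	 * the directory's alias dentry; atomic bitops let ceph_d_prune()
	 * clear it at any time without holding i_ceph_lock. */
	bool ceph_dir_test_complete(struct inode *inode)
	{
		struct dentry *dentry = d_find_alias(inode);
		bool complete = false;

		if (dentry) {
			if (ceph_dentry(dentry))
				complete = test_bit(CEPH_D_COMPLETE,
						    &ceph_dentry(dentry)->flags);
			dput(dentry);
		}
		return complete;
	}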
+/*
* caps helpers
*/
static inline bool __ceph_is_any_real_caps(struct ceph_inode_info *ci)
@@ -486,9 +510,9 @@ extern int __ceph_caps_issued_other(struct ceph_inode_info *ci,
static inline int ceph_caps_issued(struct ceph_inode_info *ci)
{
int issued;
- spin_lock(&ci->vfs_inode.i_lock);
+ spin_lock(&ci->i_ceph_lock);
issued = __ceph_caps_issued(ci, NULL);
- spin_unlock(&ci->vfs_inode.i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return issued;
}
@@ -496,9 +520,9 @@ static inline int ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask,
int touch)
{
int r;
- spin_lock(&ci->vfs_inode.i_lock);
+ spin_lock(&ci->i_ceph_lock);
r = __ceph_caps_issued_mask(ci, mask, touch);
- spin_unlock(&ci->vfs_inode.i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return r;
}
@@ -543,13 +567,16 @@ extern void ceph_reservation_status(struct ceph_fs_client *client,
/*
* we keep buffered readdir results attached to file->private_data
*/
+#define CEPH_F_SYNC 1
+#define CEPH_F_ATEND 2
+
struct ceph_file_info {
- int fmode; /* initialized on open */
+ short fmode; /* initialized on open */
+ short flags; /* CEPH_F_* */
/* readdir: position within the dir */
u32 frag;
struct ceph_mds_request *last_readdir;
- int at_end;
/* readdir: position within a frag */
unsigned offset; /* offset of last chunk, adjusted for . and .. */
@@ -650,6 +677,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap);
extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
+extern int ceph_snap_init(void);
+extern void ceph_snap_exit(void);
/*
* a cap_snap is "pending" if it is still awaiting an in-progress
@@ -692,7 +721,7 @@ extern void ceph_queue_invalidate(struct inode *inode);
extern void ceph_queue_writeback(struct inode *inode);
extern int ceph_do_getattr(struct inode *inode, int mask);
-extern int ceph_permission(struct inode *inode, int mask, unsigned int flags);
+extern int ceph_permission(struct inode *inode, int mask);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
@@ -718,17 +747,17 @@ extern int ceph_add_cap(struct inode *inode,
extern void __ceph_remove_cap(struct ceph_cap *cap);
static inline void ceph_remove_cap(struct ceph_cap *cap)
{
- struct inode *inode = &cap->ci->vfs_inode;
- spin_lock(&inode->i_lock);
+ spin_lock(&cap->ci->i_ceph_lock);
__ceph_remove_cap(cap);
- spin_unlock(&inode->i_lock);
+ spin_unlock(&cap->ci->i_ceph_lock);
}
extern void ceph_put_cap(struct ceph_mds_client *mdsc,
struct ceph_cap *cap);
extern void ceph_queue_caps_release(struct inode *inode);
extern int ceph_write_inode(struct inode *inode, struct writeback_control *wbc);
-extern int ceph_fsync(struct file *file, int datasync);
+extern int ceph_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync);
extern void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
struct ceph_mds_session *session);
extern struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci,
@@ -788,6 +817,8 @@ extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops,
ceph_snapdir_dentry_ops;
extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
+extern int ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err);
extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err);
@@ -795,7 +826,8 @@ extern void ceph_dentry_lru_add(struct dentry *dn);
extern void ceph_dentry_lru_touch(struct dentry *dn);
extern void ceph_dentry_lru_del(struct dentry *dn);
extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
-extern unsigned ceph_dentry_hash(struct dentry *dn);
+extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
+extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
/*
* our d_ops vary depending on whether the inode is live,
@@ -818,14 +850,6 @@ extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p,
int p_locks, int f_locks);
extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c);
-static inline struct inode *get_dentry_parent_inode(struct dentry *dentry)
-{
- if (dentry && dentry->d_parent)
- return dentry->d_parent->d_inode;
-
- return NULL;
-}
-
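The removed helper returned d_parent->d_inode without taking a reference, which becomes unsafe once the directory i_mutex is dropped (a concurrent rename may change d_parent). Its replacement, ceph_get_dentry_parent_inode(), pins the parent; a hedged sketch of its likely shape, matching the iput() calls added at each call site:

	struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
	{
		struct inode *inode = NULL;

		if (!dentry)
			return NULL;

		spin_lock(&dentry->d_lock);
		if (!IS_ROOT(dentry)) {
			inode = dentry->d_parent->d_inode;
			ihold(inode);	/* caller must iput() when done */
		}
		spin_unlock(&dentry->d_lock);
		return inode;
	}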
/* debugfs.c */
extern int ceph_fs_debugfs_init(struct ceph_fs_client *client);
extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index f42d730..a5e36e4 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -343,8 +343,8 @@ void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
}
static int __build_xattrs(struct inode *inode)
- __releases(inode->i_lock)
- __acquires(inode->i_lock)
+ __releases(ci->i_ceph_lock)
+ __acquires(ci->i_ceph_lock)
{
u32 namelen;
u32 numattr = 0;
@@ -372,7 +372,7 @@ start:
end = p + ci->i_xattrs.blob->vec.iov_len;
ceph_decode_32_safe(&p, end, numattr, bad);
xattr_version = ci->i_xattrs.version;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
GFP_NOFS);
@@ -387,7 +387,7 @@ start:
goto bad_lock;
}
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_xattrs.version != xattr_version) {
/* lost a race, retry */
for (i = 0; i < numattr; i++)
@@ -418,7 +418,7 @@ start:
return err;
bad_lock:
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
bad:
if (xattrs) {
for (i = 0; i < numattr; i++)
@@ -512,7 +512,7 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
if (vxattrs)
vxattr = ceph_match_vxattr(vxattrs, name);
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
ci->i_xattrs.version, ci->i_xattrs.index_version);
@@ -520,14 +520,14 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
(ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
goto get_xattr;
} else {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
/* get xattrs from mds (if we don't already have them) */
err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
if (err)
return err;
}
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (vxattr && vxattr->readonly) {
err = vxattr->getxattr_cb(ci, value, size);
@@ -558,7 +558,7 @@ get_xattr:
memcpy(value, xattr->val, xattr->val_len);
out:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return err;
}
@@ -573,7 +573,7 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
u32 len;
int i;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
ci->i_xattrs.version, ci->i_xattrs.index_version);
@@ -581,13 +581,13 @@ ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
(ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
goto list_xattr;
} else {
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
if (err)
return err;
}
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
err = __build_xattrs(inode);
if (err < 0)
@@ -619,7 +619,7 @@ list_xattr:
}
out:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
return err;
}
@@ -629,7 +629,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *inode = dentry->d_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct inode *parent_inode = dentry->d_parent->d_inode;
+ struct inode *parent_inode;
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = fsc->mdsc;
int err;
@@ -677,7 +677,9 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
req->r_data_len = size;
dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
+ parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
ceph_mdsc_put_request(req);
dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
@@ -737,7 +739,7 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
if (!xattr)
goto out;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
retry:
issued = __ceph_caps_issued(ci, NULL);
if (!(issued & CEPH_CAP_XATTR_EXCL))
@@ -750,12 +752,12 @@ retry:
required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
struct ceph_buffer *blob = NULL;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
dout(" preaallocating new blob size=%d\n", required_blob_size);
blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
if (!blob)
goto out;
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
if (ci->i_xattrs.prealloc_blob)
ceph_buffer_put(ci->i_xattrs.prealloc_blob);
ci->i_xattrs.prealloc_blob = blob;
@@ -768,13 +770,13 @@ retry:
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
ci->i_xattrs.dirty = true;
inode->i_ctime = CURRENT_TIME;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
return err;
do_sync:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
err = ceph_sync_setxattr(dentry, name, value, size, flags);
out:
kfree(newname);
@@ -788,7 +790,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct inode *inode = dentry->d_inode;
- struct inode *parent_inode = dentry->d_parent->d_inode;
+ struct inode *parent_inode;
struct ceph_mds_request *req;
int err;
@@ -802,7 +804,9 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name)
req->r_num_caps = 1;
req->r_path2 = kstrdup(name, GFP_NOFS);
+ parent_inode = ceph_get_dentry_parent_inode(dentry);
err = ceph_mdsc_do_request(mdsc, parent_inode, req);
+ iput(parent_inode);
ceph_mdsc_put_request(req);
return err;
}
@@ -829,7 +833,7 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
return -EOPNOTSUPP;
}
- spin_lock(&inode->i_lock);
+ spin_lock(&ci->i_ceph_lock);
__build_xattrs(inode);
issued = __ceph_caps_issued(ci, NULL);
dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));
@@ -842,12 +846,12 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
ci->i_xattrs.dirty = true;
inode->i_ctime = CURRENT_TIME;
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
if (dirty)
__mark_inode_dirty(inode, dirty);
return err;
do_sync:
- spin_unlock(&inode->i_lock);
+ spin_unlock(&ci->i_ceph_lock);
err = ceph_send_removexattr(dentry, name);
return err;
}
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 6901578..4b2e5cb 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -95,7 +95,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
spin_lock(&parent->d_lock);
list_for_each(child, &parent->d_subdirs)
{
- de = list_entry(child, struct dentry, d_u.d_child);
+ de = list_entry(child, struct dentry, d_child);
/* don't know what to do with negative dentries */
if ( ! de->d_inode )
continue;
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h
index 6b443ff..b7143cf 100644
--- a/fs/coda/coda_int.h
+++ b/fs/coda/coda_int.h
@@ -11,7 +11,7 @@ extern int coda_fake_statfs;
void coda_destroy_inodecache(void);
int coda_init_inodecache(void);
-int coda_fsync(struct file *coda_file, int datasync);
+int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync);
void coda_sysctl_init(void);
void coda_sysctl_clean(void);
diff --git a/fs/coda/coda_linux.c b/fs/coda/coda_linux.c
index 2bdbcc1..854ace7 100644
--- a/fs/coda/coda_linux.c
+++ b/fs/coda/coda_linux.c
@@ -104,7 +104,7 @@ void coda_vattr_to_iattr(struct inode *inode, struct coda_vattr *attr)
if (attr->va_gid != -1)
inode->i_gid = (gid_t) attr->va_gid;
if (attr->va_nlink != -1)
- inode->i_nlink = attr->va_nlink;
+ set_nlink(inode, attr->va_nlink);
if (attr->va_size != -1)
inode->i_size = attr->va_size;
if (attr->va_size != -1)
diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h
index 9b0c532..cc0ea9f 100644
--- a/fs/coda/coda_linux.h
+++ b/fs/coda/coda_linux.h
@@ -39,7 +39,7 @@ extern const struct file_operations coda_ioctl_operations;
/* operations shared over more than one file */
int coda_open(struct inode *i, struct file *f);
int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask, unsigned int flags);
+int coda_permission(struct inode *inode, int mask);
int coda_revalidate_inode(struct dentry *);
int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *);
int coda_setattr(struct dentry *, struct iattr *);
@@ -59,12 +59,11 @@ void coda_sysctl_clean(void);
#define CODA_ALLOC(ptr, cast, size) do { \
if (size < PAGE_SIZE) \
- ptr = kmalloc((unsigned long) size, GFP_KERNEL); \
+ ptr = kzalloc((unsigned long) size, GFP_KERNEL); \
else \
- ptr = (cast)vmalloc((unsigned long) size); \
+ ptr = (cast)vzalloc((unsigned long) size); \
if (!ptr) \
printk("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \
- else memset( ptr, 0, size ); \
} while (0)
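
The CODA_ALLOC change folds the explicit memset into the allocator. A small sketch of the equivalence:

	void *ptr;

	/* old: allocate, then zero by hand */
	ptr = kmalloc(size, GFP_KERNEL);
	if (ptr)
		memset(ptr, 0, size);

	/* new: kzalloc() returns zeroed memory directly;
	 * vzalloc(size) is the same for large, vmalloc'd buffers */
	ptr = kzalloc(size, GFP_KERNEL);
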
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 2b8dae4..28e7e13 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -132,11 +132,11 @@ exit:
}
-int coda_permission(struct inode *inode, int mask, unsigned int flags)
+int coda_permission(struct inode *inode, int mask)
{
int error;
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
@@ -340,7 +340,7 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
if (!error) {
/* VFS may delete the child */
if (de->d_inode)
- de->d_inode->i_nlink = 0;
+ clear_nlink(de->d_inode);
/* fix the link count of the parent */
coda_dir_drop_nlink(dir);
@@ -449,8 +449,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf,
struct file *host_file;
struct dentry *de;
struct venus_dirent *vdir;
- unsigned long vdir_size =
- (unsigned long)(&((struct venus_dirent *)0)->d_name);
+ unsigned long vdir_size = offsetof(struct venus_dirent, d_name);
unsigned int type;
struct qstr name;
ino_t ino;
@@ -474,7 +473,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf,
coda_file->f_pos++;
}
if (coda_file->f_pos == 1) {
- ret = filldir(buf, "..", 2, 1, de->d_parent->d_inode->i_ino, DT_DIR);
+ ret = filldir(buf, "..", 2, 1, parent_ino(de), DT_DIR);
if (ret < 0)
goto out;
result++;
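
Two small cleanups in coda_venus_readdir: the open-coded null-pointer cast becomes offsetof(), and the raw de->d_parent->d_inode->i_ino chain becomes parent_ino(), which takes d_lock around the parent walk. A sketch of the offsetof equivalence:

	#include <linux/stddef.h>

	/* open-coded byte offset of d_name within struct venus_dirent */
	unsigned long off = (unsigned long)(&((struct venus_dirent *)0)->d_name);
	/* identical value, standard spelling */
	unsigned long off2 = offsetof(struct venus_dirent, d_name);
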
diff --git a/fs/coda/file.c b/fs/coda/file.c
index 0433057..8edd404 100644
--- a/fs/coda/file.c
+++ b/fs/coda/file.c
@@ -199,7 +199,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file)
return 0;
}
-int coda_fsync(struct file *coda_file, int datasync)
+int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync)
{
struct file *host_file;
struct inode *coda_inode = coda_file->f_path.dentry->d_inode;
@@ -210,6 +210,11 @@ int coda_fsync(struct file *coda_file, int datasync)
S_ISLNK(coda_inode->i_mode)))
return -EINVAL;
+ err = filemap_write_and_wait_range(coda_inode->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&coda_inode->i_mutex);
+
cfi = CODA_FTOC(coda_file);
BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC);
host_file = cfi->cfi_container;
@@ -217,6 +222,7 @@ int coda_fsync(struct file *coda_file, int datasync)
err = vfs_fsync(host_file, datasync);
if (!err && !datasync)
err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode));
+ mutex_unlock(&coda_inode->i_mutex);
return err;
}
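
coda_fsync is updated for the ->fsync prototype that passes a byte range and is no longer called under i_mutex. Condensed from the hunks above, the calling convention looks roughly like this sketch:

	static int example_fsync(struct file *file, loff_t start, loff_t end,
				 int datasync)
	{
		struct inode *inode = file->f_path.dentry->d_inode;
		int err;

		/* flush and wait on just the requested byte range */
		err = filemap_write_and_wait_range(inode->i_mapping, start, end);
		if (err)
			return err;

		/* the VFS no longer holds i_mutex across ->fsync */
		mutex_lock(&inode->i_mutex);
		/* ... filesystem-specific flushing ... */
		mutex_unlock(&inode->i_mutex);
		return 0;
	}
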
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c
index cb140ef..ee0981f 100644
--- a/fs/coda/pioctl.c
+++ b/fs/coda/pioctl.c
@@ -24,7 +24,7 @@
#include "coda_linux.h"
/* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags);
+static int coda_ioctl_permission(struct inode *inode, int mask);
static long coda_pioctl(struct file *filp, unsigned int cmd,
unsigned long user_data);
@@ -41,7 +41,7 @@ const struct file_operations coda_ioctl_operations = {
};
/* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask, unsigned int flags)
+static int coda_ioctl_permission(struct inode *inode, int mask)
{
return (mask & MAY_EXEC) ? -EACCES : 0;
}
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 9a37a9b..5ef72c8 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -56,10 +56,19 @@ static void configfs_d_iput(struct dentry * dentry,
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
- BUG_ON(sd->s_dentry != dentry);
/* Coordinate with configfs_readdir */
spin_lock(&configfs_dirent_lock);
- sd->s_dentry = NULL;
+ /* Coordinate with configfs_attach_attr, which takes a reference
+ * on sd (sd->s_count) and points sd->s_dentry at a newly
+ * allocated dentry. Only clear sd->s_dentry when this dentry is
+ * the sole owner of sd; otherwise configfs_d_iput could run just
+ * after configfs_attach_attr and clear sd->s_dentry while it is
+ * still in use.
+ */
+ if (atomic_read(&sd->s_count) <= 2)
+ sd->s_dentry = NULL;
+
spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
@@ -436,8 +445,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
struct configfs_attribute * attr = sd->s_element;
int error;
+ spin_lock(&configfs_dirent_lock);
dentry->d_fsdata = configfs_get(sd);
sd->s_dentry = dentry;
+ spin_unlock(&configfs_dirent_lock);
+
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
configfs_init_file);
if (error) {
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index c83f476..9d8715c 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -23,7 +23,8 @@
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*
- * Please see Documentation/filesystems/configfs.txt for more information.
+ * Please see Documentation/filesystems/configfs/configfs.txt for more
+ * information.
*/
#undef DEBUG
@@ -291,7 +292,7 @@ int __init configfs_inode_init(void)
return bdi_init(&configfs_backing_dev_info);
}
-void __exit configfs_inode_exit(void)
+void configfs_inode_exit(void)
{
bdi_destroy(&configfs_backing_dev_info);
}
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index 76dc4c3..50cee7f 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -23,7 +23,7 @@
*
* configfs Copyright (C) 2005 Oracle. All rights reserved.
*
- * Please see the file Documentation/filesystems/configfs.txt for
+ * Please see the file Documentation/filesystems/configfs/configfs.txt for
* critical information about using the config_item interface.
*/
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index ecc6217..276e15c 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -143,28 +143,26 @@ static int __init configfs_init(void)
goto out;
config_kobj = kobject_create_and_add("config", kernel_kobj);
- if (!config_kobj) {
- kmem_cache_destroy(configfs_dir_cachep);
- configfs_dir_cachep = NULL;
- goto out;
- }
+ if (!config_kobj)
+ goto out2;
+
+ err = configfs_inode_init();
+ if (err)
+ goto out3;
err = register_filesystem(&configfs_fs_type);
- if (err) {
- printk(KERN_ERR "configfs: Unable to register filesystem!\n");
- kobject_put(config_kobj);
- kmem_cache_destroy(configfs_dir_cachep);
- configfs_dir_cachep = NULL;
- goto out;
- }
+ if (err)
+ goto out4;
- err = configfs_inode_init();
- if (err) {
- unregister_filesystem(&configfs_fs_type);
- kobject_put(config_kobj);
- kmem_cache_destroy(configfs_dir_cachep);
- configfs_dir_cachep = NULL;
- }
+ return 0;
+out4:
+ printk(KERN_ERR "configfs: Unable to register filesystem!\n");
+ configfs_inode_exit();
+out3:
+ kobject_put(config_kobj);
+out2:
+ kmem_cache_destroy(configfs_dir_cachep);
+ configfs_dir_cachep = NULL;
out:
return err;
}
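
configfs_init() is reworked into the conventional goto unwind ladder: each failure jumps to a label that undoes every earlier step in reverse order, so no cleanup code is duplicated. The generic shape, with illustrative names:

	static int __init example_init(void)
	{
		int err;

		err = setup_cache();		/* step 1 */
		if (err)
			goto out;
		err = setup_kobject();		/* step 2 */
		if (err)
			goto out_cache;
		err = register_fs();		/* step 3 */
		if (err)
			goto out_kobject;
		return 0;

	out_kobject:
		teardown_kobject();
	out_cache:
		teardown_cache();
	out:
		return err;
	}
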
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
index abc49f2..90e5997 100644
--- a/fs/dlm/ast.c
+++ b/fs/dlm/ast.c
@@ -14,17 +14,9 @@
#include "dlm_internal.h"
#include "lock.h"
#include "user.h"
-#include "ast.h"
-
-#define WAKE_ASTS 0
-
-static uint64_t ast_seq_count;
-static struct list_head ast_queue;
-static spinlock_t ast_queue_lock;
-static struct task_struct * astd_task;
-static unsigned long astd_wakeflags;
-static struct mutex astd_running;
+static uint64_t dlm_cb_seq;
+static spinlock_t dlm_cb_seq_spin;
static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
{
@@ -57,21 +49,13 @@ static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
}
}
-void dlm_del_ast(struct dlm_lkb *lkb)
-{
- spin_lock(&ast_queue_lock);
- if (!list_empty(&lkb->lkb_astqueue))
- list_del_init(&lkb->lkb_astqueue);
- spin_unlock(&ast_queue_lock);
-}
-
int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
int status, uint32_t sbflags, uint64_t seq)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
uint64_t prev_seq;
int prev_mode;
- int i;
+ int i, rv;
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
if (lkb->lkb_callbacks[i].seq)
@@ -100,7 +84,8 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
mode,
(unsigned long long)prev_seq,
prev_mode);
- return 0;
+ rv = 0;
+ goto out;
}
}
@@ -109,6 +94,7 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
lkb->lkb_callbacks[i].mode = mode;
lkb->lkb_callbacks[i].sb_status = status;
lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
+ rv = 0;
break;
}
@@ -117,21 +103,24 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
lkb->lkb_id, (unsigned long long)seq,
flags, mode, status, sbflags);
dlm_dump_lkb_callbacks(lkb);
- return -1;
+ rv = -1;
+ goto out;
}
-
- return 0;
+ out:
+ return rv;
}
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_callback *cb, int *resid)
{
- int i;
+ int i, rv;
*resid = 0;
- if (!lkb->lkb_callbacks[0].seq)
- return -ENOENT;
+ if (!lkb->lkb_callbacks[0].seq) {
+ rv = -ENOENT;
+ goto out;
+ }
/* oldest undelivered cb is callbacks[0] */
@@ -163,7 +152,8 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
cb->mode,
(unsigned long long)lkb->lkb_last_cast.seq,
lkb->lkb_last_cast.mode);
- return 0;
+ rv = 0;
+ goto out;
}
}
@@ -176,171 +166,150 @@ int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
lkb->lkb_last_bast_time = ktime_get();
}
-
- return 0;
+ rv = 0;
+ out:
+ return rv;
}
-void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
- uint32_t sbflags)
+void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
+ uint32_t sbflags)
{
- uint64_t seq;
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+ uint64_t new_seq, prev_seq;
int rv;
- spin_lock(&ast_queue_lock);
-
- seq = ++ast_seq_count;
+ spin_lock(&dlm_cb_seq_spin);
+ new_seq = ++dlm_cb_seq;
+ spin_unlock(&dlm_cb_seq_spin);
if (lkb->lkb_flags & DLM_IFL_USER) {
- spin_unlock(&ast_queue_lock);
- dlm_user_add_ast(lkb, flags, mode, status, sbflags, seq);
+ dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq);
return;
}
- rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, seq);
- if (rv < 0) {
- spin_unlock(&ast_queue_lock);
- return;
- }
+ mutex_lock(&lkb->lkb_cb_mutex);
+ prev_seq = lkb->lkb_callbacks[0].seq;
- if (list_empty(&lkb->lkb_astqueue)) {
+ rv = dlm_add_lkb_callback(lkb, flags, mode, status, sbflags, new_seq);
+ if (rv < 0)
+ goto out;
+
+ if (!prev_seq) {
kref_get(&lkb->lkb_ref);
- list_add_tail(&lkb->lkb_astqueue, &ast_queue);
- }
- spin_unlock(&ast_queue_lock);
- set_bit(WAKE_ASTS, &astd_wakeflags);
- wake_up_process(astd_task);
+ if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
+ mutex_lock(&ls->ls_cb_mutex);
+ list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
+ mutex_unlock(&ls->ls_cb_mutex);
+ } else {
+ queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
+ }
+ }
+ out:
+ mutex_unlock(&lkb->lkb_cb_mutex);
}
-static void process_asts(void)
+void dlm_callback_work(struct work_struct *work)
{
- struct dlm_ls *ls = NULL;
- struct dlm_rsb *r = NULL;
- struct dlm_lkb *lkb;
+ struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work);
+ struct dlm_ls *ls = lkb->lkb_resource->res_ls;
void (*castfn) (void *astparam);
void (*bastfn) (void *astparam, int mode);
struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
int i, rv, resid;
-repeat:
- spin_lock(&ast_queue_lock);
- list_for_each_entry(lkb, &ast_queue, lkb_astqueue) {
- r = lkb->lkb_resource;
- ls = r->res_ls;
+ memset(&callbacks, 0, sizeof(callbacks));
- if (dlm_locking_stopped(ls))
- continue;
-
- /* we remove from astqueue list and remove everything in
- lkb_callbacks before releasing the spinlock so empty
- lkb_astqueue is always consistent with empty lkb_callbacks */
-
- list_del_init(&lkb->lkb_astqueue);
-
- castfn = lkb->lkb_astfn;
- bastfn = lkb->lkb_bastfn;
+ mutex_lock(&lkb->lkb_cb_mutex);
+ if (!lkb->lkb_callbacks[0].seq) {
+ /* no callback work exists, shouldn't happen */
+ log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id);
+ dlm_print_lkb(lkb);
+ dlm_dump_lkb_callbacks(lkb);
+ }
- memset(&callbacks, 0, sizeof(callbacks));
+ for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
+ rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
+ if (rv < 0)
+ break;
+ }
- for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
- rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
- if (rv < 0)
- break;
- }
- spin_unlock(&ast_queue_lock);
+ if (resid) {
+ /* cbs remain, loop should have removed all, shouldn't happen */
+ log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id,
+ resid);
+ dlm_print_lkb(lkb);
+ dlm_dump_lkb_callbacks(lkb);
+ }
+ mutex_unlock(&lkb->lkb_cb_mutex);
- if (resid) {
- /* shouldn't happen, for loop should have removed all */
- log_error(ls, "callback resid %d lkb %x",
- resid, lkb->lkb_id);
- }
+ castfn = lkb->lkb_astfn;
+ bastfn = lkb->lkb_bastfn;
- for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
- if (!callbacks[i].seq)
- break;
- if (callbacks[i].flags & DLM_CB_SKIP) {
- continue;
- } else if (callbacks[i].flags & DLM_CB_BAST) {
- bastfn(lkb->lkb_astparam, callbacks[i].mode);
- } else if (callbacks[i].flags & DLM_CB_CAST) {
- lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
- lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
- castfn(lkb->lkb_astparam);
- }
+ for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
+ if (!callbacks[i].seq)
+ break;
+ if (callbacks[i].flags & DLM_CB_SKIP) {
+ continue;
+ } else if (callbacks[i].flags & DLM_CB_BAST) {
+ bastfn(lkb->lkb_astparam, callbacks[i].mode);
+ } else if (callbacks[i].flags & DLM_CB_CAST) {
+ lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
+ lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
+ castfn(lkb->lkb_astparam);
}
-
- /* removes ref for ast_queue, may cause lkb to be freed */
- dlm_put_lkb(lkb);
-
- cond_resched();
- goto repeat;
}
- spin_unlock(&ast_queue_lock);
-}
-
-static inline int no_asts(void)
-{
- int ret;
- spin_lock(&ast_queue_lock);
- ret = list_empty(&ast_queue);
- spin_unlock(&ast_queue_lock);
- return ret;
+ /* undo kref_get from dlm_add_callback, may cause lkb to be freed */
+ dlm_put_lkb(lkb);
}
-static int dlm_astd(void *data)
+int dlm_callback_start(struct dlm_ls *ls)
{
- while (!kthread_should_stop()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (!test_bit(WAKE_ASTS, &astd_wakeflags))
- schedule();
- set_current_state(TASK_RUNNING);
-
- mutex_lock(&astd_running);
- if (test_and_clear_bit(WAKE_ASTS, &astd_wakeflags))
- process_asts();
- mutex_unlock(&astd_running);
+ ls->ls_callback_wq = alloc_workqueue("dlm_callback",
+ WQ_UNBOUND |
+ WQ_MEM_RECLAIM |
+ WQ_NON_REENTRANT,
+ 0);
+ if (!ls->ls_callback_wq) {
+ log_print("can't start dlm_callback workqueue");
+ return -ENOMEM;
}
return 0;
}
-void dlm_astd_wake(void)
+void dlm_callback_stop(struct dlm_ls *ls)
{
- if (!no_asts()) {
- set_bit(WAKE_ASTS, &astd_wakeflags);
- wake_up_process(astd_task);
- }
+ if (ls->ls_callback_wq)
+ destroy_workqueue(ls->ls_callback_wq);
}
-int dlm_astd_start(void)
+void dlm_callback_suspend(struct dlm_ls *ls)
{
- struct task_struct *p;
- int error = 0;
-
- INIT_LIST_HEAD(&ast_queue);
- spin_lock_init(&ast_queue_lock);
- mutex_init(&astd_running);
-
- p = kthread_run(dlm_astd, NULL, "dlm_astd");
- if (IS_ERR(p))
- error = PTR_ERR(p);
- else
- astd_task = p;
- return error;
-}
+ set_bit(LSFL_CB_DELAY, &ls->ls_flags);
-void dlm_astd_stop(void)
-{
- kthread_stop(astd_task);
+ if (ls->ls_callback_wq)
+ flush_workqueue(ls->ls_callback_wq);
}
-void dlm_astd_suspend(void)
+void dlm_callback_resume(struct dlm_ls *ls)
{
- mutex_lock(&astd_running);
-}
+ struct dlm_lkb *lkb, *safe;
+ int count = 0;
-void dlm_astd_resume(void)
-{
- mutex_unlock(&astd_running);
+ clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
+
+ if (!ls->ls_callback_wq)
+ return;
+
+ mutex_lock(&ls->ls_cb_mutex);
+ list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
+ list_del_init(&lkb->lkb_cb_list);
+ queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
+ count++;
+ }
+ mutex_unlock(&ls->ls_cb_mutex);
+
+ log_debug(ls, "dlm_callback_resume %d", count);
}
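
The rewrite above retires the single dlm_astd kthread in favor of a per-lockspace workqueue: each lkb embeds a work_struct, and queueing it replaces waking the daemon. A condensed sketch of the moving parts, with illustrative names:

	#include <linux/workqueue.h>

	struct example_lkb {
		struct work_struct cb_work;
		/* pending callbacks live in the lkb itself */
	};

	static struct workqueue_struct *example_wq;

	static void example_cb_work(struct work_struct *work)
	{
		struct example_lkb *lkb =
			container_of(work, struct example_lkb, cb_work);
		/* drain the lkb's callback slots, invoke ast/bast functions */
	}

	static int example_start(void)
	{
		example_wq = alloc_workqueue("example_cb",
					     WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
		return example_wq ? 0 : -ENOMEM;
	}

	static void example_queue(struct example_lkb *lkb)
	{
		INIT_WORK(&lkb->cb_work, example_cb_work);
		/* replaces set_bit(WAKE_ASTS)/wake_up_process(astd_task) */
		queue_work(example_wq, &lkb->cb_work);
	}
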
diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
index 8aa89c9..757b551 100644
--- a/fs/dlm/ast.h
+++ b/fs/dlm/ast.h
@@ -18,14 +18,15 @@ int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
int status, uint32_t sbflags, uint64_t seq);
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_callback *cb, int *resid);
-void dlm_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
- uint32_t sbflags);
+void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode, int status,
+ uint32_t sbflags);
-void dlm_astd_wake(void);
-int dlm_astd_start(void);
-void dlm_astd_stop(void);
-void dlm_astd_suspend(void);
-void dlm_astd_resume(void);
+void dlm_callback_work(struct work_struct *work);
+int dlm_callback_start(struct dlm_ls *ls);
+void dlm_callback_stop(struct dlm_ls *ls);
+void dlm_callback_suspend(struct dlm_ls *ls);
+void dlm_callback_resume(struct dlm_ls *ls);
#endif
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 9b026ea..6cf72fc 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -28,7 +28,8 @@
* /config/dlm/<cluster>/spaces/<space>/nodes/<node>/weight
* /config/dlm/<cluster>/comms/<comm>/nodeid
* /config/dlm/<cluster>/comms/<comm>/local
- * /config/dlm/<cluster>/comms/<comm>/addr
+ * /config/dlm/<cluster>/comms/<comm>/addr (write only)
+ * /config/dlm/<cluster>/comms/<comm>/addr_list (read only)
* The <cluster> level is useless, but I haven't figured out how to avoid it.
*/
@@ -80,6 +81,7 @@ static ssize_t comm_local_write(struct dlm_comm *cm, const char *buf,
size_t len);
static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf,
size_t len);
+static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf);
static ssize_t node_nodeid_read(struct dlm_node *nd, char *buf);
static ssize_t node_nodeid_write(struct dlm_node *nd, const char *buf,
size_t len);
@@ -92,7 +94,6 @@ struct dlm_cluster {
unsigned int cl_tcp_port;
unsigned int cl_buffer_size;
unsigned int cl_rsbtbl_size;
- unsigned int cl_lkbtbl_size;
unsigned int cl_dirtbl_size;
unsigned int cl_recover_timer;
unsigned int cl_toss_secs;
@@ -101,13 +102,13 @@ struct dlm_cluster {
unsigned int cl_protocol;
unsigned int cl_timewarn_cs;
unsigned int cl_waitwarn_us;
+ unsigned int cl_new_rsb_count;
};
enum {
CLUSTER_ATTR_TCP_PORT = 0,
CLUSTER_ATTR_BUFFER_SIZE,
CLUSTER_ATTR_RSBTBL_SIZE,
- CLUSTER_ATTR_LKBTBL_SIZE,
CLUSTER_ATTR_DIRTBL_SIZE,
CLUSTER_ATTR_RECOVER_TIMER,
CLUSTER_ATTR_TOSS_SECS,
@@ -116,6 +117,7 @@ enum {
CLUSTER_ATTR_PROTOCOL,
CLUSTER_ATTR_TIMEWARN_CS,
CLUSTER_ATTR_WAITWARN_US,
+ CLUSTER_ATTR_NEW_RSB_COUNT,
};
struct cluster_attribute {
@@ -160,7 +162,6 @@ __CONFIGFS_ATTR(name, 0644, name##_read, name##_write)
CLUSTER_ATTR(tcp_port, 1);
CLUSTER_ATTR(buffer_size, 1);
CLUSTER_ATTR(rsbtbl_size, 1);
-CLUSTER_ATTR(lkbtbl_size, 1);
CLUSTER_ATTR(dirtbl_size, 1);
CLUSTER_ATTR(recover_timer, 1);
CLUSTER_ATTR(toss_secs, 1);
@@ -169,12 +170,12 @@ CLUSTER_ATTR(log_debug, 0);
CLUSTER_ATTR(protocol, 0);
CLUSTER_ATTR(timewarn_cs, 1);
CLUSTER_ATTR(waitwarn_us, 0);
+CLUSTER_ATTR(new_rsb_count, 0);
static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
[CLUSTER_ATTR_BUFFER_SIZE] = &cluster_attr_buffer_size.attr,
[CLUSTER_ATTR_RSBTBL_SIZE] = &cluster_attr_rsbtbl_size.attr,
- [CLUSTER_ATTR_LKBTBL_SIZE] = &cluster_attr_lkbtbl_size.attr,
[CLUSTER_ATTR_DIRTBL_SIZE] = &cluster_attr_dirtbl_size.attr,
[CLUSTER_ATTR_RECOVER_TIMER] = &cluster_attr_recover_timer.attr,
[CLUSTER_ATTR_TOSS_SECS] = &cluster_attr_toss_secs.attr,
@@ -183,6 +184,7 @@ static struct configfs_attribute *cluster_attrs[] = {
[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
+ [CLUSTER_ATTR_NEW_RSB_COUNT] = &cluster_attr_new_rsb_count.attr,
NULL,
};
@@ -190,6 +192,7 @@ enum {
COMM_ATTR_NODEID = 0,
COMM_ATTR_LOCAL,
COMM_ATTR_ADDR,
+ COMM_ATTR_ADDR_LIST,
};
struct comm_attribute {
@@ -217,14 +220,22 @@ static struct comm_attribute comm_attr_local = {
static struct comm_attribute comm_attr_addr = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "addr",
- .ca_mode = S_IRUGO | S_IWUSR },
+ .ca_mode = S_IWUSR },
.store = comm_addr_write,
};
+static struct comm_attribute comm_attr_addr_list = {
+ .attr = { .ca_owner = THIS_MODULE,
+ .ca_name = "addr_list",
+ .ca_mode = S_IRUGO },
+ .show = comm_addr_list_read,
+};
+
static struct configfs_attribute *comm_attrs[] = {
[COMM_ATTR_NODEID] = &comm_attr_nodeid.attr,
[COMM_ATTR_LOCAL] = &comm_attr_local.attr,
[COMM_ATTR_ADDR] = &comm_attr_addr.attr,
+ [COMM_ATTR_ADDR_LIST] = &comm_attr_addr_list.attr,
NULL,
};
@@ -435,7 +446,6 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_tcp_port = dlm_config.ci_tcp_port;
cl->cl_buffer_size = dlm_config.ci_buffer_size;
cl->cl_rsbtbl_size = dlm_config.ci_rsbtbl_size;
- cl->cl_lkbtbl_size = dlm_config.ci_lkbtbl_size;
cl->cl_dirtbl_size = dlm_config.ci_dirtbl_size;
cl->cl_recover_timer = dlm_config.ci_recover_timer;
cl->cl_toss_secs = dlm_config.ci_toss_secs;
@@ -444,6 +454,7 @@ static struct config_group *make_cluster(struct config_group *g,
cl->cl_protocol = dlm_config.ci_protocol;
cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
+ cl->cl_new_rsb_count = dlm_config.ci_new_rsb_count;
space_list = &sps->ss_group;
comm_list = &cms->cs_group;
@@ -720,6 +731,50 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
return len;
}
+static ssize_t comm_addr_list_read(struct dlm_comm *cm, char *buf)
+{
+ ssize_t s;
+ ssize_t allowance;
+ int i;
+ struct sockaddr_storage *addr;
+ struct sockaddr_in *addr_in;
+ struct sockaddr_in6 *addr_in6;
+
+ /* Taken from ip6_addr_string() defined in lib/vsprintf.c */
+ char buf0[sizeof("AF_INET6 xxxx:xxxx:xxxx:xxxx:xxxx:xxxx:255.255.255.255\n")];
+
+ /* Derived from SIMPLE_ATTR_SIZE of fs/configfs/file.c */
+ allowance = 4096;
+ buf[0] = '\0';
+
+ for (i = 0; i < cm->addr_count; i++) {
+ addr = cm->addr[i];
+
+ switch(addr->ss_family) {
+ case AF_INET:
+ addr_in = (struct sockaddr_in *)addr;
+ s = sprintf(buf0, "AF_INET %pI4\n", &addr_in->sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ addr_in6 = (struct sockaddr_in6 *)addr;
+ s = sprintf(buf0, "AF_INET6 %pI6\n", &addr_in6->sin6_addr);
+ break;
+ default:
+ s = sprintf(buf0, "%s\n", "<UNKNOWN>");
+ break;
+ }
+ allowance -= s;
+ if (allowance >= 0)
+ strcat(buf, buf0);
+ else {
+ allowance += s;
+ break;
+ }
+ }
+ return 4096 - allowance;
+}
+
static ssize_t show_node(struct config_item *i, struct configfs_attribute *a,
char *buf)
{
@@ -983,7 +1038,6 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_TCP_PORT 21064
#define DEFAULT_BUFFER_SIZE 4096
#define DEFAULT_RSBTBL_SIZE 1024
-#define DEFAULT_LKBTBL_SIZE 1024
#define DEFAULT_DIRTBL_SIZE 1024
#define DEFAULT_RECOVER_TIMER 5
#define DEFAULT_TOSS_SECS 10
@@ -992,12 +1046,12 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
#define DEFAULT_PROTOCOL 0
#define DEFAULT_TIMEWARN_CS 500 /* 5 sec = 500 centiseconds */
#define DEFAULT_WAITWARN_US 0
+#define DEFAULT_NEW_RSB_COUNT 128
struct dlm_config_info dlm_config = {
.ci_tcp_port = DEFAULT_TCP_PORT,
.ci_buffer_size = DEFAULT_BUFFER_SIZE,
.ci_rsbtbl_size = DEFAULT_RSBTBL_SIZE,
- .ci_lkbtbl_size = DEFAULT_LKBTBL_SIZE,
.ci_dirtbl_size = DEFAULT_DIRTBL_SIZE,
.ci_recover_timer = DEFAULT_RECOVER_TIMER,
.ci_toss_secs = DEFAULT_TOSS_SECS,
@@ -1005,6 +1059,7 @@ struct dlm_config_info dlm_config = {
.ci_log_debug = DEFAULT_LOG_DEBUG,
.ci_protocol = DEFAULT_PROTOCOL,
.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
- .ci_waitwarn_us = DEFAULT_WAITWARN_US
+ .ci_waitwarn_us = DEFAULT_WAITWARN_US,
+ .ci_new_rsb_count = DEFAULT_NEW_RSB_COUNT
};
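
The new addr_list attribute relies on the kernel's %pI4/%pI6 printk extensions, which format binary in_addr/in6_addr values directly. A small sketch, assuming a populated sockaddr_storage:

	static void example_print_addr(const struct sockaddr_storage *ss)
	{
		if (ss->ss_family == AF_INET) {
			const struct sockaddr_in *in =
				(const struct sockaddr_in *)ss;
			pr_info("AF_INET %pI4\n", &in->sin_addr.s_addr);
		} else if (ss->ss_family == AF_INET6) {
			const struct sockaddr_in6 *in6 =
				(const struct sockaddr_in6 *)ss;
			pr_info("AF_INET6 %pI6\n", &in6->sin6_addr);
		}
	}
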
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index dd0ce24..3099d0d 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -20,7 +20,6 @@ struct dlm_config_info {
int ci_tcp_port;
int ci_buffer_size;
int ci_rsbtbl_size;
- int ci_lkbtbl_size;
int ci_dirtbl_size;
int ci_recover_timer;
int ci_toss_secs;
@@ -29,6 +28,7 @@ struct dlm_config_info {
int ci_protocol;
int ci_timewarn_cs;
int ci_waitwarn_us;
+ int ci_new_rsb_count;
};
extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 0262451..fe2860c 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -37,6 +37,7 @@
#include <linux/jhash.h>
#include <linux/miscdevice.h>
#include <linux/mutex.h>
+#include <linux/idr.h>
#include <asm/uaccess.h>
#include <linux/dlm.h>
@@ -52,7 +53,6 @@ struct dlm_ls;
struct dlm_lkb;
struct dlm_rsb;
struct dlm_member;
-struct dlm_lkbtable;
struct dlm_rsbtable;
struct dlm_dirtable;
struct dlm_direntry;
@@ -108,11 +108,6 @@ struct dlm_rsbtable {
spinlock_t lock;
};
-struct dlm_lkbtable {
- struct list_head list;
- rwlock_t lock;
- uint16_t counter;
-};
/*
* Lockspace member (per node in a ls)
@@ -248,17 +243,18 @@ struct dlm_lkb {
int8_t lkb_wait_count;
int lkb_wait_nodeid; /* for debugging */
- struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
struct list_head lkb_statequeue; /* rsb g/c/w list */
struct list_head lkb_rsb_lookup; /* waiting for rsb lookup */
struct list_head lkb_wait_reply; /* waiting for remote reply */
- struct list_head lkb_astqueue; /* need ast to be sent */
struct list_head lkb_ownqueue; /* list of locks for a process */
struct list_head lkb_time_list;
ktime_t lkb_timestamp;
ktime_t lkb_wait_time;
unsigned long lkb_timeout_cs;
+ struct mutex lkb_cb_mutex;
+ struct work_struct lkb_cb_work;
+ struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */
struct dlm_callback lkb_callbacks[DLM_CALLBACKS_SIZE];
struct dlm_callback lkb_last_cast;
struct dlm_callback lkb_last_bast;
@@ -299,7 +295,7 @@ struct dlm_rsb {
int res_recover_locks_count;
char *res_lvbptr;
- char res_name[1];
+ char res_name[DLM_RESNAME_MAXLEN+1];
};
/* find_rsb() flags */
@@ -465,12 +461,12 @@ struct dlm_ls {
unsigned long ls_scan_time;
struct kobject ls_kobj;
+ struct idr ls_lkbidr;
+ spinlock_t ls_lkbidr_spin;
+
struct dlm_rsbtable *ls_rsbtbl;
uint32_t ls_rsbtbl_size;
- struct dlm_lkbtable *ls_lkbtbl;
- uint32_t ls_lkbtbl_size;
-
struct dlm_dirtable *ls_dirtbl;
uint32_t ls_dirtbl_size;
@@ -483,6 +479,10 @@ struct dlm_ls {
struct mutex ls_timeout_mutex;
struct list_head ls_timeout;
+ spinlock_t ls_new_rsb_spin;
+ int ls_new_rsb_count;
+ struct list_head ls_new_rsb; /* new rsb structs */
+
struct list_head ls_nodes; /* current nodes in ls */
struct list_head ls_nodes_gone; /* dead node list, recovery */
int ls_num_nodes; /* number of nodes in ls */
@@ -506,8 +506,12 @@ struct dlm_ls {
struct miscdevice ls_device;
+ struct workqueue_struct *ls_callback_wq;
+
/* recovery related */
+ struct mutex ls_cb_mutex;
+ struct list_head ls_cb_delay; /* save for queue_work later */
struct timer_list ls_timer;
struct task_struct *ls_recoverd_task;
struct mutex ls_recoverd_active;
@@ -544,6 +548,7 @@ struct dlm_ls {
#define LSFL_RCOM_WAIT 4
#define LSFL_UEVENT_WAIT 5
#define LSFL_TIMEWARN 6
+#define LSFL_CB_DELAY 7
/* much of this is just saving user space pointers associated with the
lock that we pass back to the user lib with an ast */
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index f71d0b5..83b5e32 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -305,7 +305,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
rv = -EDEADLK;
}
- dlm_add_ast(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
+ dlm_add_cb(lkb, DLM_CB_CAST, lkb->lkb_grmode, rv, lkb->lkb_sbflags);
}
static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -319,7 +319,7 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
if (is_master_copy(lkb)) {
send_bast(r, lkb, rqmode);
} else {
- dlm_add_ast(lkb, DLM_CB_BAST, rqmode, 0, 0);
+ dlm_add_cb(lkb, DLM_CB_BAST, rqmode, 0, 0);
}
}
@@ -327,19 +327,68 @@ static void queue_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rqmode)
* Basic operations on rsb's and lkb's
*/
-static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
+static int pre_rsb_struct(struct dlm_ls *ls)
+{
+ struct dlm_rsb *r1, *r2;
+ int count = 0;
+
+ spin_lock(&ls->ls_new_rsb_spin);
+ if (ls->ls_new_rsb_count > dlm_config.ci_new_rsb_count / 2) {
+ spin_unlock(&ls->ls_new_rsb_spin);
+ return 0;
+ }
+ spin_unlock(&ls->ls_new_rsb_spin);
+
+ r1 = dlm_allocate_rsb(ls);
+ r2 = dlm_allocate_rsb(ls);
+
+ spin_lock(&ls->ls_new_rsb_spin);
+ if (r1) {
+ list_add(&r1->res_hashchain, &ls->ls_new_rsb);
+ ls->ls_new_rsb_count++;
+ }
+ if (r2) {
+ list_add(&r2->res_hashchain, &ls->ls_new_rsb);
+ ls->ls_new_rsb_count++;
+ }
+ count = ls->ls_new_rsb_count;
+ spin_unlock(&ls->ls_new_rsb_spin);
+
+ if (!count)
+ return -ENOMEM;
+ return 0;
+}
+
+/* If ls->ls_new_rsb is empty, return -EAGAIN, so the caller can
+ unlock any spinlocks, go back and call pre_rsb_struct again.
+ Otherwise, take an rsb off the list and return it. */
+
+static int get_rsb_struct(struct dlm_ls *ls, char *name, int len,
+ struct dlm_rsb **r_ret)
{
struct dlm_rsb *r;
+ int count;
- r = dlm_allocate_rsb(ls, len);
- if (!r)
- return NULL;
+ spin_lock(&ls->ls_new_rsb_spin);
+ if (list_empty(&ls->ls_new_rsb)) {
+ count = ls->ls_new_rsb_count;
+ spin_unlock(&ls->ls_new_rsb_spin);
+ log_debug(ls, "find_rsb retry %d %d %s",
+ count, dlm_config.ci_new_rsb_count, name);
+ return -EAGAIN;
+ }
+
+ r = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb, res_hashchain);
+ list_del(&r->res_hashchain);
+ ls->ls_new_rsb_count--;
+ spin_unlock(&ls->ls_new_rsb_spin);
r->res_ls = ls;
r->res_length = len;
memcpy(r->res_name, name, len);
mutex_init(&r->res_mutex);
+ INIT_LIST_HEAD(&r->res_hashchain);
INIT_LIST_HEAD(&r->res_lookup);
INIT_LIST_HEAD(&r->res_grantqueue);
INIT_LIST_HEAD(&r->res_convertqueue);
@@ -347,7 +396,8 @@ static struct dlm_rsb *create_rsb(struct dlm_ls *ls, char *name, int len)
INIT_LIST_HEAD(&r->res_root_list);
INIT_LIST_HEAD(&r->res_recover_list);
- return r;
+ *r_ret = r;
+ return 0;
}
static int search_rsb_list(struct list_head *head, char *name, int len,
@@ -405,16 +455,6 @@ static int _search_rsb(struct dlm_ls *ls, char *name, int len, int b,
return error;
}
-static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
- unsigned int flags, struct dlm_rsb **r_ret)
-{
- int error;
- spin_lock(&ls->ls_rsbtbl[b].lock);
- error = _search_rsb(ls, name, len, b, flags, r_ret);
- spin_unlock(&ls->ls_rsbtbl[b].lock);
- return error;
-}
-
/*
* Find rsb in rsbtbl and potentially create/add one
*
@@ -432,35 +472,48 @@ static int search_rsb(struct dlm_ls *ls, char *name, int len, int b,
static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
unsigned int flags, struct dlm_rsb **r_ret)
{
- struct dlm_rsb *r = NULL, *tmp;
+ struct dlm_rsb *r = NULL;
uint32_t hash, bucket;
- int error = -EINVAL;
+ int error;
- if (namelen > DLM_RESNAME_MAXLEN)
+ if (namelen > DLM_RESNAME_MAXLEN) {
+ error = -EINVAL;
goto out;
+ }
if (dlm_no_directory(ls))
flags |= R_CREATE;
- error = 0;
hash = jhash(name, namelen, 0);
bucket = hash & (ls->ls_rsbtbl_size - 1);
- error = search_rsb(ls, name, namelen, bucket, flags, &r);
+ retry:
+ if (flags & R_CREATE) {
+ error = pre_rsb_struct(ls);
+ if (error < 0)
+ goto out;
+ }
+
+ spin_lock(&ls->ls_rsbtbl[bucket].lock);
+
+ error = _search_rsb(ls, name, namelen, bucket, flags, &r);
if (!error)
- goto out;
+ goto out_unlock;
if (error == -EBADR && !(flags & R_CREATE))
- goto out;
+ goto out_unlock;
/* the rsb was found but wasn't a master copy */
if (error == -ENOTBLK)
- goto out;
+ goto out_unlock;
- error = -ENOMEM;
- r = create_rsb(ls, name, namelen);
- if (!r)
- goto out;
+ error = get_rsb_struct(ls, name, namelen, &r);
+ if (error == -EAGAIN) {
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
+ goto retry;
+ }
+ if (error)
+ goto out_unlock;
r->res_hash = hash;
r->res_bucket = bucket;
@@ -474,18 +527,10 @@ static int find_rsb(struct dlm_ls *ls, char *name, int namelen,
nodeid = 0;
r->res_nodeid = nodeid;
}
-
- spin_lock(&ls->ls_rsbtbl[bucket].lock);
- error = _search_rsb(ls, name, namelen, bucket, 0, &tmp);
- if (!error) {
- spin_unlock(&ls->ls_rsbtbl[bucket].lock);
- dlm_free_rsb(r);
- r = tmp;
- goto out;
- }
list_add(&r->res_hashchain, &ls->ls_rsbtbl[bucket].list);
- spin_unlock(&ls->ls_rsbtbl[bucket].lock);
error = 0;
+ out_unlock:
+ spin_unlock(&ls->ls_rsbtbl[bucket].lock);
out:
*r_ret = r;
return error;
@@ -580,9 +625,8 @@ static void detach_lkb(struct dlm_lkb *lkb)
static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
{
- struct dlm_lkb *lkb, *tmp;
- uint32_t lkid = 0;
- uint16_t bucket;
+ struct dlm_lkb *lkb;
+ int rv, id;
lkb = dlm_allocate_lkb(ls);
if (!lkb)
@@ -594,60 +638,42 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
INIT_LIST_HEAD(&lkb->lkb_ownqueue);
INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
INIT_LIST_HEAD(&lkb->lkb_time_list);
- INIT_LIST_HEAD(&lkb->lkb_astqueue);
+ INIT_LIST_HEAD(&lkb->lkb_cb_list);
+ mutex_init(&lkb->lkb_cb_mutex);
+ INIT_WORK(&lkb->lkb_cb_work, dlm_callback_work);
- get_random_bytes(&bucket, sizeof(bucket));
- bucket &= (ls->ls_lkbtbl_size - 1);
-
- write_lock(&ls->ls_lkbtbl[bucket].lock);
+ retry:
+ rv = idr_pre_get(&ls->ls_lkbidr, GFP_NOFS);
+ if (!rv)
+ return -ENOMEM;
- /* counter can roll over so we must verify lkid is not in use */
+ spin_lock(&ls->ls_lkbidr_spin);
+ rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id);
+ if (!rv)
+ lkb->lkb_id = id;
+ spin_unlock(&ls->ls_lkbidr_spin);
- while (lkid == 0) {
- lkid = (bucket << 16) | ls->ls_lkbtbl[bucket].counter++;
+ if (rv == -EAGAIN)
+ goto retry;
- list_for_each_entry(tmp, &ls->ls_lkbtbl[bucket].list,
- lkb_idtbl_list) {
- if (tmp->lkb_id != lkid)
- continue;
- lkid = 0;
- break;
- }
+ if (rv < 0) {
+ log_error(ls, "create_lkb idr error %d", rv);
+ return rv;
}
- lkb->lkb_id = lkid;
- list_add(&lkb->lkb_idtbl_list, &ls->ls_lkbtbl[bucket].list);
- write_unlock(&ls->ls_lkbtbl[bucket].lock);
-
*lkb_ret = lkb;
return 0;
}
-static struct dlm_lkb *__find_lkb(struct dlm_ls *ls, uint32_t lkid)
-{
- struct dlm_lkb *lkb;
- uint16_t bucket = (lkid >> 16);
-
- list_for_each_entry(lkb, &ls->ls_lkbtbl[bucket].list, lkb_idtbl_list) {
- if (lkb->lkb_id == lkid)
- return lkb;
- }
- return NULL;
-}
-
static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret)
{
struct dlm_lkb *lkb;
- uint16_t bucket = (lkid >> 16);
-
- if (bucket >= ls->ls_lkbtbl_size)
- return -EBADSLT;
- read_lock(&ls->ls_lkbtbl[bucket].lock);
- lkb = __find_lkb(ls, lkid);
+ spin_lock(&ls->ls_lkbidr_spin);
+ lkb = idr_find(&ls->ls_lkbidr, lkid);
if (lkb)
kref_get(&lkb->lkb_ref);
- read_unlock(&ls->ls_lkbtbl[bucket].lock);
+ spin_unlock(&ls->ls_lkbidr_spin);
*lkb_ret = lkb;
return lkb ? 0 : -ENOENT;
@@ -668,12 +694,12 @@ static void kill_lkb(struct kref *kref)
static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
{
- uint16_t bucket = (lkb->lkb_id >> 16);
+ uint32_t lkid = lkb->lkb_id;
- write_lock(&ls->ls_lkbtbl[bucket].lock);
+ spin_lock(&ls->ls_lkbidr_spin);
if (kref_put(&lkb->lkb_ref, kill_lkb)) {
- list_del(&lkb->lkb_idtbl_list);
- write_unlock(&ls->ls_lkbtbl[bucket].lock);
+ idr_remove(&ls->ls_lkbidr, lkid);
+ spin_unlock(&ls->ls_lkbidr_spin);
detach_lkb(lkb);
@@ -683,7 +709,7 @@ static int __put_lkb(struct dlm_ls *ls, struct dlm_lkb *lkb)
dlm_free_lkb(lkb);
return 1;
} else {
- write_unlock(&ls->ls_lkbtbl[bucket].lock);
+ spin_unlock(&ls->ls_lkbidr_spin);
return 0;
}
}
@@ -849,9 +875,7 @@ void dlm_scan_waiters(struct dlm_ls *ls)
if (!num_nodes) {
num_nodes = ls->ls_num_nodes;
- warned = kmalloc(GFP_KERNEL, num_nodes * sizeof(int));
- if (warned)
- memset(warned, 0, num_nodes * sizeof(int));
+ warned = kzalloc(num_nodes * sizeof(int), GFP_KERNEL);
}
if (!warned)
continue;
@@ -863,9 +887,7 @@ void dlm_scan_waiters(struct dlm_ls *ls)
dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
}
mutex_unlock(&ls->ls_waiters_mutex);
-
- if (warned)
- kfree(warned);
+ kfree(warned);
if (debug_expired)
log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
@@ -2401,9 +2423,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
if (deadlk) {
/* it's left on the granted queue */
- log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
- lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
- lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
revert_lock(r, lkb);
queue_cast(r, lkb, -EDEADLK);
error = -EDEADLK;
@@ -3993,8 +4012,6 @@ static void _receive_message(struct dlm_ls *ls, struct dlm_message *ms)
default:
log_error(ls, "unknown message type %d", ms->m_type);
}
-
- dlm_astd_wake();
}
/* If the lockspace is in recovery mode (locking stopped), then normal
@@ -4133,7 +4150,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
struct dlm_message *ms_stub;
int wait_type, stub_unlock_result, stub_cancel_result;
- ms_stub = kmalloc(GFP_KERNEL, sizeof(struct dlm_message));
+ ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
if (!ms_stub) {
log_error(ls, "dlm_recover_waiters_pre no mem");
return;
@@ -4809,7 +4826,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
goto out_put;
spin_lock(&ua->proc->locks_spin);
- /* dlm_user_add_ast() may have already taken lkb off the proc list */
+ /* dlm_user_add_cb() may have already taken lkb off the proc list */
if (!list_empty(&lkb->lkb_ownqueue))
list_move(&lkb->lkb_ownqueue, &ua->proc->unlocking);
spin_unlock(&ua->proc->locks_spin);
@@ -4946,7 +4963,7 @@ static int unlock_proc_lock(struct dlm_ls *ls, struct dlm_lkb *lkb)
/* We have to release clear_proc_locks mutex before calling unlock_proc_lock()
(which does lock_rsb) due to deadlock with receiving a message that does
- lock_rsb followed by dlm_user_add_ast() */
+ lock_rsb followed by dlm_user_add_cb() */
static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
struct dlm_user_proc *proc)
@@ -4969,7 +4986,7 @@ static struct dlm_lkb *del_proc_lock(struct dlm_ls *ls,
return lkb;
}
-/* The ls_clear_proc_locks mutex protects against dlm_user_add_asts() which
+/* The ls_clear_proc_locks mutex protects against dlm_user_add_cb() which
1) references lkb->ua which we free here and 2) adds lkbs to proc->asts,
which we clear here. */
@@ -5011,10 +5028,10 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
dlm_put_lkb(lkb);
}
- list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+ list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
memset(&lkb->lkb_callbacks, 0,
sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
- list_del_init(&lkb->lkb_astqueue);
+ list_del_init(&lkb->lkb_cb_list);
dlm_put_lkb(lkb);
}
@@ -5053,10 +5070,10 @@ static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
spin_unlock(&proc->locks_spin);
spin_lock(&proc->asts_spin);
- list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_astqueue) {
+ list_for_each_entry_safe(lkb, safe, &proc->asts, lkb_cb_list) {
memset(&lkb->lkb_callbacks, 0,
sizeof(struct dlm_callback) * DLM_CALLBACKS_SIZE);
- list_del_init(&lkb->lkb_astqueue);
+ list_del_init(&lkb->lkb_cb_list);
dlm_put_lkb(lkb);
}
spin_unlock(&proc->asts_spin);
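
create_lkb() now allocates lock IDs from an idr instead of the random-bucket table. This uses the pre-3.9 two-step idr API: preallocate outside the spinlock, allocate under it, and retry when a racing thread consumed the preallocation. The pattern, condensed from the hunks above:

	int rv, id;

retry:
	if (!idr_pre_get(&ls->ls_lkbidr, GFP_NOFS))	/* prealloc tree nodes */
		return -ENOMEM;

	spin_lock(&ls->ls_lkbidr_spin);
	rv = idr_get_new_above(&ls->ls_lkbidr, lkb, 1, &id);	/* ids from 1 */
	if (!rv)
		lkb->lkb_id = id;
	spin_unlock(&ls->ls_lkbidr_spin);

	if (rv == -EAGAIN)	/* preallocation raced away; try again */
		goto retry;
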
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index 14cbf40..a1d8f1a 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -15,7 +15,6 @@
#include "lockspace.h"
#include "member.h"
#include "recoverd.h"
-#include "ast.h"
#include "dir.h"
#include "lowcomms.h"
#include "config.h"
@@ -24,6 +23,7 @@
#include "recover.h"
#include "requestqueue.h"
#include "user.h"
+#include "ast.h"
static int ls_count;
static struct mutex ls_lock;
@@ -359,17 +359,10 @@ static int threads_start(void)
{
int error;
- /* Thread which process lock requests for all lockspace's */
- error = dlm_astd_start();
- if (error) {
- log_print("cannot start dlm_astd thread %d", error);
- goto fail;
- }
-
error = dlm_scand_start();
if (error) {
log_print("cannot start dlm_scand thread %d", error);
- goto astd_fail;
+ goto fail;
}
/* Thread for sending/receiving messages for all lockspace's */
@@ -383,8 +376,6 @@ static int threads_start(void)
scand_fail:
dlm_scand_stop();
- astd_fail:
- dlm_astd_stop();
fail:
return error;
}
@@ -393,7 +384,6 @@ static void threads_stop(void)
{
dlm_scand_stop();
dlm_lowcomms_stop();
- dlm_astd_stop();
}
static int new_lockspace(const char *name, int namelen, void **lockspace,
@@ -463,7 +453,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
size = dlm_config.ci_rsbtbl_size;
ls->ls_rsbtbl_size = size;
- ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS);
+ ls->ls_rsbtbl = vmalloc(sizeof(struct dlm_rsbtable) * size);
if (!ls->ls_rsbtbl)
goto out_lsfree;
for (i = 0; i < size; i++) {
@@ -472,22 +462,13 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
spin_lock_init(&ls->ls_rsbtbl[i].lock);
}
- size = dlm_config.ci_lkbtbl_size;
- ls->ls_lkbtbl_size = size;
-
- ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
- if (!ls->ls_lkbtbl)
- goto out_rsbfree;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&ls->ls_lkbtbl[i].list);
- rwlock_init(&ls->ls_lkbtbl[i].lock);
- ls->ls_lkbtbl[i].counter = 1;
- }
+ idr_init(&ls->ls_lkbidr);
+ spin_lock_init(&ls->ls_lkbidr_spin);
size = dlm_config.ci_dirtbl_size;
ls->ls_dirtbl_size = size;
- ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS);
+ ls->ls_dirtbl = vmalloc(sizeof(struct dlm_dirtable) * size);
if (!ls->ls_dirtbl)
goto out_lkbfree;
for (i = 0; i < size; i++) {
@@ -502,6 +483,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
INIT_LIST_HEAD(&ls->ls_timeout);
mutex_init(&ls->ls_timeout_mutex);
+ INIT_LIST_HEAD(&ls->ls_new_rsb);
+ spin_lock_init(&ls->ls_new_rsb_spin);
+
INIT_LIST_HEAD(&ls->ls_nodes);
INIT_LIST_HEAD(&ls->ls_nodes_gone);
ls->ls_num_nodes = 0;
@@ -520,6 +504,9 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
init_completion(&ls->ls_members_done);
ls->ls_members_result = -1;
+ mutex_init(&ls->ls_cb_mutex);
+ INIT_LIST_HEAD(&ls->ls_cb_delay);
+
ls->ls_recoverd_task = NULL;
mutex_init(&ls->ls_recoverd_active);
spin_lock_init(&ls->ls_recover_lock);
@@ -553,18 +540,26 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
list_add(&ls->ls_list, &lslist);
spin_unlock(&lslist_lock);
+ if (flags & DLM_LSFL_FS) {
+ error = dlm_callback_start(ls);
+ if (error) {
+ log_error(ls, "can't start dlm_callback %d", error);
+ goto out_delist;
+ }
+ }
+
/* needs to find ls in lslist */
error = dlm_recoverd_start(ls);
if (error) {
log_error(ls, "can't start dlm_recoverd %d", error);
- goto out_delist;
+ goto out_callback;
}
ls->ls_kobj.kset = dlm_kset;
error = kobject_init_and_add(&ls->ls_kobj, &dlm_ktype, NULL,
"%s", ls->ls_name);
if (error)
- goto out_stop;
+ goto out_recoverd;
kobject_uevent(&ls->ls_kobj, KOBJ_ADD);
/* let kobject handle freeing of ls if there's an error */
@@ -578,7 +573,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
error = do_uevent(ls, 1);
if (error)
- goto out_stop;
+ goto out_recoverd;
wait_for_completion(&ls->ls_members_done);
error = ls->ls_members_result;
@@ -595,19 +590,20 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
do_uevent(ls, 0);
dlm_clear_members(ls);
kfree(ls->ls_node_array);
- out_stop:
+ out_recoverd:
dlm_recoverd_stop(ls);
+ out_callback:
+ dlm_callback_stop(ls);
out_delist:
spin_lock(&lslist_lock);
list_del(&ls->ls_list);
spin_unlock(&lslist_lock);
kfree(ls->ls_recover_buf);
out_dirfree:
- kfree(ls->ls_dirtbl);
+ vfree(ls->ls_dirtbl);
out_lkbfree:
- kfree(ls->ls_lkbtbl);
- out_rsbfree:
- kfree(ls->ls_rsbtbl);
+ idr_destroy(&ls->ls_lkbidr);
+ vfree(ls->ls_rsbtbl);
out_lsfree:
if (do_unreg)
kobject_put(&ls->ls_kobj);
@@ -641,50 +637,64 @@ int dlm_new_lockspace(const char *name, int namelen, void **lockspace,
return error;
}
-/* Return 1 if the lockspace still has active remote locks,
- * 2 if the lockspace still has active local locks.
- */
-static int lockspace_busy(struct dlm_ls *ls)
-{
- int i, lkb_found = 0;
- struct dlm_lkb *lkb;
-
- /* NOTE: We check the lockidtbl here rather than the resource table.
- This is because there may be LKBs queued as ASTs that have been
- unlinked from their RSBs and are pending deletion once the AST has
- been delivered */
-
- for (i = 0; i < ls->ls_lkbtbl_size; i++) {
- read_lock(&ls->ls_lkbtbl[i].lock);
- if (!list_empty(&ls->ls_lkbtbl[i].list)) {
- lkb_found = 1;
- list_for_each_entry(lkb, &ls->ls_lkbtbl[i].list,
- lkb_idtbl_list) {
- if (!lkb->lkb_nodeid) {
- read_unlock(&ls->ls_lkbtbl[i].lock);
- return 2;
- }
- }
- }
- read_unlock(&ls->ls_lkbtbl[i].lock);
+static int lkb_idr_is_local(int id, void *p, void *data)
+{
+ struct dlm_lkb *lkb = p;
+
+ if (!lkb->lkb_nodeid)
+ return 1;
+ return 0;
+}
+
+static int lkb_idr_is_any(int id, void *p, void *data)
+{
+ return 1;
+}
+
+static int lkb_idr_free(int id, void *p, void *data)
+{
+ struct dlm_lkb *lkb = p;
+
+ if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
+ dlm_free_lvb(lkb->lkb_lvbptr);
+
+ dlm_free_lkb(lkb);
+ return 0;
+}
+
+/* NOTE: We check the lkbidr here rather than the resource table.
+ This is because there may be LKBs queued as ASTs that have been unlinked
+ from their RSBs and are pending deletion once the AST has been delivered */
+
+static int lockspace_busy(struct dlm_ls *ls, int force)
+{
+ int rv;
+
+ spin_lock(&ls->ls_lkbidr_spin);
+ if (force == 0) {
+ rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_any, ls);
+ } else if (force == 1) {
+ rv = idr_for_each(&ls->ls_lkbidr, lkb_idr_is_local, ls);
+ } else {
+ rv = 0;
}
- return lkb_found;
+ spin_unlock(&ls->ls_lkbidr_spin);
+ return rv;
}
static int release_lockspace(struct dlm_ls *ls, int force)
{
- struct dlm_lkb *lkb;
struct dlm_rsb *rsb;
struct list_head *head;
int i, busy, rv;
- busy = lockspace_busy(ls);
+ busy = lockspace_busy(ls, force);
spin_lock(&lslist_lock);
if (ls->ls_create_count == 1) {
- if (busy > force)
+ if (busy) {
rv = -EBUSY;
- else {
+ } else {
/* remove_lockspace takes ls off lslist */
ls->ls_create_count = 0;
rv = 0;
@@ -708,12 +718,12 @@ static int release_lockspace(struct dlm_ls *ls, int force)
dlm_recoverd_stop(ls);
+ dlm_callback_stop(ls);
+
remove_lockspace(ls);
dlm_delete_debug_file(ls);
- dlm_astd_suspend();
-
kfree(ls->ls_recover_buf);
/*
@@ -721,31 +731,15 @@ static int release_lockspace(struct dlm_ls *ls, int force)
*/
dlm_dir_clear(ls);
- kfree(ls->ls_dirtbl);
+ vfree(ls->ls_dirtbl);
/*
- * Free all lkb's on lkbtbl[] lists.
+ * Free all lkb's in idr
*/
- for (i = 0; i < ls->ls_lkbtbl_size; i++) {
- head = &ls->ls_lkbtbl[i].list;
- while (!list_empty(head)) {
- lkb = list_entry(head->next, struct dlm_lkb,
- lkb_idtbl_list);
-
- list_del(&lkb->lkb_idtbl_list);
-
- dlm_del_ast(lkb);
-
- if (lkb->lkb_lvbptr && lkb->lkb_flags & DLM_IFL_MSTCPY)
- dlm_free_lvb(lkb->lkb_lvbptr);
-
- dlm_free_lkb(lkb);
- }
- }
- dlm_astd_resume();
-
- kfree(ls->ls_lkbtbl);
+ idr_for_each(&ls->ls_lkbidr, lkb_idr_free, ls);
+ idr_remove_all(&ls->ls_lkbidr);
+ idr_destroy(&ls->ls_lkbidr);
/*
* Free all rsb's on rsbtbl[] lists
@@ -770,7 +764,14 @@ static int release_lockspace(struct dlm_ls *ls, int force)
}
}
- kfree(ls->ls_rsbtbl);
+ vfree(ls->ls_rsbtbl);
+
+ while (!list_empty(&ls->ls_new_rsb)) {
+ rsb = list_first_entry(&ls->ls_new_rsb, struct dlm_rsb,
+ res_hashchain);
+ list_del(&rsb->res_hashchain);
+ dlm_free_rsb(rsb);
+ }
/*
* Free structures on any other lists
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 5e2c71f..990626e 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -512,12 +512,10 @@ static void process_sctp_notification(struct connection *con,
}
make_sockaddr(&prim.ssp_addr, 0, &addr_len);
if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) {
- int i;
unsigned char *b=(unsigned char *)&prim.ssp_addr;
log_print("reject connect from unknown addr");
- for (i=0; i<sizeof(struct sockaddr_storage);i++)
- printk("%02x ", b[i]);
- printk("\n");
+ print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
+ b, sizeof(struct sockaddr_storage));
sctp_send_shutdown(prim.ssp_assoc_id);
return;
}
@@ -748,7 +746,10 @@ static int tcp_accept_from_sock(struct connection *con)
/* Get the new node's NODEID */
make_sockaddr(&peeraddr, 0, &len);
if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) {
+ unsigned char *b=(unsigned char *)&peeraddr;
log_print("connect from non cluster node");
+ print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
+ b, sizeof(struct sockaddr_storage));
sock_release(newsock);
mutex_unlock(&con->sock_mutex);
return -1;
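
Both lowcomms hunks swap an open-coded hex loop for print_hex_dump_bytes(), which handles formatting and line breaking itself:

	/* before: */
	for (i = 0; i < sizeof(struct sockaddr_storage); i++)
		printk("%02x ", b[i]);
	printk("\n");

	/* after: one call, same information */
	print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
			     b, sizeof(struct sockaddr_storage));
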
diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
index 8e0d00d..da64df7 100644
--- a/fs/dlm/memory.c
+++ b/fs/dlm/memory.c
@@ -16,6 +16,7 @@
#include "memory.h"
static struct kmem_cache *lkb_cache;
+static struct kmem_cache *rsb_cache;
int __init dlm_memory_init(void)
@@ -26,6 +27,14 @@ int __init dlm_memory_init(void)
__alignof__(struct dlm_lkb), 0, NULL);
if (!lkb_cache)
ret = -ENOMEM;
+
+ rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
+ __alignof__(struct dlm_rsb), 0, NULL);
+ if (!rsb_cache) {
+ kmem_cache_destroy(lkb_cache);
+ ret = -ENOMEM;
+ }
+
return ret;
}
@@ -33,6 +42,8 @@ void dlm_memory_exit(void)
{
if (lkb_cache)
kmem_cache_destroy(lkb_cache);
+ if (rsb_cache)
+ kmem_cache_destroy(rsb_cache);
}
char *dlm_allocate_lvb(struct dlm_ls *ls)
@@ -48,16 +59,11 @@ void dlm_free_lvb(char *p)
kfree(p);
}
-/* FIXME: have some minimal space built-in to rsb for the name and
- kmalloc a separate name if needed, like dentries are done */
-
-struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
+struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls)
{
struct dlm_rsb *r;
- DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
-
- r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
+ r = kmem_cache_zalloc(rsb_cache, GFP_NOFS);
return r;
}
@@ -65,7 +71,7 @@ void dlm_free_rsb(struct dlm_rsb *r)
{
if (r->res_lvbptr)
dlm_free_lvb(r->res_lvbptr);
- kfree(r);
+ kmem_cache_free(rsb_cache, r);
}
struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
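
Because struct dlm_rsb now embeds a fixed-size res_name[DLM_RESNAME_MAXLEN+1] (see the dlm_internal.h hunk), rsb's become fixed-size objects and can come from a dedicated slab cache instead of kzalloc. The lifecycle, condensed from the hunks above:

	static struct kmem_cache *rsb_cache;

	rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb),
				      __alignof__(struct dlm_rsb), 0, NULL);
	r = kmem_cache_zalloc(rsb_cache, GFP_NOFS);	/* allocate zeroed */
	kmem_cache_free(rsb_cache, r);
	kmem_cache_destroy(rsb_cache);
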
diff --git a/fs/dlm/memory.h b/fs/dlm/memory.h
index 485fb29..177c11c 100644
--- a/fs/dlm/memory.h
+++ b/fs/dlm/memory.h
@@ -16,7 +16,7 @@
int dlm_memory_init(void);
void dlm_memory_exit(void);
-struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen);
+struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls);
void dlm_free_rsb(struct dlm_rsb *r);
struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls);
void dlm_free_lkb(struct dlm_lkb *l);
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index e2b8780..01fd5c1 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -92,7 +92,7 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number,
op->info.number = number;
op->info.start = 0;
op->info.end = OFFSET_MAX;
- if (fl->fl_lmops && fl->fl_lmops->fl_grant)
+ if (fl->fl_lmops && fl->fl_lmops->lm_grant)
op->info.owner = (__u64) fl->fl_pid;
else
op->info.owner = (__u64)(long) fl->fl_owner;
@@ -128,11 +128,11 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- if (fl->fl_lmops && fl->fl_lmops->fl_grant) {
+ if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
/* fl_owner is lockd which doesn't distinguish
processes on the nfs client */
op->info.owner = (__u64) fl->fl_pid;
- xop->callback = fl->fl_lmops->fl_grant;
+ xop->callback = fl->fl_lmops->lm_grant;
locks_init_lock(&xop->flc);
locks_copy_lock(&xop->flc, fl);
xop->fl = fl;
@@ -268,7 +268,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- if (fl->fl_lmops && fl->fl_lmops->fl_grant)
+ if (fl->fl_lmops && fl->fl_lmops->lm_grant)
op->info.owner = (__u64) fl->fl_pid;
else
op->info.owner = (__u64)(long) fl->fl_owner;
@@ -327,7 +327,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
op->info.number = number;
op->info.start = fl->fl_start;
op->info.end = fl->fl_end;
- if (fl->fl_lmops && fl->fl_lmops->fl_grant)
+ if (fl->fl_lmops && fl->fl_lmops->lm_grant)
op->info.owner = (__u64) fl->fl_pid;
else
op->info.owner = (__u64)(long) fl->fl_owner;
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index fd677c8..774da3c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -58,13 +58,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
mutex_lock(&ls->ls_recoverd_active);
- /*
- * Suspending and resuming dlm_astd ensures that no lkb's from this ls
- * will be processed by dlm_astd during recovery.
- */
-
- dlm_astd_suspend();
- dlm_astd_resume();
+ dlm_callback_suspend(ls);
/*
* Free non-master tossed rsb's. Master rsb's are kept on toss
@@ -202,6 +196,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_adjust_timeouts(ls);
+ dlm_callback_resume(ls);
+
error = enable_locking(ls, rv->seq);
if (error) {
log_debug(ls, "enable_locking failed %d", error);
@@ -222,8 +218,6 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
dlm_grant_after_purge(ls);
- dlm_astd_wake();
-
log_debug(ls, "recover %llx done: %u ms",
(unsigned long long)rv->seq,
jiffies_to_msecs(jiffies - start));
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index e96bf3e..d8ea607 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -213,9 +213,9 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, uint32_t flags, int mode,
goto out;
}
- if (list_empty(&lkb->lkb_astqueue)) {
+ if (list_empty(&lkb->lkb_cb_list)) {
kref_get(&lkb->lkb_ref);
- list_add_tail(&lkb->lkb_astqueue, &proc->asts);
+ list_add_tail(&lkb->lkb_cb_list, &proc->asts);
wake_up_interruptible(&proc->wait);
}
spin_unlock(&proc->asts_spin);
@@ -832,24 +832,24 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
}
/* if we empty lkb_callbacks, we don't want to unlock the spinlock
- without removing lkb_astqueue; so empty lkb_astqueue is always
+ without removing lkb_cb_list; so empty lkb_cb_list is always
consistent with empty lkb_callbacks */
- lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_astqueue);
+ lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list);
rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid);
if (rv < 0) {
/* this shouldn't happen; lkb should have been removed from
list when resid was zero */
log_print("dlm_rem_lkb_callback empty %x", lkb->lkb_id);
- list_del_init(&lkb->lkb_astqueue);
+ list_del_init(&lkb->lkb_cb_list);
spin_unlock(&proc->asts_spin);
/* removes ref for proc->asts, may cause lkb to be freed */
dlm_put_lkb(lkb);
goto try_another;
}
if (!resid)
- list_del_init(&lkb->lkb_astqueue);
+ list_del_init(&lkb->lkb_cb_list);
spin_unlock(&proc->asts_spin);
if (cb.flags & DLM_CB_SKIP) {
diff --git a/fs/efs/inode.c b/fs/efs/inode.c
index 9c13412..bc84f36 100644
--- a/fs/efs/inode.c
+++ b/fs/efs/inode.c
@@ -96,7 +96,7 @@ struct inode *efs_iget(struct super_block *super, unsigned long ino)
efs_inode = (struct efs_dinode *) (bh->b_data + offset);
inode->i_mode = be16_to_cpu(efs_inode->di_mode);
- inode->i_nlink = be16_to_cpu(efs_inode->di_nlink);
+ set_nlink(inode, be16_to_cpu(efs_inode->di_nlink));
inode->i_uid = (uid_t)be16_to_cpu(efs_inode->di_uid);
inode->i_gid = (gid_t)be16_to_cpu(efs_inode->di_gid);
inode->i_size = be32_to_cpu(efs_inode->di_size);
diff --git a/fs/efs/namei.c b/fs/efs/namei.c
index 1511bf9..832b10d 100644
--- a/fs/efs/namei.c
+++ b/fs/efs/namei.c
@@ -60,14 +60,11 @@ static efs_ino_t efs_find_entry(struct inode *inode, const char *name, int len)
struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) {
efs_ino_t inodenum;
- struct inode * inode = NULL;
+ struct inode *inode = NULL;
inodenum = efs_find_entry(dir, dentry->d_name.name, dentry->d_name.len);
- if (inodenum) {
+ if (inodenum)
inode = efs_iget(dir->i_sb, inodenum);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
return d_splice_alias(inode, dentry);
}
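
The efs_lookup() simplification leans on d_splice_alias() accepting an ERR_PTR inode and propagating it via ERR_CAST, so the caller no longer needs its own IS_ERR branch. Assuming that behavior, the whole lookup collapses to:

	inode = efs_iget(dir->i_sb, inodenum);	/* may return ERR_PTR */
	return d_splice_alias(inode, dentry);	/* NULL, error, or dentry */
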
diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild
index 2d0f757..352ba14 100644
--- a/fs/exofs/Kbuild
+++ b/fs/exofs/Kbuild
@@ -12,5 +12,9 @@
# Kbuild - Gets included from the Kernels Makefile and build system
#
-exofs-y := ios.o inode.o file.o symlink.o namei.o dir.o super.o
+# ore module library
+libore-y := ore.o ore_raid.o
+obj-$(CONFIG_ORE) += libore.o
+
+exofs-y := inode.o file.o symlink.o namei.o dir.o super.o
obj-$(CONFIG_EXOFS_FS) += exofs.o
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h
index c965806..51f4b4c 100644
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -36,12 +36,9 @@
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/backing-dev.h>
-#include "common.h"
+#include <scsi/osd_ore.h>
-/* FIXME: Remove once pnfs hits mainline
- * #include <linux/exportfs/pnfs_osd_xdr.h>
- */
-#include "pnfs.h"
+#include "common.h"
#define EXOFS_ERR(fmt, a...) printk(KERN_ERR "exofs: " fmt, ##a)
@@ -56,27 +53,15 @@
/* u64 has problems with printk this will cast it to unsigned long long */
#define _LLU(x) (unsigned long long)(x)
-struct exofs_layout {
- osd_id s_pid; /* partition ID of file system*/
-
- /* Our way of looking at the data_map */
- unsigned stripe_unit;
- unsigned mirrors_p1;
-
- unsigned group_width;
- u64 group_depth;
- unsigned group_count;
-
- enum exofs_inode_layout_gen_functions lay_func;
-
- unsigned s_numdevs; /* Num of devices in array */
- struct osd_dev *s_ods[0]; /* Variable length */
+struct exofs_dev {
+ struct ore_dev ored;
+ unsigned did;
};
-
/*
* our extension to the in-memory superblock
*/
struct exofs_sb_info {
+ struct backing_dev_info bdi; /* register our bdi with VFS */
struct exofs_sb_stats s_ess; /* Written often, pre-allocate*/
int s_timeout; /* timeout for OSD operations */
uint64_t s_nextid; /* highest object ID used */
@@ -84,17 +69,10 @@ struct exofs_sb_info {
spinlock_t s_next_gen_lock; /* spinlock for gen # update */
u32 s_next_generation; /* next gen # to use */
atomic_t s_curr_pending; /* number of pending commands */
- uint8_t s_cred[OSD_CAP_LEN]; /* credential for the fscb */
- struct backing_dev_info bdi; /* register our bdi with VFS */
-
- struct pnfs_osd_data_map data_map; /* Default raid to use
- * FIXME: Needed ?
- */
-/* struct exofs_layout dir_layout;*/ /* Default dir layout */
- struct exofs_layout layout; /* Default files layout,
- * contains the variable osd_dev
- * array. Keep last */
- struct osd_dev *_min_one_dev[1]; /* Place holder for one dev */
+
+ struct ore_layout layout; /* Default files layout */
+ struct ore_comp one_comp; /* id & cred of partition id=0*/
+ struct ore_components oc; /* comps for the partition */
};
/*
@@ -107,7 +85,8 @@ struct exofs_i_info {
uint32_t i_data[EXOFS_IDATA];/*short symlink names and device #s*/
uint32_t i_dir_start_lookup; /* which page to start lookup */
uint64_t i_commit_size; /* the object's written length */
- uint8_t i_cred[OSD_CAP_LEN];/* all-powerful credential */
+ struct ore_comp one_comp; /* same component for all devices */
+ struct ore_components oc; /* inode view of the device table */
};
static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
@@ -115,52 +94,6 @@ static inline osd_id exofs_oi_objno(struct exofs_i_info *oi)
return oi->vfs_inode.i_ino + EXOFS_OBJ_OFF;
}
-struct exofs_io_state;
-typedef void (*exofs_io_done_fn)(struct exofs_io_state *or, void *private);
-
-struct exofs_io_state {
- struct kref kref;
-
- void *private;
- exofs_io_done_fn done;
-
- struct exofs_layout *layout;
- struct osd_obj_id obj;
- u8 *cred;
-
- /* Global read/write IO*/
- loff_t offset;
- unsigned long length;
- void *kern_buff;
-
- struct page **pages;
- unsigned nr_pages;
- unsigned pgbase;
- unsigned pages_consumed;
-
- /* Attributes */
- unsigned in_attr_len;
- struct osd_attr *in_attr;
- unsigned out_attr_len;
- struct osd_attr *out_attr;
-
- /* Variable array of size numdevs */
- unsigned numdevs;
- struct exofs_per_dev_state {
- struct osd_request *or;
- struct bio *bio;
- loff_t offset;
- unsigned length;
- unsigned dev;
- } per_dev[];
-};
-
-static inline unsigned exofs_io_state_size(unsigned numdevs)
-{
- return sizeof(struct exofs_io_state) +
- sizeof(struct exofs_per_dev_state) * numdevs;
-}
-
/*
* our inode flags
*/
@@ -205,12 +138,6 @@ static inline struct exofs_i_info *exofs_i(struct inode *inode)
}
/*
- * Given a layout, object_number and stripe_index return the associated global
- * dev_index
- */
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
- osd_id obj_no, unsigned layout_index);
-/*
* Maximum count of links to a file
*/
#define EXOFS_LINK_MAX 32000
@@ -219,44 +146,8 @@ unsigned exofs_layout_od_id(struct exofs_layout *layout,
* function declarations *
*************************/
-/* ios.c */
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
- const struct osd_obj_id *obj);
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
- u64 offset, void *p, unsigned length);
-
-int exofs_get_io_state(struct exofs_layout *layout,
- struct exofs_io_state **ios);
-void exofs_put_io_state(struct exofs_io_state *ios);
-
-int exofs_check_io(struct exofs_io_state *ios, u64 *resid);
-
-int exofs_sbi_create(struct exofs_io_state *ios);
-int exofs_sbi_remove(struct exofs_io_state *ios);
-int exofs_sbi_write(struct exofs_io_state *ios);
-int exofs_sbi_read(struct exofs_io_state *ios);
-
-int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr);
-
-int exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
-static inline int exofs_oi_write(struct exofs_i_info *oi,
- struct exofs_io_state *ios)
-{
- ios->obj.id = exofs_oi_objno(oi);
- ios->cred = oi->i_cred;
- return exofs_sbi_write(ios);
-}
-
-static inline int exofs_oi_read(struct exofs_i_info *oi,
- struct exofs_io_state *ios)
-{
- ios->obj.id = exofs_oi_objno(oi);
- ios->cred = oi->i_cred;
- return exofs_sbi_read(ios);
-}
-
/* inode.c */
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
unsigned expected_pages);
int exofs_setattr(struct dentry *, struct iattr *);
int exofs_write_begin(struct file *file, struct address_space *mapping,
@@ -281,6 +172,8 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
struct inode *);
/* super.c */
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN],
+ const struct osd_obj_id *obj);
int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
/*********************
@@ -295,7 +188,6 @@ extern const struct file_operations exofs_file_operations;
/* inode.c */
extern const struct address_space_operations exofs_aops;
-extern const struct osd_attr g_attr_logical_length;
/* namei.c */
extern const struct inode_operations exofs_dir_inode_operations;
@@ -305,4 +197,35 @@ extern const struct inode_operations exofs_special_inode_operations;
extern const struct inode_operations exofs_symlink_inode_operations;
extern const struct inode_operations exofs_fast_symlink_inode_operations;
+/* exofs_init_comps will initialize an ore_components device array
+ * pointing to a single ore_comp struct, and a round-robin view
+ * of the device table.
+ * The first device of each inode is [inode->ino % num_devices],
+ * with the remaining devices following sequentially, wrapping
+ * around to the first device after the last one.
+ * It is assumed that the global device array at @sbi is twice
+ * as big and that the device table repeats twice.
+ * See: exofs_read_lookup_dev_table()
+ */
+static inline void exofs_init_comps(struct ore_components *oc,
+ struct ore_comp *one_comp,
+ struct exofs_sb_info *sbi, osd_id oid)
+{
+ unsigned dev_mod = (unsigned)oid, first_dev;
+
+ one_comp->obj.partition = sbi->one_comp.obj.partition;
+ one_comp->obj.id = oid;
+ exofs_make_credential(one_comp->cred, &one_comp->obj);
+
+ oc->first_dev = 0;
+ oc->numdevs = sbi->layout.group_width * sbi->layout.mirrors_p1 *
+ sbi->layout.group_count;
+ oc->single_comp = EC_SINGLE_COMP;
+ oc->comps = one_comp;
+
+ /* Round robin device view of the table */
+ first_dev = (dev_mod * sbi->layout.mirrors_p1) % sbi->oc.numdevs;
+ oc->ods = &sbi->oc.ods[first_dev];
+}
+
#endif
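To see the round-robin view concretely, here is a hedged sketch of how the inode paths later in this patch call the helper, with a worked example in the comments (the device counts are illustrative):

/* Sketch, mirroring what exofs_iget()/exofs_new_inode() do below. */
struct exofs_i_info *oi = exofs_i(inode);

exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info,
		 exofs_oi_objno(oi));
/* Example: oid = 10, mirrors_p1 = 1, sbi->oc.numdevs = 4
 *   first_dev = (10 * 1) % 4 = 2
 * Because the device table in sbi is stored twice back to back,
 * ods[2..5] is a valid window that gives this inode the device
 * order 2, 3, 0, 1. */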
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 45ca323..491c6c0 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -42,11 +42,19 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
* Note, in exofs all metadata is written as part of inode, regardless.
* The writeout is synchronous
*/
-static int exofs_file_fsync(struct file *filp, int datasync)
+static int exofs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
+ struct inode *inode = filp->f_mapping->host;
int ret;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+
+ mutex_lock(&inode->i_mutex);
ret = sync_inode_metadata(filp->f_mapping->host, 1);
+ mutex_unlock(&inode->i_mutex);
return ret;
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 8472c09..f6dbf77 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,20 +37,15 @@
#define EXOFS_DBGMSG2(M...) do {} while (0)
-enum { BIO_MAX_PAGES_KMALLOC =
- (PAGE_SIZE - sizeof(struct bio)) / sizeof(struct bio_vec),
- MAX_PAGES_KMALLOC =
- PAGE_SIZE / sizeof(struct page *),
-};
+enum {MAX_PAGES_KMALLOC = PAGE_SIZE / sizeof(struct page *), };
-unsigned exofs_max_io_pages(struct exofs_layout *layout,
+unsigned exofs_max_io_pages(struct ore_layout *layout,
unsigned expected_pages)
{
unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
/* TODO: easily support bio chaining */
- pages = min_t(unsigned, pages,
- layout->group_width * BIO_MAX_PAGES_KMALLOC);
+ pages = min_t(unsigned, pages, layout->max_io_length / PAGE_SIZE);
return pages;
}
@@ -58,7 +53,7 @@ struct page_collect {
struct exofs_sb_info *sbi;
struct inode *inode;
unsigned expected_pages;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
struct page **pages;
unsigned alloc_pages;
@@ -68,6 +63,7 @@ struct page_collect {
bool read_4_write; /* This means two things: that the read is sync
* And the pages should not be unlocked.
*/
+ struct page *that_locked_page;
};
static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
@@ -86,6 +82,7 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages,
pcol->length = 0;
pcol->pg_first = -1;
pcol->read_4_write = false;
+ pcol->that_locked_page = NULL;
}
static void _pcol_reset(struct page_collect *pcol)
@@ -98,6 +95,7 @@ static void _pcol_reset(struct page_collect *pcol)
pcol->length = 0;
pcol->pg_first = -1;
pcol->ios = NULL;
+ pcol->that_locked_page = NULL;
/* this is probably the end of the loop but in writes
* it might not end here. don't be left with nothing
@@ -110,13 +108,6 @@ static int pcol_try_alloc(struct page_collect *pcol)
{
unsigned pages;
- if (!pcol->ios) { /* First time allocate io_state */
- int ret = exofs_get_io_state(&pcol->sbi->layout, &pcol->ios);
-
- if (ret)
- return ret;
- }
-
/* TODO: easily support bio chaining */
pages = exofs_max_io_pages(&pcol->sbi->layout, pcol->expected_pages);
@@ -140,7 +131,7 @@ static void pcol_free(struct page_collect *pcol)
pcol->pages = NULL;
if (pcol->ios) {
- exofs_put_io_state(pcol->ios);
+ ore_put_io_state(pcol->ios);
pcol->ios = NULL;
}
}
@@ -156,14 +147,17 @@ static int pcol_add_page(struct page_collect *pcol, struct page *page,
return 0;
}
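+/* Out-of-band positive "error" value: it marks pages that were never
+ * part of the submitted IO, so there is nothing to account against them. */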
+enum {PAGE_WAS_NOT_IN_IO = 17};
static int update_read_page(struct page *page, int ret)
{
- if (ret == 0) {
+ switch (ret) {
+ case 0:
/* Everything is OK */
SetPageUptodate(page);
if (PageError(page))
ClearPageError(page);
- } else if (ret == -EFAULT) {
+ break;
+ case -EFAULT:
/* In this case we were trying to read something that wasn't on
* disk yet - return a page full of zeroes. This should be OK,
* because the object should be empty (if there was a write
@@ -174,16 +168,22 @@ static int update_read_page(struct page *page, int ret)
SetPageUptodate(page);
if (PageError(page))
ClearPageError(page);
- ret = 0; /* recovered error */
EXOFS_DBGMSG("recovered read error\n");
- } else /* Error */
+ /* fall through */
+ case PAGE_WAS_NOT_IN_IO:
+ ret = 0; /* recovered error */
+ break;
+ default:
SetPageError(page);
-
+ }
return ret;
}
static void update_write_page(struct page *page, int ret)
{
+ if (unlikely(ret == PAGE_WAS_NOT_IN_IO))
+ return; /* don't pass start, don't collect $200 */
+
if (ret) {
mapping_set_error(page->mapping, ret);
SetPageError(page);
@@ -197,15 +197,16 @@ static void update_write_page(struct page *page, int ret)
static int __readpages_done(struct page_collect *pcol)
{
int i;
- u64 resid;
u64 good_bytes;
u64 length = 0;
- int ret = exofs_check_io(pcol->ios, &resid);
+ int ret = ore_check_io(pcol->ios, NULL);
- if (likely(!ret))
+ if (likely(!ret)) {
good_bytes = pcol->length;
- else
- good_bytes = pcol->length - resid;
+ ret = PAGE_WAS_NOT_IN_IO;
+ } else {
+ good_bytes = 0;
+ }
EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx"
" length=0x%lx nr_pages=%u\n",
@@ -241,7 +242,7 @@ static int __readpages_done(struct page_collect *pcol)
}
/* callback of async reads */
-static void readpages_done(struct exofs_io_state *ios, void *p)
+static void readpages_done(struct ore_io_state *ios, void *p)
{
struct page_collect *pcol = p;
@@ -266,23 +267,70 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw)
}
}
+static int _maybe_not_all_in_one_io(struct ore_io_state *ios,
+ struct page_collect *pcol_src, struct page_collect *pcol)
+{
+ /* length was wrong or offset was not page aligned */
+ BUG_ON(pcol_src->nr_pages < ios->nr_pages);
+
+ if (pcol_src->nr_pages > ios->nr_pages) {
+ struct page **src_page;
+ unsigned pages_less = pcol_src->nr_pages - ios->nr_pages;
+ unsigned long len_less = pcol_src->length - ios->length;
+ unsigned i;
+ int ret;
+
+ /* This IO was trimmed */
+ pcol_src->nr_pages = ios->nr_pages;
+ pcol_src->length = ios->length;
+
+ /* Left over pages are passed to the next io */
+ pcol->expected_pages += pages_less;
+ pcol->nr_pages = pages_less;
+ pcol->length = len_less;
+ src_page = pcol_src->pages + pcol_src->nr_pages;
+ pcol->pg_first = (*src_page)->index;
+
+ ret = pcol_try_alloc(pcol);
+ if (unlikely(ret))
+ return ret;
+
+ for (i = 0; i < pages_less; ++i)
+ pcol->pages[i] = *src_page++;
+
+ EXOFS_DBGMSG("Length was adjusted nr_pages=0x%x "
+ "pages_less=0x%x expected_pages=0x%x "
+ "next_offset=0x%llx next_len=0x%lx\n",
+ pcol_src->nr_pages, pages_less, pcol->expected_pages,
+ pcol->pg_first * PAGE_SIZE, pcol->length);
+ }
+ return 0;
+}
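Concretely: if the caller collected 32 pages but the engine trimmed the IO to 24 (say, at a group boundary), then pages_less = 8 and len_less = 8 * PAGE_SIZE; pcol_src is clamped to the 24 pages actually in flight, and the fresh pcol inherits the 8 leftovers with pg_first taken from the first leftover page's index, ready for the caller's next submission pass (values illustrative).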
+
static int read_exec(struct page_collect *pcol)
{
struct exofs_i_info *oi = exofs_i(pcol->inode);
- struct exofs_io_state *ios = pcol->ios;
+ struct ore_io_state *ios;
struct page_collect *pcol_copy = NULL;
int ret;
if (!pcol->pages)
return 0;
+ if (!pcol->ios) {
+ int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true,
+ pcol->pg_first << PAGE_CACHE_SHIFT,
+ pcol->length, &pcol->ios);
+
+ if (ret)
+ return ret;
+ }
+
+ ios = pcol->ios;
ios->pages = pcol->pages;
- ios->nr_pages = pcol->nr_pages;
- ios->length = pcol->length;
- ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT;
if (pcol->read_4_write) {
- exofs_oi_read(oi, pcol->ios);
+ ore_read(pcol->ios);
return __readpages_done(pcol);
}
@@ -295,17 +343,23 @@ static int read_exec(struct page_collect *pcol)
*pcol_copy = *pcol;
ios->done = readpages_done;
ios->private = pcol_copy;
- ret = exofs_oi_read(oi, ios);
+
+ /* pages ownership was passed to pcol_copy */
+ _pcol_reset(pcol);
+
+ ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol);
if (unlikely(ret))
goto err;
- atomic_inc(&pcol->sbi->s_curr_pending);
+ EXOFS_DBGMSG2("read_exec(0x%lx) offset=0x%llx length=0x%llx\n",
+ pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length));
- EXOFS_DBGMSG2("read_exec obj=0x%llx start=0x%llx length=0x%lx\n",
- ios->obj.id, _LLU(ios->offset), pcol->length);
+ ret = ore_read(ios);
+ if (unlikely(ret))
+ goto err;
+
+ atomic_inc(&pcol->sbi->s_curr_pending);
- /* pages ownership was passed to pcol_copy */
- _pcol_reset(pcol);
return 0;
err:
@@ -340,6 +394,8 @@ static int readpage_strip(void *data, struct page *page)
EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
page->index);
+ pcol->that_locked_page = page;
+
if (page->index < end_index)
len = PAGE_CACHE_SIZE;
else if (page->index == end_index)
@@ -428,6 +484,10 @@ static int exofs_readpages(struct file *file, struct address_space *mapping,
return ret;
}
+ ret = read_exec(&pcol);
+ if (unlikely(ret))
+ return ret;
+
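+	/* Second pass: if the first read_exec() trimmed the IO, pcol was
+	 * refilled with the leftover pages; submit them as well. */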
return read_exec(&pcol);
}
@@ -457,21 +517,22 @@ static int exofs_readpage(struct file *file, struct page *page)
}
/* Callback for osd_write. All writes are asynchronous */
-static void writepages_done(struct exofs_io_state *ios, void *p)
+static void writepages_done(struct ore_io_state *ios, void *p)
{
struct page_collect *pcol = p;
int i;
- u64 resid;
u64 good_bytes;
u64 length = 0;
- int ret = exofs_check_io(ios, &resid);
+ int ret = ore_check_io(ios, NULL);
atomic_dec(&pcol->sbi->s_curr_pending);
- if (likely(!ret))
+ if (likely(!ret)) {
good_bytes = pcol->length;
- else
- good_bytes = pcol->length - resid;
+ ret = PAGE_WAS_NOT_IN_IO;
+ } else {
+ good_bytes = 0;
+ }
EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx"
" length=0x%lx nr_pages=%u\n",
@@ -504,16 +565,73 @@ static void writepages_done(struct exofs_io_state *ios, void *p)
EXOFS_DBGMSG2("writepages_done END\n");
}
+static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
+{
+ struct page_collect *pcol = priv;
+ pgoff_t index = offset / PAGE_SIZE;
+
+ if (!pcol->that_locked_page ||
+ (pcol->that_locked_page->index != index)) {
+ struct page *page = find_get_page(pcol->inode->i_mapping, index);
+
+ if (!page) {
+ page = find_or_create_page(pcol->inode->i_mapping,
+ index, GFP_NOFS);
+ if (unlikely(!page)) {
+ EXOFS_DBGMSG("grab_cache_page Failed "
+ "index=0x%llx\n", _LLU(index));
+ return NULL;
+ }
+ unlock_page(page);
+ }
+ if (PageDirty(page) || PageWriteback(page))
+ *uptodate = true;
+ else
+ *uptodate = PageUptodate(page);
+ EXOFS_DBGMSG("index=0x%lx uptodate=%d\n", index, *uptodate);
+ return page;
+ } else {
+ EXOFS_DBGMSG("YES that_locked_page index=0x%lx\n",
+ pcol->that_locked_page->index);
+ *uptodate = true;
+ return pcol->that_locked_page;
+ }
+}
+
+static void __r4w_put_page(void *priv, struct page *page)
+{
+ struct page_collect *pcol = priv;
+
+ if (pcol->that_locked_page != page) {
+ EXOFS_DBGMSG("index=0x%lx\n", page->index);
+ page_cache_release(page);
+ return;
+ }
+ EXOFS_DBGMSG("that_locked_page index=0x%lx\n", page->index);
+}
+
+static const struct _ore_r4w_op _r4w_op = {
+ .get_page = &__r4w_get_page,
+ .put_page = &__r4w_put_page,
+};
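These read-4-write hooks give the RAID engine a way to page in the parts of a stripe it must read before it can compute parity; they are wired up through ios->r4w in write_exec() below. A sketch of how the engine is expected to drive them (an assumption here — the real call sites are in the ORE code, not in this patch):

/* Illustrative only: the engine asks for a page, reads it from the
 * OSD if it is not uptodate, and returns it when done. */
bool uptodate;
struct page *page = ios->r4w->get_page(ios->private, offset, &uptodate);

if (page) {
	if (!uptodate) {
		/* engine reads the on-disk contents into page first */
	}
	/* ... page participates in the parity computation ... */
	ios->r4w->put_page(ios->private, page);
}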
+
static int write_exec(struct page_collect *pcol)
{
struct exofs_i_info *oi = exofs_i(pcol->inode);
- struct exofs_io_state *ios = pcol->ios;
+ struct ore_io_state *ios;
struct page_collect *pcol_copy = NULL;
int ret;
if (!pcol->pages)
return 0;
+ BUG_ON(pcol->ios);
+ ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false,
+ pcol->pg_first << PAGE_CACHE_SHIFT,
+ pcol->length, &pcol->ios);
+ if (unlikely(ret))
+ goto err;
+
pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL);
if (!pcol_copy) {
EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n");
@@ -523,25 +641,29 @@ static int write_exec(struct page_collect *pcol)
*pcol_copy = *pcol;
+ ios = pcol->ios;
ios->pages = pcol_copy->pages;
- ios->nr_pages = pcol_copy->nr_pages;
- ios->offset = pcol_copy->pg_first << PAGE_CACHE_SHIFT;
- ios->length = pcol_copy->length;
ios->done = writepages_done;
+ ios->r4w = &_r4w_op;
ios->private = pcol_copy;
- ret = exofs_oi_write(oi, ios);
+ /* pages ownership was passed to pcol_copy */
+ _pcol_reset(pcol);
+
+ ret = _maybe_not_all_in_one_io(ios, pcol_copy, pcol);
+ if (unlikely(ret))
+ goto err;
+
+ EXOFS_DBGMSG2("write_exec(0x%lx) offset=0x%llx length=0x%llx\n",
+ pcol->inode->i_ino, _LLU(ios->offset), _LLU(ios->length));
+
+ ret = ore_write(ios);
if (unlikely(ret)) {
- EXOFS_ERR("write_exec: exofs_oi_write() Failed\n");
+ EXOFS_ERR("write_exec: ore_write() Failed\n");
goto err;
}
atomic_inc(&pcol->sbi->s_curr_pending);
- EXOFS_DBGMSG2("write_exec(0x%lx, 0x%llx) start=0x%llx length=0x%lx\n",
- pcol->inode->i_ino, pcol->pg_first, _LLU(ios->offset),
- pcol->length);
- /* pages ownership was passed to pcol_copy */
- _pcol_reset(pcol);
return 0;
err:
@@ -681,14 +803,33 @@ static int exofs_writepages(struct address_space *mapping,
_pcol_init(&pcol, expected_pages, mapping->host);
ret = write_cache_pages(mapping, wbc, writepage_strip, &pcol);
- if (ret) {
+ if (unlikely(ret)) {
EXOFS_ERR("write_cache_pages => %d\n", ret);
return ret;
}
- return write_exec(&pcol);
+ ret = write_exec(&pcol);
+ if (unlikely(ret))
+ return ret;
+
+ if (wbc->sync_mode == WB_SYNC_ALL) {
+ return write_exec(&pcol); /* pump the last remainder */
+ } else if (pcol.nr_pages) {
+ /* not SYNC; let the remainder join the next writeout */
+ unsigned i;
+
+ for (i = 0; i < pcol.nr_pages; i++) {
+ struct page *page = pcol.pages[i];
+
+ end_page_writeback(page);
+ set_page_dirty(page);
+ unlock_page(page);
+ }
+ }
+ return 0;
}
+/*
static int exofs_writepage(struct page *page, struct writeback_control *wbc)
{
struct page_collect pcol;
@@ -704,7 +845,7 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
return write_exec(&pcol);
}
-
+*/
/* i_mutex held using inode->i_size directly */
static void _write_failed(struct inode *inode, loff_t to)
{
@@ -810,7 +951,7 @@ static void exofs_invalidatepage(struct page *page, unsigned long offset)
const struct address_space_operations exofs_aops = {
.readpage = exofs_readpage,
.readpages = exofs_readpages,
- .writepage = exofs_writepage,
+ .writepage = NULL,
.writepages = exofs_writepages,
.write_begin = exofs_write_begin_export,
.write_end = exofs_write_end,
@@ -844,17 +985,15 @@ static inline int exofs_inode_is_fast_symlink(struct inode *inode)
return S_ISLNK(inode->i_mode) && (oi->i_data[0] != 0);
}
-const struct osd_attr g_attr_logical_length = ATTR_DEF(
- OSD_APAGE_OBJECT_INFORMATION, OSD_ATTR_OI_LOGICAL_LENGTH, 8);
-
static int _do_truncate(struct inode *inode, loff_t newsize)
{
struct exofs_i_info *oi = exofs_i(inode);
+ struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
int ret;
inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- ret = exofs_oi_truncate(oi, (u64)newsize);
+ ret = ore_truncate(&sbi->layout, &oi->oc, (u64)newsize);
if (likely(!ret))
truncate_setsize(inode, newsize);
@@ -917,30 +1056,26 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
[1] = g_attr_inode_file_layout,
[2] = g_attr_inode_dir_layout,
};
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
struct exofs_on_disk_inode_layout *layout;
int ret;
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret;
}
- ios->obj.id = exofs_oi_objno(oi);
- exofs_make_credential(oi->i_cred, &ios->obj);
- ios->cred = oi->i_cred;
-
- attrs[1].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
- attrs[2].len = exofs_on_disk_inode_layout_size(sbi->layout.s_numdevs);
+ attrs[1].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs);
+ attrs[2].len = exofs_on_disk_inode_layout_size(sbi->oc.numdevs);
ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs);
- ret = exofs_sbi_read(ios);
+ ret = ore_read(ios);
if (unlikely(ret)) {
EXOFS_ERR("object(0x%llx) corrupted, return empty file=>%d\n",
- _LLU(ios->obj.id), ret);
+ _LLU(oi->one_comp.obj.id), ret);
memset(inode, 0, sizeof(*inode));
inode->i_mode = 0040000 | (0777 & ~022);
/* If object is lost on target we might as well enable its
@@ -990,7 +1125,7 @@ static int exofs_get_inode(struct super_block *sb, struct exofs_i_info *oi,
}
out:
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
return ret;
}
@@ -1016,6 +1151,8 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
return inode;
oi = exofs_i(inode);
__oi_init(oi);
+ exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info,
+ exofs_oi_objno(oi));
/* read the inode from the osd */
ret = exofs_get_inode(sb, oi, &fcb);
@@ -1028,7 +1165,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(fcb.i_mode);
inode->i_uid = le32_to_cpu(fcb.i_uid);
inode->i_gid = le32_to_cpu(fcb.i_gid);
- inode->i_nlink = le16_to_cpu(fcb.i_links_count);
+ set_nlink(inode, le16_to_cpu(fcb.i_links_count));
inode->i_ctime.tv_sec = (signed)le32_to_cpu(fcb.i_ctime);
inode->i_atime.tv_sec = (signed)le32_to_cpu(fcb.i_atime);
inode->i_mtime.tv_sec = (signed)le32_to_cpu(fcb.i_mtime);
@@ -1107,21 +1244,22 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
* set the obj_created flag so that other methods know that the object exists on
* the OSD.
*/
-static void create_done(struct exofs_io_state *ios, void *p)
+static void create_done(struct ore_io_state *ios, void *p)
{
struct inode *inode = p;
struct exofs_i_info *oi = exofs_i(inode);
struct exofs_sb_info *sbi = inode->i_sb->s_fs_info;
int ret;
- ret = exofs_check_io(ios, NULL);
- exofs_put_io_state(ios);
+ ret = ore_check_io(ios, NULL);
+ ore_put_io_state(ios);
atomic_dec(&sbi->s_curr_pending);
if (unlikely(ret)) {
EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx",
- _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid));
+ _LLU(exofs_oi_objno(oi)),
+ _LLU(oi->one_comp.obj.partition));
/*TODO: When FS is corrupted, creation can fail and the object may
* exist already. Get rid of this asynchronous creation; if it exists,
* increment the obj counter and try the next object. Until we
@@ -1140,14 +1278,13 @@ static void create_done(struct exofs_io_state *ios, void *p)
*/
struct inode *exofs_new_inode(struct inode *dir, int mode)
{
- struct super_block *sb;
+ struct super_block *sb = dir->i_sb;
+ struct exofs_sb_info *sbi = sb->s_fs_info;
struct inode *inode;
struct exofs_i_info *oi;
- struct exofs_sb_info *sbi;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
int ret;
- sb = dir->i_sb;
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -1157,8 +1294,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
set_obj_2bcreated(oi);
- sbi = sb->s_fs_info;
-
inode->i_mapping->backing_dev_info = sb->s_bdi;
inode_init_owner(inode, dir, mode);
inode->i_ino = sbi->s_nextid++;
@@ -1170,25 +1305,24 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
spin_unlock(&sbi->s_next_gen_lock);
insert_inode_hash(inode);
+ exofs_init_comps(&oi->oc, &oi->one_comp, sb->s_fs_info,
+ exofs_oi_objno(oi));
exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
mark_inode_dirty(inode);
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("exofs_new_inode: exofs_get_io_state failed\n");
+ EXOFS_ERR("exofs_new_inode: ore_get_io_state failed\n");
return ERR_PTR(ret);
}
- ios->obj.id = exofs_oi_objno(oi);
- exofs_make_credential(oi->i_cred, &ios->obj);
-
ios->done = create_done;
ios->private = inode;
- ios->cred = oi->i_cred;
- ret = exofs_sbi_create(ios);
+
+ ret = ore_create(ios);
if (ret) {
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
return ERR_PTR(ret);
}
atomic_inc(&sbi->s_curr_pending);
@@ -1207,11 +1341,11 @@ struct updatei_args {
/*
* Callback function from exofs_update_inode().
*/
-static void updatei_done(struct exofs_io_state *ios, void *p)
+static void updatei_done(struct ore_io_state *ios, void *p)
{
struct updatei_args *args = p;
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
atomic_dec(&args->sbi->s_curr_pending);
@@ -1227,7 +1361,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
struct osd_attr attr;
struct exofs_fcb *fcb;
struct updatei_args *args;
@@ -1266,9 +1400,9 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
} else
memcpy(fcb->i_data, oi->i_data, sizeof(fcb->i_data));
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
goto free_args;
}
@@ -1285,13 +1419,13 @@ static int exofs_update_inode(struct inode *inode, int do_sync)
ios->private = args;
}
- ret = exofs_oi_write(oi, ios);
+ ret = ore_write(ios);
if (!do_sync && !ret) {
atomic_inc(&sbi->s_curr_pending);
goto out; /* deallocation in updatei_done */
}
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
free_args:
kfree(args);
out:
@@ -1310,11 +1444,11 @@ int exofs_write_inode(struct inode *inode, struct writeback_control *wbc)
* Callback function from exofs_delete_inode() - don't have much cleaning up to
* do.
*/
-static void delete_done(struct exofs_io_state *ios, void *p)
+static void delete_done(struct ore_io_state *ios, void *p)
{
struct exofs_sb_info *sbi = p;
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
atomic_dec(&sbi->s_curr_pending);
}
@@ -1329,7 +1463,7 @@ void exofs_evict_inode(struct inode *inode)
struct exofs_i_info *oi = exofs_i(inode);
struct super_block *sb = inode->i_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
int ret;
truncate_inode_pages(&inode->i_data, 0);
@@ -1349,20 +1483,19 @@ void exofs_evict_inode(struct inode *inode)
/* ignore the error, attempt a remove anyway */
/* Now Remove the OSD objects */
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &oi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed\n", __func__);
return;
}
- ios->obj.id = exofs_oi_objno(oi);
ios->done = delete_done;
ios->private = sbi;
- ios->cred = oi->i_cred;
- ret = exofs_sbi_remove(ios);
+
+ ret = ore_remove(ios);
if (ret) {
- EXOFS_ERR("%s: exofs_sbi_remove failed\n", __func__);
- exofs_put_io_state(ios);
+ EXOFS_ERR("%s: ore_remove failed\n", __func__);
+ ore_put_io_state(ios);
return;
}
atomic_inc(&sbi->s_curr_pending);
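Every exofs_get_io_state()/exofs_sbi_read()/exofs_sbi_write() sequence above becomes this ORE pattern, which is also why ios.c is deleted next: its engine moved into the shared ORE library behind <scsi/osd_ore.h>. A minimal sketch of the new read-side sequence, assuming the components were set up with exofs_init_comps(); my_done and my_cookie are hypothetical:

static int my_ore_read(struct exofs_sb_info *sbi, struct exofs_i_info *oi,
		       struct page **pages, u64 offset, u64 length)
{
	struct ore_io_state *ios;
	int ret;

	ret = ore_get_rw_state(&sbi->layout, &oi->oc, true /* read */,
			       offset, length, &ios);
	if (unlikely(ret))
		return ret;

	ios->pages = pages;		/* pages to fill */
	ios->done = my_done;		/* hypothetical completion callback */
	ios->private = my_cookie;	/* hypothetical */

	ret = ore_read(ios);		/* ore_write() on the write side */
	if (unlikely(ret))
		ore_put_io_state(ios);	/* on submit failure, drop it here */
	return ret;
}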
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c
deleted file mode 100644
index f74a2ec..0000000
--- a/fs/exofs/ios.c
+++ /dev/null
@@ -1,803 +0,0 @@
-/*
- * Copyright (C) 2005, 2006
- * Avishay Traeger (avishay@gmail.com)
- * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
- *
- * This file is part of exofs.
- *
- * exofs is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation. Since it is based on ext2, and the only
- * valid version of GPL for the Linux kernel is version 2, the only valid
- * version of GPL for exofs is version 2.
- *
- * exofs is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with exofs; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <linux/slab.h>
-#include <scsi/scsi_device.h>
-#include <asm/div64.h>
-
-#include "exofs.h"
-
-#define EXOFS_DBGMSG2(M...) do {} while (0)
-/* #define EXOFS_DBGMSG2 EXOFS_DBGMSG */
-
-void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
-{
- osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
-}
-
-int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
- u64 offset, void *p, unsigned length)
-{
- struct osd_request *or = osd_start_request(od, GFP_KERNEL);
-/* struct osd_sense_info osi = {.key = 0};*/
- int ret;
-
- if (unlikely(!or)) {
- EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
- return -ENOMEM;
- }
- ret = osd_req_read_kern(or, obj, offset, p, length);
- if (unlikely(ret)) {
- EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
- goto out;
- }
-
- ret = osd_finalize_request(or, 0, cred, NULL);
- if (unlikely(ret)) {
- EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
- goto out;
- }
-
- ret = osd_execute_request(or);
- if (unlikely(ret))
- EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
- /* osd_req_decode_sense(or, ret); */
-
-out:
- osd_end_request(or);
- return ret;
-}
-
-int exofs_get_io_state(struct exofs_layout *layout,
- struct exofs_io_state **pios)
-{
- struct exofs_io_state *ios;
-
- /*TODO: Maybe use kmem_cach per sbi of size
- * exofs_io_state_size(layout->s_numdevs)
- */
- ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL);
- if (unlikely(!ios)) {
- EXOFS_DBGMSG("Failed kzalloc bytes=%d\n",
- exofs_io_state_size(layout->s_numdevs));
- *pios = NULL;
- return -ENOMEM;
- }
-
- ios->layout = layout;
- ios->obj.partition = layout->s_pid;
- *pios = ios;
- return 0;
-}
-
-void exofs_put_io_state(struct exofs_io_state *ios)
-{
- if (ios) {
- unsigned i;
-
- for (i = 0; i < ios->numdevs; i++) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[i];
-
- if (per_dev->or)
- osd_end_request(per_dev->or);
- if (per_dev->bio)
- bio_put(per_dev->bio);
- }
-
- kfree(ios);
- }
-}
-
-unsigned exofs_layout_od_id(struct exofs_layout *layout,
- osd_id obj_no, unsigned layout_index)
-{
-/* switch (layout->lay_func) {
- case LAYOUT_MOVING_WINDOW:
- {*/
- unsigned dev_mod = obj_no;
-
- return (layout_index + dev_mod * layout->mirrors_p1) %
- layout->s_numdevs;
-/* }
- case LAYOUT_FUNC_IMPLICT:
- return layout->devs[layout_index];
- }*/
-}
-
-static inline struct osd_dev *exofs_ios_od(struct exofs_io_state *ios,
- unsigned layout_index)
-{
- return ios->layout->s_ods[
- exofs_layout_od_id(ios->layout, ios->obj.id, layout_index)];
-}
-
-static void _sync_done(struct exofs_io_state *ios, void *p)
-{
- struct completion *waiting = p;
-
- complete(waiting);
-}
-
-static void _last_io(struct kref *kref)
-{
- struct exofs_io_state *ios = container_of(
- kref, struct exofs_io_state, kref);
-
- ios->done(ios, ios->private);
-}
-
-static void _done_io(struct osd_request *or, void *p)
-{
- struct exofs_io_state *ios = p;
-
- kref_put(&ios->kref, _last_io);
-}
-
-static int exofs_io_execute(struct exofs_io_state *ios)
-{
- DECLARE_COMPLETION_ONSTACK(wait);
- bool sync = (ios->done == NULL);
- int i, ret;
-
- if (sync) {
- ios->done = _sync_done;
- ios->private = &wait;
- }
-
- for (i = 0; i < ios->numdevs; i++) {
- struct osd_request *or = ios->per_dev[i].or;
- if (unlikely(!or))
- continue;
-
- ret = osd_finalize_request(or, 0, ios->cred, NULL);
- if (unlikely(ret)) {
- EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n",
- ret);
- return ret;
- }
- }
-
- kref_init(&ios->kref);
-
- for (i = 0; i < ios->numdevs; i++) {
- struct osd_request *or = ios->per_dev[i].or;
- if (unlikely(!or))
- continue;
-
- kref_get(&ios->kref);
- osd_execute_request_async(or, _done_io, ios);
- }
-
- kref_put(&ios->kref, _last_io);
- ret = 0;
-
- if (sync) {
- wait_for_completion(&wait);
- ret = exofs_check_io(ios, NULL);
- }
- return ret;
-}
-
-static void _clear_bio(struct bio *bio)
-{
- struct bio_vec *bv;
- unsigned i;
-
- __bio_for_each_segment(bv, bio, i, 0) {
- unsigned this_count = bv->bv_len;
-
- if (likely(PAGE_SIZE == this_count))
- clear_highpage(bv->bv_page);
- else
- zero_user(bv->bv_page, bv->bv_offset, this_count);
- }
-}
-
-int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
-{
- enum osd_err_priority acumulated_osd_err = 0;
- int acumulated_lin_err = 0;
- int i;
-
- for (i = 0; i < ios->numdevs; i++) {
- struct osd_sense_info osi;
- struct osd_request *or = ios->per_dev[i].or;
- int ret;
-
- if (unlikely(!or))
- continue;
-
- ret = osd_req_decode_sense(or, &osi);
- if (likely(!ret))
- continue;
-
- if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
- /* start read offset passed endof file */
- _clear_bio(ios->per_dev[i].bio);
- EXOFS_DBGMSG("start read offset passed end of file "
- "offset=0x%llx, length=0x%llx\n",
- _LLU(ios->per_dev[i].offset),
- _LLU(ios->per_dev[i].length));
-
- continue; /* we recovered */
- }
-
- if (osi.osd_err_pri >= acumulated_osd_err) {
- acumulated_osd_err = osi.osd_err_pri;
- acumulated_lin_err = ret;
- }
- }
-
- /* TODO: raid specific residual calculations */
- if (resid) {
- if (likely(!acumulated_lin_err))
- *resid = 0;
- else
- *resid = ios->length;
- }
-
- return acumulated_lin_err;
-}
-
-/*
- * L - logical offset into the file
- *
- * U - The number of bytes in a stripe within a group
- *
- * U = stripe_unit * group_width
- *
- * T - The number of bytes striped within a group of component objects
- * (before advancing to the next group)
- *
- * T = stripe_unit * group_width * group_depth
- *
- * S - The number of bytes striped across all component objects
- * before the pattern repeats
- *
- * S = stripe_unit * group_width * group_depth * group_count
- *
- * M - The "major" (i.e., across all components) stripe number
- *
- * M = L / S
- *
- * G - Counts the groups from the beginning of the major stripe
- *
- * G = (L - (M * S)) / T [or (L % S) / T]
- *
- * H - The byte offset within the group
- *
- * H = (L - (M * S)) % T [or (L % S) % T]
- *
- * N - The "minor" (i.e., across the group) stripe number
- *
- * N = H / U
- *
- * C - The component index coresponding to L
- *
- * C = (H - (N * U)) / stripe_unit + G * group_width
- * [or (L % U) / stripe_unit + G * group_width]
- *
- * O - The component offset coresponding to L
- *
- * O = L % stripe_unit + N * stripe_unit + M * group_depth * stripe_unit
- */
-struct _striping_info {
- u64 obj_offset;
- u64 group_length;
- unsigned dev;
- unsigned unit_off;
-};
-
-static void _calc_stripe_info(struct exofs_io_state *ios, u64 file_offset,
- struct _striping_info *si)
-{
- u32 stripe_unit = ios->layout->stripe_unit;
- u32 group_width = ios->layout->group_width;
- u64 group_depth = ios->layout->group_depth;
-
- u32 U = stripe_unit * group_width;
- u64 T = U * group_depth;
- u64 S = T * ios->layout->group_count;
- u64 M = div64_u64(file_offset, S);
-
- /*
- G = (L - (M * S)) / T
- H = (L - (M * S)) % T
- */
- u64 LmodS = file_offset - M * S;
- u32 G = div64_u64(LmodS, T);
- u64 H = LmodS - G * T;
-
- u32 N = div_u64(H, U);
-
- /* "H - (N * U)" is just "H % U" so it's bound to u32 */
- si->dev = (u32)(H - (N * U)) / stripe_unit + G * group_width;
- si->dev *= ios->layout->mirrors_p1;
-
- div_u64_rem(file_offset, stripe_unit, &si->unit_off);
-
- si->obj_offset = si->unit_off + (N * stripe_unit) +
- (M * group_depth * stripe_unit);
-
- si->group_length = T - H;
-}
-
-static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg,
- unsigned pgbase, struct exofs_per_dev_state *per_dev,
- int cur_len)
-{
- unsigned pg = *cur_pg;
- struct request_queue *q =
- osd_request_queue(exofs_ios_od(ios, per_dev->dev));
-
- per_dev->length += cur_len;
-
- if (per_dev->bio == NULL) {
- unsigned pages_in_stripe = ios->layout->group_width *
- (ios->layout->stripe_unit / PAGE_SIZE);
- unsigned bio_size = (ios->nr_pages + pages_in_stripe) /
- ios->layout->group_width;
-
- per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
- if (unlikely(!per_dev->bio)) {
- EXOFS_DBGMSG("Failed to allocate BIO size=%u\n",
- bio_size);
- return -ENOMEM;
- }
- }
-
- while (cur_len > 0) {
- unsigned pglen = min_t(unsigned, PAGE_SIZE - pgbase, cur_len);
- unsigned added_len;
-
- BUG_ON(ios->nr_pages <= pg);
- cur_len -= pglen;
-
- added_len = bio_add_pc_page(q, per_dev->bio, ios->pages[pg],
- pglen, pgbase);
- if (unlikely(pglen != added_len))
- return -ENOMEM;
- pgbase = 0;
- ++pg;
- }
- BUG_ON(cur_len);
-
- *cur_pg = pg;
- return 0;
-}
-
-static int _prepare_one_group(struct exofs_io_state *ios, u64 length,
- struct _striping_info *si)
-{
- unsigned stripe_unit = ios->layout->stripe_unit;
- unsigned mirrors_p1 = ios->layout->mirrors_p1;
- unsigned devs_in_group = ios->layout->group_width * mirrors_p1;
- unsigned dev = si->dev;
- unsigned first_dev = dev - (dev % devs_in_group);
- unsigned max_comp = ios->numdevs ? ios->numdevs - mirrors_p1 : 0;
- unsigned cur_pg = ios->pages_consumed;
- int ret = 0;
-
- while (length) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[dev];
- unsigned cur_len, page_off = 0;
-
- if (!per_dev->length) {
- per_dev->dev = dev;
- if (dev < si->dev) {
- per_dev->offset = si->obj_offset + stripe_unit -
- si->unit_off;
- cur_len = stripe_unit;
- } else if (dev == si->dev) {
- per_dev->offset = si->obj_offset;
- cur_len = stripe_unit - si->unit_off;
- page_off = si->unit_off & ~PAGE_MASK;
- BUG_ON(page_off && (page_off != ios->pgbase));
- } else { /* dev > si->dev */
- per_dev->offset = si->obj_offset - si->unit_off;
- cur_len = stripe_unit;
- }
-
- if (max_comp < dev)
- max_comp = dev;
- } else {
- cur_len = stripe_unit;
- }
- if (cur_len >= length)
- cur_len = length;
-
- ret = _add_stripe_unit(ios, &cur_pg, page_off , per_dev,
- cur_len);
- if (unlikely(ret))
- goto out;
-
- dev += mirrors_p1;
- dev = (dev % devs_in_group) + first_dev;
-
- length -= cur_len;
- }
-out:
- ios->numdevs = max_comp + mirrors_p1;
- ios->pages_consumed = cur_pg;
- return ret;
-}
-
-static int _prepare_for_striping(struct exofs_io_state *ios)
-{
- u64 length = ios->length;
- u64 offset = ios->offset;
- struct _striping_info si;
- int ret = 0;
-
- if (!ios->pages) {
- if (ios->kern_buff) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[0];
-
- _calc_stripe_info(ios, ios->offset, &si);
- per_dev->offset = si.obj_offset;
- per_dev->dev = si.dev;
-
- /* no cross device without page array */
- BUG_ON((ios->layout->group_width > 1) &&
- (si.unit_off + ios->length >
- ios->layout->stripe_unit));
- }
- ios->numdevs = ios->layout->mirrors_p1;
- return 0;
- }
-
- while (length) {
- _calc_stripe_info(ios, offset, &si);
-
- if (length < si.group_length)
- si.group_length = length;
-
- ret = _prepare_one_group(ios, si.group_length, &si);
- if (unlikely(ret))
- goto out;
-
- offset += si.group_length;
- length -= si.group_length;
- }
-
-out:
- return ret;
-}
-
-int exofs_sbi_create(struct exofs_io_state *ios)
-{
- int i, ret;
-
- for (i = 0; i < ios->layout->s_numdevs; i++) {
- struct osd_request *or;
-
- or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- ret = -ENOMEM;
- goto out;
- }
- ios->per_dev[i].or = or;
- ios->numdevs++;
-
- osd_req_create_object(or, &ios->obj);
- }
- ret = exofs_io_execute(ios);
-
-out:
- return ret;
-}
-
-int exofs_sbi_remove(struct exofs_io_state *ios)
-{
- int i, ret;
-
- for (i = 0; i < ios->layout->s_numdevs; i++) {
- struct osd_request *or;
-
- or = osd_start_request(exofs_ios_od(ios, i), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- ret = -ENOMEM;
- goto out;
- }
- ios->per_dev[i].or = or;
- ios->numdevs++;
-
- osd_req_remove_object(or, &ios->obj);
- }
- ret = exofs_io_execute(ios);
-
-out:
- return ret;
-}
-
-static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp)
-{
- struct exofs_per_dev_state *master_dev = &ios->per_dev[cur_comp];
- unsigned dev = ios->per_dev[cur_comp].dev;
- unsigned last_comp = cur_comp + ios->layout->mirrors_p1;
- int ret = 0;
-
- if (ios->pages && !master_dev->length)
- return 0; /* Just an empty slot */
-
- for (; cur_comp < last_comp; ++cur_comp, ++dev) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
- struct osd_request *or;
-
- or = osd_start_request(exofs_ios_od(ios, dev), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- ret = -ENOMEM;
- goto out;
- }
- per_dev->or = or;
- per_dev->offset = master_dev->offset;
-
- if (ios->pages) {
- struct bio *bio;
-
- if (per_dev != master_dev) {
- bio = bio_kmalloc(GFP_KERNEL,
- master_dev->bio->bi_max_vecs);
- if (unlikely(!bio)) {
- EXOFS_DBGMSG(
- "Failed to allocate BIO size=%u\n",
- master_dev->bio->bi_max_vecs);
- ret = -ENOMEM;
- goto out;
- }
-
- __bio_clone(bio, master_dev->bio);
- bio->bi_bdev = NULL;
- bio->bi_next = NULL;
- per_dev->length = master_dev->length;
- per_dev->bio = bio;
- per_dev->dev = dev;
- } else {
- bio = master_dev->bio;
- /* FIXME: bio_set_dir() */
- bio->bi_rw |= REQ_WRITE;
- }
-
- osd_req_write(or, &ios->obj, per_dev->offset, bio,
- per_dev->length);
- EXOFS_DBGMSG("write(0x%llx) offset=0x%llx "
- "length=0x%llx dev=%d\n",
- _LLU(ios->obj.id), _LLU(per_dev->offset),
- _LLU(per_dev->length), dev);
- } else if (ios->kern_buff) {
- ret = osd_req_write_kern(or, &ios->obj, per_dev->offset,
- ios->kern_buff, ios->length);
- if (unlikely(ret))
- goto out;
- EXOFS_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
- "length=0x%llx dev=%d\n",
- _LLU(ios->obj.id), _LLU(per_dev->offset),
- _LLU(ios->length), dev);
- } else {
- osd_req_set_attributes(or, &ios->obj);
- EXOFS_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
- _LLU(ios->obj.id), ios->out_attr_len, dev);
- }
-
- if (ios->out_attr)
- osd_req_add_set_attr_list(or, ios->out_attr,
- ios->out_attr_len);
-
- if (ios->in_attr)
- osd_req_add_get_attr_list(or, ios->in_attr,
- ios->in_attr_len);
- }
-
-out:
- return ret;
-}
-
-int exofs_sbi_write(struct exofs_io_state *ios)
-{
- int i;
- int ret;
-
- ret = _prepare_for_striping(ios);
- if (unlikely(ret))
- return ret;
-
- for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
- ret = _sbi_write_mirror(ios, i);
- if (unlikely(ret))
- return ret;
- }
-
- ret = exofs_io_execute(ios);
- return ret;
-}
-
-static int _sbi_read_mirror(struct exofs_io_state *ios, unsigned cur_comp)
-{
- struct osd_request *or;
- struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
- unsigned first_dev = (unsigned)ios->obj.id;
-
- if (ios->pages && !per_dev->length)
- return 0; /* Just an empty slot */
-
- first_dev = per_dev->dev + first_dev % ios->layout->mirrors_p1;
- or = osd_start_request(exofs_ios_od(ios, first_dev), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- return -ENOMEM;
- }
- per_dev->or = or;
-
- if (ios->pages) {
- osd_req_read(or, &ios->obj, per_dev->offset,
- per_dev->bio, per_dev->length);
- EXOFS_DBGMSG("read(0x%llx) offset=0x%llx length=0x%llx"
- " dev=%d\n", _LLU(ios->obj.id),
- _LLU(per_dev->offset), _LLU(per_dev->length),
- first_dev);
- } else if (ios->kern_buff) {
- int ret = osd_req_read_kern(or, &ios->obj, per_dev->offset,
- ios->kern_buff, ios->length);
- EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
- "length=0x%llx dev=%d ret=>%d\n",
- _LLU(ios->obj.id), _LLU(per_dev->offset),
- _LLU(ios->length), first_dev, ret);
- if (unlikely(ret))
- return ret;
- } else {
- osd_req_get_attributes(or, &ios->obj);
- EXOFS_DBGMSG2("obj(0x%llx) get_attributes=%d dev=%d\n",
- _LLU(ios->obj.id), ios->in_attr_len, first_dev);
- }
- if (ios->out_attr)
- osd_req_add_set_attr_list(or, ios->out_attr, ios->out_attr_len);
-
- if (ios->in_attr)
- osd_req_add_get_attr_list(or, ios->in_attr, ios->in_attr_len);
-
- return 0;
-}
-
-int exofs_sbi_read(struct exofs_io_state *ios)
-{
- int i;
- int ret;
-
- ret = _prepare_for_striping(ios);
- if (unlikely(ret))
- return ret;
-
- for (i = 0; i < ios->numdevs; i += ios->layout->mirrors_p1) {
- ret = _sbi_read_mirror(ios, i);
- if (unlikely(ret))
- return ret;
- }
-
- ret = exofs_io_execute(ios);
- return ret;
-}
-
-int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
-{
- struct osd_attr cur_attr = {.attr_page = 0}; /* start with zeros */
- void *iter = NULL;
- int nelem;
-
- do {
- nelem = 1;
- osd_req_decode_get_attr_list(ios->per_dev[0].or,
- &cur_attr, &nelem, &iter);
- if ((cur_attr.attr_page == attr->attr_page) &&
- (cur_attr.attr_id == attr->attr_id)) {
- attr->len = cur_attr.len;
- attr->val_ptr = cur_attr.val_ptr;
- return 0;
- }
- } while (iter);
-
- return -EIO;
-}
-
-static int _truncate_mirrors(struct exofs_io_state *ios, unsigned cur_comp,
- struct osd_attr *attr)
-{
- int last_comp = cur_comp + ios->layout->mirrors_p1;
-
- for (; cur_comp < last_comp; ++cur_comp) {
- struct exofs_per_dev_state *per_dev = &ios->per_dev[cur_comp];
- struct osd_request *or;
-
- or = osd_start_request(exofs_ios_od(ios, cur_comp), GFP_KERNEL);
- if (unlikely(!or)) {
- EXOFS_ERR("%s: osd_start_request failed\n", __func__);
- return -ENOMEM;
- }
- per_dev->or = or;
-
- osd_req_set_attributes(or, &ios->obj);
- osd_req_add_set_attr_list(or, attr, 1);
- }
-
- return 0;
-}
-
-int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
-{
- struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
- struct exofs_io_state *ios;
- struct exofs_trunc_attr {
- struct osd_attr attr;
- __be64 newsize;
- } *size_attrs;
- struct _striping_info si;
- int i, ret;
-
- ret = exofs_get_io_state(&sbi->layout, &ios);
- if (unlikely(ret))
- return ret;
-
- size_attrs = kcalloc(ios->layout->group_width, sizeof(*size_attrs),
- GFP_KERNEL);
- if (unlikely(!size_attrs)) {
- ret = -ENOMEM;
- goto out;
- }
-
- ios->obj.id = exofs_oi_objno(oi);
- ios->cred = oi->i_cred;
-
- ios->numdevs = ios->layout->s_numdevs;
- _calc_stripe_info(ios, size, &si);
-
- for (i = 0; i < ios->layout->group_width; ++i) {
- struct exofs_trunc_attr *size_attr = &size_attrs[i];
- u64 obj_size;
-
- if (i < si.dev)
- obj_size = si.obj_offset +
- ios->layout->stripe_unit - si.unit_off;
- else if (i == si.dev)
- obj_size = si.obj_offset;
- else /* i > si.dev */
- obj_size = si.obj_offset - si.unit_off;
-
- size_attr->newsize = cpu_to_be64(obj_size);
- size_attr->attr = g_attr_logical_length;
- size_attr->attr.val_ptr = &size_attr->newsize;
-
- ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
- &size_attr->attr);
- if (unlikely(ret))
- goto out;
- }
- ret = exofs_io_execute(ios);
-
-out:
- kfree(size_attrs);
- exofs_put_io_state(ios);
- return ret;
-}
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index 4d70db1..b54c437 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -55,12 +55,7 @@ static struct dentry *exofs_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(-ENAMETOOLONG);
ino = exofs_inode_by_name(dir, dentry);
- inode = NULL;
- if (ino) {
- inode = exofs_iget(dir->i_sb, ino);
- if (IS_ERR(inode))
- return ERR_CAST(inode);
- }
+ inode = ino ? exofs_iget(dir->i_sb, ino) : NULL;
return d_splice_alias(inode, dentry);
}
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
deleted file mode 100644
index c52e988..0000000
--- a/fs/exofs/pnfs.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2008, 2009
- * Boaz Harrosh <bharrosh@panasas.com>
- *
- * This file is part of exofs.
- *
- * exofs is free software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License version 2 as published by the Free
- * Software Foundation.
- *
- */
-
-/* FIXME: Remove this file once pnfs hits mainline */
-
-#ifndef __EXOFS_PNFS_H__
-#define __EXOFS_PNFS_H__
-
-#if ! defined(__PNFS_OSD_XDR_H__)
-
-enum pnfs_iomode {
- IOMODE_READ = 1,
- IOMODE_RW = 2,
- IOMODE_ANY = 3,
-};
-
-/* Layout Structure */
-enum pnfs_osd_raid_algorithm4 {
- PNFS_OSD_RAID_0 = 1,
- PNFS_OSD_RAID_4 = 2,
- PNFS_OSD_RAID_5 = 3,
- PNFS_OSD_RAID_PQ = 4 /* Reed-Solomon P+Q */
-};
-
-struct pnfs_osd_data_map {
- u32 odm_num_comps;
- u64 odm_stripe_unit;
- u32 odm_group_width;
- u32 odm_group_depth;
- u32 odm_mirror_cnt;
- u32 odm_raid_algorithm;
-};
-
-#endif /* ! defined(__PNFS_OSD_XDR_H__) */
-
-#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index c57bedd..7ed5000 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -35,11 +35,14 @@
#include <linux/parser.h>
#include <linux/vfs.h>
#include <linux/random.h>
+#include <linux/module.h>
#include <linux/exportfs.h>
#include <linux/slab.h>
#include "exofs.h"
+#define EXOFS_DBGMSG2(M...) do {} while (0)
+
/******************************************************************************
* MOUNT OPTIONS
*****************************************************************************/
@@ -208,10 +211,48 @@ static void destroy_inodecache(void)
}
/******************************************************************************
- * SUPERBLOCK FUNCTIONS
+ * Some osd helpers
*****************************************************************************/
-static const struct super_operations exofs_sops;
-static const struct export_operations exofs_export_ops;
+void exofs_make_credential(u8 cred_a[OSD_CAP_LEN], const struct osd_obj_id *obj)
+{
+ osd_sec_init_nosec_doall_caps(cred_a, obj, false, true);
+}
+
+static int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj,
+ u64 offset, void *p, unsigned length)
+{
+ struct osd_request *or = osd_start_request(od, GFP_KERNEL);
+/* struct osd_sense_info osi = {.key = 0};*/
+ int ret;
+
+ if (unlikely(!or)) {
+ EXOFS_DBGMSG("%s: osd_start_request failed.\n", __func__);
+ return -ENOMEM;
+ }
+ ret = osd_req_read_kern(or, obj, offset, p, length);
+ if (unlikely(ret)) {
+ EXOFS_DBGMSG("%s: osd_req_read_kern failed.\n", __func__);
+ goto out;
+ }
+
+ ret = osd_finalize_request(or, 0, cred, NULL);
+ if (unlikely(ret)) {
+ EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret);
+ goto out;
+ }
+
+ ret = osd_execute_request(or);
+ if (unlikely(ret))
+ EXOFS_DBGMSG("osd_execute_request() => %d\n", ret);
+ /* osd_req_decode_sense(or, ret); */
+
+out:
+ osd_end_request(or);
+ EXOFS_DBGMSG2("read_kern(0x%llx) offset=0x%llx "
+ "length=0x%llx dev=%p ret=>%d\n",
+ _LLU(obj->id), _LLU(offset), _LLU(length), od, ret);
+ return ret;
+}
static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
EXOFS_APAGE_SB_DATA,
@@ -223,21 +264,19 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
struct osd_attr attrs[] = {
[0] = g_attr_sb_stats,
};
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
int ret;
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret;
}
- ios->cred = sbi->s_cred;
-
ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs);
- ret = exofs_sbi_read(ios);
+ ret = ore_read(ios);
if (unlikely(ret)) {
EXOFS_ERR("Error reading super_block stats => %d\n", ret);
goto out;
@@ -264,13 +303,13 @@ static int __sbi_read_stats(struct exofs_sb_info *sbi)
}
out:
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
return ret;
}
-static void stats_done(struct exofs_io_state *ios, void *p)
+static void stats_done(struct ore_io_state *ios, void *p)
{
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
/* Good thanks nothing to do anymore */
}
@@ -280,12 +319,12 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
struct osd_attr attrs[] = {
[0] = g_attr_sb_stats,
};
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
int ret;
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+ EXOFS_ERR("%s: ore_get_io_state failed.\n", __func__);
return ret;
}
@@ -293,29 +332,37 @@ int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
attrs[0].val_ptr = &sbi->s_ess;
- ios->cred = sbi->s_cred;
+
ios->done = stats_done;
ios->private = sbi;
ios->out_attr = attrs;
ios->out_attr_len = ARRAY_SIZE(attrs);
- ret = exofs_sbi_write(ios);
+ ret = ore_write(ios);
if (unlikely(ret)) {
- EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
- exofs_put_io_state(ios);
+ EXOFS_ERR("%s: ore_write failed.\n", __func__);
+ ore_put_io_state(ios);
}
return ret;
}
+/******************************************************************************
+ * SUPERBLOCK FUNCTIONS
+ *****************************************************************************/
+static const struct super_operations exofs_sops;
+static const struct export_operations exofs_export_ops;
+
/*
* Write the superblock to the OSD
*/
-int exofs_sync_fs(struct super_block *sb, int wait)
+static int exofs_sync_fs(struct super_block *sb, int wait)
{
struct exofs_sb_info *sbi;
struct exofs_fscb *fscb;
- struct exofs_io_state *ios;
+ struct ore_comp one_comp;
+ struct ore_components oc;
+ struct ore_io_state *ios;
int ret = -ENOMEM;
fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
@@ -331,7 +378,10 @@ int exofs_sync_fs(struct super_block *sb, int wait)
* version). Otherwise the exofs_fscb is read-only from mkfs time. All
* the writeable info is set in exofs_sbi_write_stats() above.
*/
- ret = exofs_get_io_state(&sbi->layout, &ios);
+
+ exofs_init_comps(&oc, &one_comp, sbi, EXOFS_SUPER_ID);
+
+ ret = ore_get_io_state(&sbi->layout, &oc, &ios);
if (unlikely(ret))
goto out;
@@ -345,14 +395,12 @@ int exofs_sync_fs(struct super_block *sb, int wait)
fscb->s_newfs = 0;
fscb->s_version = EXOFS_FSCB_VER;
- ios->obj.id = EXOFS_SUPER_ID;
ios->offset = 0;
ios->kern_buff = fscb;
- ios->cred = sbi->s_cred;
- ret = exofs_sbi_write(ios);
+ ret = ore_write(ios);
if (unlikely(ret))
- EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
+ EXOFS_ERR("%s: ore_write failed.\n", __func__);
else
sb->s_dirt = 0;
@@ -360,7 +408,7 @@ int exofs_sync_fs(struct super_block *sb, int wait)
unlock_super(sb);
out:
EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
kfree(fscb);
return ret;
}
@@ -382,17 +430,20 @@ static void _exofs_print_device(const char *msg, const char *dev_path,
msg, dev_path ?: "", odi->osdname, _LLU(pid));
}
-void exofs_free_sbi(struct exofs_sb_info *sbi)
+static void exofs_free_sbi(struct exofs_sb_info *sbi)
{
- while (sbi->layout.s_numdevs) {
- int i = --sbi->layout.s_numdevs;
- struct osd_dev *od = sbi->layout.s_ods[i];
+ unsigned numdevs = sbi->oc.numdevs;
+
+ while (numdevs) {
+ unsigned i = --numdevs;
+ struct osd_dev *od = ore_comp_dev(&sbi->oc, i);
if (od) {
- sbi->layout.s_ods[i] = NULL;
+ ore_comp_set_dev(&sbi->oc, i, NULL);
osduld_put_device(od);
}
}
+ kfree(sbi->oc.ods);
kfree(sbi);
}
@@ -419,8 +470,8 @@ static void exofs_put_super(struct super_block *sb)
msecs_to_jiffies(100));
}
- _exofs_print_device("Unmounting", NULL, sbi->layout.s_ods[0],
- sbi->layout.s_pid);
+ _exofs_print_device("Unmounting", NULL, ore_comp_dev(&sbi->oc, 0),
+ sbi->one_comp.obj.partition);
bdi_destroy(&sbi->bdi);
exofs_free_sbi(sbi);
@@ -430,81 +481,34 @@ static void exofs_put_super(struct super_block *sb)
static int _read_and_match_data_map(struct exofs_sb_info *sbi, unsigned numdevs,
struct exofs_device_table *dt)
{
- u64 stripe_length;
+ int ret;
- sbi->data_map.odm_num_comps =
- le32_to_cpu(dt->dt_data_map.cb_num_comps);
- sbi->data_map.odm_stripe_unit =
+ sbi->layout.stripe_unit =
le64_to_cpu(dt->dt_data_map.cb_stripe_unit);
- sbi->data_map.odm_group_width =
+ sbi->layout.group_width =
le32_to_cpu(dt->dt_data_map.cb_group_width);
- sbi->data_map.odm_group_depth =
+ sbi->layout.group_depth =
le32_to_cpu(dt->dt_data_map.cb_group_depth);
- sbi->data_map.odm_mirror_cnt =
- le32_to_cpu(dt->dt_data_map.cb_mirror_cnt);
- sbi->data_map.odm_raid_algorithm =
+ sbi->layout.mirrors_p1 =
+ le32_to_cpu(dt->dt_data_map.cb_mirror_cnt) + 1;
+ sbi->layout.raid_algorithm =
le32_to_cpu(dt->dt_data_map.cb_raid_algorithm);
-/* FIXME: Only raid0 for now. if not so, do not mount */
- if (sbi->data_map.odm_num_comps != numdevs) {
- EXOFS_ERR("odm_num_comps(%u) != numdevs(%u)\n",
- sbi->data_map.odm_num_comps, numdevs);
- return -EINVAL;
- }
- if (sbi->data_map.odm_raid_algorithm != PNFS_OSD_RAID_0) {
- EXOFS_ERR("Only RAID_0 for now\n");
- return -EINVAL;
- }
- if (0 != (numdevs % (sbi->data_map.odm_mirror_cnt + 1))) {
- EXOFS_ERR("Data Map wrong, numdevs=%d mirrors=%d\n",
- numdevs, sbi->data_map.odm_mirror_cnt);
- return -EINVAL;
- }
-
- if (0 != (sbi->data_map.odm_stripe_unit & ~PAGE_MASK)) {
- EXOFS_ERR("Stripe Unit(0x%llx)"
- " must be Multples of PAGE_SIZE(0x%lx)\n",
- _LLU(sbi->data_map.odm_stripe_unit), PAGE_SIZE);
- return -EINVAL;
- }
-
- sbi->layout.stripe_unit = sbi->data_map.odm_stripe_unit;
- sbi->layout.mirrors_p1 = sbi->data_map.odm_mirror_cnt + 1;
-
- if (sbi->data_map.odm_group_width) {
- sbi->layout.group_width = sbi->data_map.odm_group_width;
- sbi->layout.group_depth = sbi->data_map.odm_group_depth;
- if (!sbi->layout.group_depth) {
- EXOFS_ERR("group_depth == 0 && group_width != 0\n");
- return -EINVAL;
- }
- sbi->layout.group_count = sbi->data_map.odm_num_comps /
- sbi->layout.mirrors_p1 /
- sbi->data_map.odm_group_width;
- } else {
- if (sbi->data_map.odm_group_depth) {
- printk(KERN_NOTICE "Warning: group_depth ignored "
- "group_width == 0 && group_depth == %d\n",
- sbi->data_map.odm_group_depth);
- sbi->data_map.odm_group_depth = 0;
- }
- sbi->layout.group_width = sbi->data_map.odm_num_comps /
- sbi->layout.mirrors_p1;
- sbi->layout.group_depth = -1;
- sbi->layout.group_count = 1;
- }
-
- stripe_length = (u64)sbi->layout.group_width * sbi->layout.stripe_unit;
- if (stripe_length >= (1ULL << 32)) {
- EXOFS_ERR("Total Stripe length(0x%llx)"
- " >= 32bit is not supported\n", _LLU(stripe_length));
- return -EINVAL;
- }
-
- return 0;
+ ret = ore_verify_layout(numdevs, &sbi->layout);
+
+ EXOFS_DBGMSG("exofs: layout: "
+ "num_comps=%u stripe_unit=0x%x group_width=%u "
+ "group_depth=0x%llx mirrors_p1=%u raid_algorithm=%u\n",
+ numdevs,
+ sbi->layout.stripe_unit,
+ sbi->layout.group_width,
+ _LLU(sbi->layout.group_depth),
+ sbi->layout.mirrors_p1,
+ sbi->layout.raid_algorithm);
+ return ret;
}
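
For reference, the little-endian decode that _read_and_match_data_map() now performs before handing off to ore_verify_layout() can be reproduced in userspace. The struct below is a simplified stand-in for the on-disk data map (the real fields live in struct exofs_device_table); note how the stored mirror count becomes mirrors_p1 by adding one:

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-in for the on-disk data map; illustration only. */
struct demo_data_map {
	uint64_t cb_stripe_unit;	/* little-endian on disk */
	uint32_t cb_group_width;
	uint32_t cb_group_depth;
	uint32_t cb_mirror_cnt;
	uint32_t cb_raid_algorithm;
};

struct demo_layout {
	uint64_t stripe_unit;
	uint32_t group_width;
	uint64_t group_depth;
	uint32_t mirrors_p1;
	uint32_t raid_algorithm;
};

static void demo_decode(const struct demo_data_map *dm, struct demo_layout *l)
{
	l->stripe_unit    = le64toh(dm->cb_stripe_unit);
	l->group_width    = le32toh(dm->cb_group_width);
	l->group_depth    = le32toh(dm->cb_group_depth);
	/* mirror count is stored as "extra copies"; p1 means plus one */
	l->mirrors_p1     = le32toh(dm->cb_mirror_cnt) + 1;
	l->raid_algorithm = le32toh(dm->cb_raid_algorithm);
}

int main(void)
{
	struct demo_data_map dm = {
		.cb_stripe_unit = htole64(65536),
		.cb_group_width = htole32(4),
		.cb_mirror_cnt  = htole32(1),	/* 1 mirror -> 2 copies */
	};
	struct demo_layout l;

	demo_decode(&dm, &l);
	printf("stripe_unit=%llu width=%u mirrors_p1=%u\n",
	       (unsigned long long)l.stripe_unit, l.group_width, l.mirrors_p1);
	return 0;
}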
-static unsigned __ra_pages(struct exofs_layout *layout)
+static unsigned __ra_pages(struct ore_layout *layout)
{
const unsigned _MIN_RA = 32; /* min 128K read-ahead */
unsigned ra_pages = layout->group_width * layout->stripe_unit /
@@ -547,14 +551,40 @@ static int exofs_devs_2_odi(struct exofs_dt_device_info *dt_dev,
return !(odi->systemid_len || odi->osdname_len);
}
-static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
+int __alloc_dev_table(struct exofs_sb_info *sbi, unsigned numdevs,
+ struct exofs_dev **peds)
+{
+ struct __alloc_ore_devs_and_exofs_devs {
+ /* Twice-bigger table: see exofs_init_comps() and the comment at
+ * exofs_read_lookup_dev_table()
+ */
+ struct ore_dev *oreds[numdevs * 2 - 1];
+ struct exofs_dev eds[numdevs];
+ } *aoded;
+ struct exofs_dev *eds;
+ unsigned i;
+
+ aoded = kzalloc(sizeof(*aoded), GFP_KERNEL);
+ if (unlikely(!aoded)) {
+ EXOFS_ERR("ERROR: faild allocating Device array[%d]\n",
+ numdevs);
+ return -ENOMEM;
+ }
+
+ sbi->oc.ods = aoded->oreds;
+ *peds = eds = aoded->eds;
+ for (i = 0; i < numdevs; ++i)
+ aoded->oreds[i] = &eds[i].ored;
+ return 0;
+}
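
__alloc_dev_table() uses a single allocation for both the ore_dev pointer table and the exofs_dev bodies, so the one kfree(sbi->oc.ods) in exofs_free_sbi() releases everything. A portable userspace sketch of the same carve-one-allocation-into-two-arrays idea, with invented types:

#include <stdio.h>
#include <stdlib.h>

/* Minimal stand-in; the real code pairs ore_dev pointers with
 * struct exofs_dev bodies in one kzalloc(). */
struct dev_body { unsigned did; };

int main(void)
{
	unsigned numdevs = 3;
	/* One allocation carved into two arrays, mirroring the
	 * "struct { ptrs[2n-1]; bodies[n]; }" trick above. */
	size_t ptrs_bytes = (2 * numdevs - 1) * sizeof(struct dev_body *);
	size_t bodies_bytes = numdevs * sizeof(struct dev_body);
	char *blob = calloc(1, ptrs_bytes + bodies_bytes);
	struct dev_body **ptrs;
	struct dev_body *bodies;
	unsigned i;

	if (!blob)
		return 1;
	ptrs = (struct dev_body **)blob;
	bodies = (struct dev_body *)(blob + ptrs_bytes);
	for (i = 0; i < numdevs; i++) {
		bodies[i].did = i;
		ptrs[i] = &bodies[i];
	}
	printf("ptrs[2]->did = %u\n", ptrs[2]->did);
	free(blob);	/* one free() releases both arrays */
	return 0;
}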
+
+static int exofs_read_lookup_dev_table(struct exofs_sb_info *sbi,
+ struct osd_dev *fscb_od,
unsigned table_count)
{
- struct exofs_sb_info *sbi = *psbi;
- struct osd_dev *fscb_od;
- struct osd_obj_id obj = {.partition = sbi->layout.s_pid,
- .id = EXOFS_DEVTABLE_ID};
+ struct ore_comp comp;
struct exofs_device_table *dt;
+ struct exofs_dev *eds;
unsigned table_bytes = table_count * sizeof(dt->dt_dev_table[0]) +
sizeof(*dt);
unsigned numdevs, i;
@@ -567,10 +597,14 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
return -ENOMEM;
}
- fscb_od = sbi->layout.s_ods[0];
- sbi->layout.s_ods[0] = NULL;
- sbi->layout.s_numdevs = 0;
- ret = exofs_read_kern(fscb_od, sbi->s_cred, &obj, 0, dt, table_bytes);
+ sbi->oc.numdevs = 0;
+
+ comp.obj.partition = sbi->one_comp.obj.partition;
+ comp.obj.id = EXOFS_DEVTABLE_ID;
+ exofs_make_credential(comp.cred, &comp.obj);
+
+ ret = exofs_read_kern(fscb_od, comp.cred, &comp.obj, 0, dt,
+ table_bytes);
if (unlikely(ret)) {
EXOFS_ERR("ERROR: reading device table\n");
goto out;
@@ -587,18 +621,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
if (unlikely(ret))
goto out;
- if (likely(numdevs > 1)) {
- unsigned size = numdevs * sizeof(sbi->layout.s_ods[0]);
-
- sbi = krealloc(sbi, sizeof(*sbi) + size, GFP_KERNEL);
- if (unlikely(!sbi)) {
- ret = -ENOMEM;
- goto out;
- }
- memset(&sbi->layout.s_ods[1], 0,
- size - sizeof(sbi->layout.s_ods[0]));
- *psbi = sbi;
- }
+ ret = __alloc_dev_table(sbi, numdevs, &eds);
+ if (unlikely(ret))
+ goto out;
+ /* exofs round-robins the device table view according to inode
+ * number. We hold a twice-bigger table, hence inodes can point
+ * to any device and still have a sequential view of the table
+ * starting at that device. See exofs_init_comps()
+ */
+ memcpy(&sbi->oc.ods[numdevs], &sbi->oc.ods[0],
+ (numdevs - 1) * sizeof(sbi->oc.ods[0]));
for (i = 0; i < numdevs; i++) {
struct exofs_fscb fscb;
@@ -614,13 +646,16 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
printk(KERN_NOTICE "Add device[%d]: osd_name-%s\n",
i, odi.osdname);
+ /* the exofs id is currently the table index */
+ eds[i].did = i;
+
/* On all devices the device table is identical. The user can
* specify any one of the participating devices on the command
* line. We always keep them in device-table order.
*/
if (fscb_od && osduld_device_same(fscb_od, &odi)) {
- sbi->layout.s_ods[i] = fscb_od;
- ++sbi->layout.s_numdevs;
+ eds[i].ored.od = fscb_od;
+ ++sbi->oc.numdevs;
fscb_od = NULL;
continue;
}
@@ -633,13 +668,13 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
goto out;
}
- sbi->layout.s_ods[i] = od;
- ++sbi->layout.s_numdevs;
+ eds[i].ored.od = od;
+ ++sbi->oc.numdevs;
/* Read the fscb of the other devices to make sure the FS
* partition is there.
*/
- ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb,
+ ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb,
sizeof(fscb));
if (unlikely(ret)) {
EXOFS_ERR("ERROR: Malformed participating device "
@@ -656,13 +691,11 @@ static int exofs_read_lookup_dev_table(struct exofs_sb_info **psbi,
out:
kfree(dt);
- if (unlikely(!ret && fscb_od)) {
- EXOFS_ERR(
- "ERROR: Bad device-table container device not present\n");
- osduld_put_device(fscb_od);
- ret = -EINVAL;
+ if (unlikely(fscb_od && !ret)) {
+ EXOFS_ERR("ERROR: Bad device-table container device not present\n");
+ osduld_put_device(fscb_od);
+ return -EINVAL;
}
-
return ret;
}
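
The doubled table built by the memcpy() above means a component view starting at any device index i can read numdevs sequential entries, ods[i..i+numdevs-1], without any wrap-around arithmetic in the I/O path. A small demonstration, assuming 4 devices:

#include <stdio.h>

int main(void)
{
	/* 4 devices; the table is stored 2*n-1 entries long so any
	 * starting device sees n sequential slots without modulo,
	 * as in the memcpy() in exofs_read_lookup_dev_table(). */
	enum { NUMDEVS = 4 };
	int ods[2 * NUMDEVS - 1];
	int i, start;

	for (i = 0; i < NUMDEVS; i++)
		ods[i] = i;
	for (i = 0; i < NUMDEVS - 1; i++)	/* the memcpy step */
		ods[NUMDEVS + i] = ods[i];

	start = 2;	/* e.g. inode number % NUMDEVS */
	for (i = 0; i < NUMDEVS; i++)
		printf("dev %d\n", ods[start + i]);	/* prints 2 3 0 1 */
	return 0;
}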
@@ -676,7 +709,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
struct exofs_sb_info *sbi; /*extended info */
struct osd_dev *od; /* Master device */
struct exofs_fscb fscb; /*on-disk superblock info */
- struct osd_obj_id obj;
+ struct ore_comp comp;
unsigned table_count;
int ret;
@@ -684,10 +717,6 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
if (!sbi)
return -ENOMEM;
- ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
- if (ret)
- goto free_bdi;
-
/* use mount options to fill superblock */
if (opts->is_osdname) {
struct osd_dev_info odi = {.systemid_len = 0};
@@ -695,6 +724,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
odi.osdname_len = strlen(opts->dev_name);
odi.osdname = (u8 *)opts->dev_name;
od = osduld_info_lookup(&odi);
+ kfree(opts->dev_name);
+ opts->dev_name = NULL;
} else {
od = osduld_path_lookup(opts->dev_name);
}
@@ -709,11 +740,14 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sbi->layout.group_width = 1;
sbi->layout.group_depth = -1;
sbi->layout.group_count = 1;
- sbi->layout.s_ods[0] = od;
- sbi->layout.s_numdevs = 1;
- sbi->layout.s_pid = opts->pid;
sbi->s_timeout = opts->timeout;
+ sbi->one_comp.obj.partition = opts->pid;
+ sbi->one_comp.obj.id = 0;
+ exofs_make_credential(sbi->one_comp.cred, &sbi->one_comp.obj);
+ sbi->oc.single_comp = EC_SINGLE_COMP;
+ sbi->oc.comps = &sbi->one_comp;
+
/* fill in some other data by hand */
memset(sb->s_id, 0, sizeof(sb->s_id));
strcpy(sb->s_id, "exofs");
@@ -724,11 +758,11 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_bdev = NULL;
sb->s_dev = 0;
- obj.partition = sbi->layout.s_pid;
- obj.id = EXOFS_SUPER_ID;
- exofs_make_credential(sbi->s_cred, &obj);
+ comp.obj.partition = sbi->one_comp.obj.partition;
+ comp.obj.id = EXOFS_SUPER_ID;
+ exofs_make_credential(comp.cred, &comp.obj);
- ret = exofs_read_kern(od, sbi->s_cred, &obj, 0, &fscb, sizeof(fscb));
+ ret = exofs_read_kern(od, comp.cred, &comp.obj, 0, &fscb, sizeof(fscb));
if (unlikely(ret))
goto free_sbi;
@@ -757,9 +791,18 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
table_count = le64_to_cpu(fscb.s_dev_table_count);
if (table_count) {
- ret = exofs_read_lookup_dev_table(&sbi, table_count);
+ ret = exofs_read_lookup_dev_table(sbi, od, table_count);
+ if (unlikely(ret))
+ goto free_sbi;
+ } else {
+ struct exofs_dev *eds;
+
+ ret = __alloc_dev_table(sbi, 1, &eds);
if (unlikely(ret))
goto free_sbi;
+
+ ore_comp_set_dev(&sbi->oc, 0, od);
+ sbi->oc.numdevs = 1;
}
__sbi_read_stats(sbi);
@@ -793,20 +836,21 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
goto free_sbi;
}
- _exofs_print_device("Mounting", opts->dev_name, sbi->layout.s_ods[0],
- sbi->layout.s_pid);
- if (opts->is_osdname)
- kfree(opts->dev_name);
+ ret = bdi_setup_and_register(&sbi->bdi, "exofs", BDI_CAP_MAP_COPY);
+ if (ret) {
+ EXOFS_DBGMSG("Failed to bdi_setup_and_register\n");
+ goto free_sbi;
+ }
+
+ _exofs_print_device("Mounting", opts->dev_name,
+ ore_comp_dev(&sbi->oc, 0),
+ sbi->one_comp.obj.partition);
return 0;
free_sbi:
- bdi_destroy(&sbi->bdi);
-free_bdi:
EXOFS_ERR("Unable to mount exofs on %s pid=0x%llx err=%d\n",
- opts->dev_name, sbi->layout.s_pid, ret);
+ opts->dev_name, sbi->one_comp.obj.partition, ret);
exofs_free_sbi(sbi);
- if (opts->is_osdname)
- kfree(opts->dev_name);
return ret;
}
@@ -837,7 +881,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct exofs_sb_info *sbi = sb->s_fs_info;
- struct exofs_io_state *ios;
+ struct ore_io_state *ios;
struct osd_attr attrs[] = {
ATTR_DEF(OSD_APAGE_PARTITION_QUOTAS,
OSD_ATTR_PQ_CAPACITY_QUOTA, sizeof(__be64)),
@@ -846,21 +890,18 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
};
uint64_t capacity = ULLONG_MAX;
uint64_t used = ULLONG_MAX;
- uint8_t cred_a[OSD_CAP_LEN];
int ret;
- ret = exofs_get_io_state(&sbi->layout, &ios);
+ ret = ore_get_io_state(&sbi->layout, &sbi->oc, &ios);
if (ret) {
- EXOFS_DBGMSG("exofs_get_io_state failed.\n");
+ EXOFS_DBGMSG("ore_get_io_state failed.\n");
return ret;
}
- exofs_make_credential(cred_a, &ios->obj);
- ios->cred = sbi->s_cred;
ios->in_attr = attrs;
ios->in_attr_len = ARRAY_SIZE(attrs);
- ret = exofs_sbi_read(ios);
+ ret = ore_read(ios);
if (unlikely(ret))
goto out;
@@ -889,7 +930,7 @@ static int exofs_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_namelen = EXOFS_NAME_LEN;
out:
- exofs_put_io_state(ios);
+ ore_put_io_state(ios);
return ret;
}
@@ -908,7 +949,7 @@ static const struct super_operations exofs_sops = {
* EXPORT OPERATIONS
*****************************************************************************/
-struct dentry *exofs_get_parent(struct dentry *child)
+static struct dentry *exofs_get_parent(struct dentry *child)
{
unsigned long ino = exofs_parent_ino(child);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b05acb7..3bbf5e7 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -50,7 +50,7 @@ find_acceptable_alias(struct dentry *result,
inode = result->d_inode;
spin_lock(&inode->i_lock);
- list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ list_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
dget(dentry);
spin_unlock(&inode->i_lock);
if (toput)
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 9d021c0..3091f62 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -131,7 +131,7 @@ fail:
*
* inode->i_mutex: don't care
*/
-static struct posix_acl *
+struct posix_acl *
ext3_get_acl(struct inode *inode, int type)
{
int name_index;
@@ -199,12 +199,10 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
case ACL_TYPE_ACCESS:
name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- mode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
+ error = posix_acl_equiv_mode(acl, &inode->i_mode);
if (error < 0)
return error;
else {
- inode->i_mode = mode;
inode->i_ctime = CURRENT_TIME_SEC;
ext3_mark_inode_dirty(handle, inode);
if (error == 0)
@@ -239,29 +237,6 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
return error;
}
-int
-ext3_check_acl(struct inode *inode, int mask, unsigned int flags)
-{
- struct posix_acl *acl;
-
- if (flags & IPERM_FLAG_RCU) {
- if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
- return -ECHILD;
- return -EAGAIN;
- }
-
- acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- int error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- return error;
- }
-
- return -EAGAIN;
-}
-
/*
* Initialize the ACLs of a new inode. Called from ext3_new_inode.
*
@@ -284,31 +259,20 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
inode->i_mode &= ~current_umask();
}
if (test_opt(inode->i_sb, POSIX_ACL) && acl) {
- struct posix_acl *clone;
- mode_t mode;
-
if (S_ISDIR(inode->i_mode)) {
error = ext3_set_acl(handle, inode,
ACL_TYPE_DEFAULT, acl);
if (error)
goto cleanup;
}
- clone = posix_acl_clone(acl, GFP_NOFS);
- error = -ENOMEM;
- if (!clone)
- goto cleanup;
-
- mode = inode->i_mode;
- error = posix_acl_create_masq(clone, &mode);
- if (error >= 0) {
- inode->i_mode = mode;
- if (error > 0) {
- /* This is an extended ACL */
- error = ext3_set_acl(handle, inode,
- ACL_TYPE_ACCESS, clone);
- }
+ error = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+ if (error < 0)
+ return error;
+
+ if (error > 0) {
+ /* This is an extended ACL */
+ error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
}
- posix_acl_release(clone);
}
cleanup:
posix_acl_release(acl);
@@ -332,7 +296,9 @@ cleanup:
int
ext3_acl_chmod(struct inode *inode)
{
- struct posix_acl *acl, *clone;
+ struct posix_acl *acl;
+ handle_t *handle;
+ int retries = 0;
int error;
if (S_ISLNK(inode->i_mode))
@@ -342,31 +308,24 @@ ext3_acl_chmod(struct inode *inode)
acl = ext3_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl) || !acl)
return PTR_ERR(acl);
- clone = posix_acl_clone(acl, GFP_KERNEL);
- posix_acl_release(acl);
- if (!clone)
- return -ENOMEM;
- error = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!error) {
- handle_t *handle;
- int retries = 0;
-
- retry:
- handle = ext3_journal_start(inode,
- EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- ext3_std_error(inode->i_sb, error);
- goto out;
- }
- error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone);
- ext3_journal_stop(handle);
- if (error == -ENOSPC &&
- ext3_should_retry_alloc(inode->i_sb, &retries))
- goto retry;
+ error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ if (error)
+ return error;
+retry:
+ handle = ext3_journal_start(inode,
+ EXT3_DATA_TRANS_BLOCKS(inode->i_sb));
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ ext3_std_error(inode->i_sb, error);
+ goto out;
}
+ error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl);
+ ext3_journal_stop(handle);
+ if (error == -ENOSPC &&
+ ext3_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
out:
- posix_acl_release(clone);
+ posix_acl_release(acl);
return error;
}
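
The rewritten ext3_acl_chmod() keeps ext3's standard ENOSPC handling: restart the journalled update while ext3_should_retry_alloc() allows it (it gives up after a few attempts). The retry idiom in isolation, as a runnable userspace sketch with a mock operation that succeeds on the third call:

#include <errno.h>
#include <stdio.h>

/* Bounded-retry idiom: retry on ENOSPC a few times, then fail.
 * Mirrors the cap in ext3_should_retry_alloc(). */
static int should_retry(int *retries)
{
	return (*retries)++ <= 3;
}

static int flaky_op(int *calls)
{
	return ++(*calls) < 3 ? -ENOSPC : 0;	/* succeeds on 3rd try */
}

int main(void)
{
	int retries = 0, calls = 0, err;
retry:
	err = flaky_op(&calls);
	if (err == -ENOSPC && should_retry(&retries))
		goto retry;
	printf("err=%d after %d calls\n", err, calls);
	return err ? 1 : 0;
}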
diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h
index 5faf804..dbc921e 100644
--- a/fs/ext3/acl.h
+++ b/fs/ext3/acl.h
@@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size)
#ifdef CONFIG_EXT3_FS_POSIX_ACL
/* acl.c */
-extern int ext3_check_acl (struct inode *, int, unsigned int);
+extern struct posix_acl *ext3_get_acl(struct inode *inode, int type);
extern int ext3_acl_chmod (struct inode *);
extern int ext3_init_acl (handle_t *, struct inode *, struct inode *);
#else /* CONFIG_EXT3_FS_POSIX_ACL */
#include <linux/sched.h>
-#define ext3_check_acl NULL
+#define ext3_get_acl NULL
static inline int
ext3_acl_chmod(struct inode *inode)
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index fe52297..a203892 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -21,6 +21,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
+#include <trace/events/ext3.h>
/*
* balloc.c contains the blocks allocation and deallocation routines
@@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group)
desc = ext3_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
+ trace_ext3_read_block_bitmap(sb, block_group);
bitmap_blk = le32_to_cpu(desc->bg_block_bitmap);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
@@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb,
struct rb_node * parent = NULL;
struct ext3_reserve_window_node *this;
+ trace_ext3_rsv_window_add(sb, rsv);
while (*p)
{
parent = *p;
@@ -424,7 +427,7 @@ static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
void ext3_init_block_alloc_info(struct inode *inode)
{
struct ext3_inode_info *ei = EXT3_I(inode);
- struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+ struct ext3_block_alloc_info *block_i;
struct super_block *sb = inode->i_sb;
block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
@@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode)
rsv = &block_i->rsv_window_node;
if (!rsv_is_empty(&rsv->rsv_window)) {
spin_lock(rsv_lock);
- if (!rsv_is_empty(&rsv->rsv_window))
+ if (!rsv_is_empty(&rsv->rsv_window)) {
+ trace_ext3_discard_reservation(inode, rsv);
rsv_window_remove(inode->i_sb, rsv);
+ }
spin_unlock(rsv_lock);
}
}
@@ -683,14 +688,10 @@ error_return:
void ext3_free_blocks(handle_t *handle, struct inode *inode,
ext3_fsblk_t block, unsigned long count)
{
- struct super_block * sb;
+ struct super_block *sb = inode->i_sb;
unsigned long dquot_freed_blocks;
- sb = inode->i_sb;
- if (!sb) {
- printk ("ext3_free_blocks: nonexistent device");
- return;
- }
+ trace_ext3_free_blocks(inode, block, count);
ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
if (dquot_freed_blocks)
dquot_free_block(inode, dquot_freed_blocks);
@@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv,
else
start_block = grp_goal + group_first_block;
+ trace_ext3_alloc_new_reservation(sb, start_block);
size = my_rsv->rsv_goal_size;
if (!rsv_is_empty(&my_rsv->rsv_window)) {
@@ -1230,8 +1232,11 @@ retry:
* check if the first free block is within the
* free space we just reserved
*/
- if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end)
+ if (start_block >= my_rsv->rsv_start &&
+ start_block <= my_rsv->rsv_end) {
+ trace_ext3_reserved(sb, start_block, my_rsv);
return 0; /* success */
+ }
/*
* if the first free bit we found is out of the reservable space
* continue search for next reservable space,
@@ -1435,14 +1440,14 @@ out:
*
* Check if filesystem has at least 1 free block available for allocation.
*/
-static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+static int ext3_has_free_blocks(struct ext3_sb_info *sbi, int use_reservation)
{
ext3_fsblk_t free_blocks, root_blocks;
free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
- sbi->s_resuid != current_fsuid() &&
+ !use_reservation && sbi->s_resuid != current_fsuid() &&
(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
return 0;
}
@@ -1463,7 +1468,7 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
*/
int ext3_should_retry_alloc(struct super_block *sb, int *retries)
{
- if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+ if (!ext3_has_free_blocks(EXT3_SB(sb), 0) || (*retries)++ > 3)
return 0;
jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
@@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
*errp = -ENOSPC;
sb = inode->i_sb;
- if (!sb) {
- printk("ext3_new_block: nonexistent device");
- return 0;
- }
/*
* Check quota for allocation of this block.
@@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
return 0;
}
+ trace_ext3_request_blocks(inode, goal, num);
+
sbi = EXT3_SB(sb);
- es = EXT3_SB(sb)->s_es;
+ es = sbi->s_es;
ext3_debug("goal=%lu.\n", goal);
/*
* Allocate a block from reservation only when
@@ -1543,7 +1546,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
my_rsv = &block_i->rsv_window_node;
- if (!ext3_has_free_blocks(sbi)) {
+ if (!ext3_has_free_blocks(sbi, IS_NOQUOTA(inode))) {
*errp = -ENOSPC;
goto out;
}
@@ -1742,6 +1745,10 @@ allocated:
brelse(bitmap_bh);
dquot_free_block(inode, *count-num);
*count = num;
+
+ trace_ext3_allocate_blocks(inode, goal, num,
+ (unsigned long long)ret_block);
+
return ret_block;
io_error:
@@ -1917,9 +1924,10 @@ unsigned long ext3_bg_num_gdb(struct super_block *sb, int group)
* reaches any used block. Then issue a TRIM command on this extent and free
* the extent in the block bitmap. This is done until whole group is scanned.
*/
-ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
- ext3_grpblk_t start, ext3_grpblk_t max,
- ext3_grpblk_t minblocks)
+static ext3_grpblk_t ext3_trim_all_free(struct super_block *sb,
+ unsigned int group,
+ ext3_grpblk_t start, ext3_grpblk_t max,
+ ext3_grpblk_t minblocks)
{
handle_t *handle;
ext3_grpblk_t next, free_blocks, bit, freed, count = 0;
@@ -1996,6 +2004,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group,
if ((next - start) < minblocks)
goto free_extent;
+ trace_ext3_discard_blocks(sb, discard_block, next - start);
/* Send the TRIM command down to the device */
err = sb_issue_discard(sb, discard_block, next - start,
GFP_NOFS, 0);
@@ -2100,7 +2109,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb)))
return -EINVAL;
if (start >= max_blks)
- goto out;
+ return -EINVAL;
if (start + len > max_blks)
len = max_blks - start;
@@ -2148,8 +2157,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range)
if (ret >= 0)
ret = 0;
-
-out:
range->len = trimmed * sb->s_blocksize;
return ret;
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c
index 34f0a07..3268697 100644
--- a/fs/ext3/dir.c
+++ b/fs/ext3/dir.c
@@ -25,6 +25,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/buffer_head.h>
+#include <linux/compat.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
@@ -32,24 +33,8 @@ static unsigned char ext3_filetype_table[] = {
DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
};
-static int ext3_readdir(struct file *, void *, filldir_t);
static int ext3_dx_readdir(struct file * filp,
void * dirent, filldir_t filldir);
-static int ext3_release_dir (struct inode * inode,
- struct file * filp);
-
-const struct file_operations ext3_dir_operations = {
- .llseek = generic_file_llseek,
- .read = generic_read_dir,
- .readdir = ext3_readdir, /* we take BKL. needed?*/
- .unlocked_ioctl = ext3_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = ext3_compat_ioctl,
-#endif
- .fsync = ext3_sync_file, /* BKL held */
- .release = ext3_release_dir,
-};
-
static unsigned char get_dtype(struct super_block *sb, int filetype)
{
@@ -60,6 +45,25 @@ static unsigned char get_dtype(struct super_block *sb, int filetype)
return (ext3_filetype_table[filetype]);
}
+/**
+ * Check if the given dir-inode refers to an htree-indexed directory
+ * (or a directory which could potentially get converted to use htree
+ * indexing).
+ *
+ * Return 1 if it is a dx dir, 0 if not
+ */
+static int is_dx_dir(struct inode *inode)
+{
+ struct super_block *sb = inode->i_sb;
+
+ if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
+ EXT3_FEATURE_COMPAT_DIR_INDEX) &&
+ ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
+ ((inode->i_size >> sb->s_blocksize_bits) == 1)))
+ return 1;
+
+ return 0;
+}
int ext3_check_dir_entry (const char * function, struct inode * dir,
struct ext3_dir_entry_2 * de,
@@ -99,18 +103,13 @@ static int ext3_readdir(struct file * filp,
unsigned long offset;
int i, stored;
struct ext3_dir_entry_2 *de;
- struct super_block *sb;
int err;
struct inode *inode = filp->f_path.dentry->d_inode;
+ struct super_block *sb = inode->i_sb;
int ret = 0;
int dir_has_error = 0;
- sb = inode->i_sb;
-
- if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb,
- EXT3_FEATURE_COMPAT_DIR_INDEX) &&
- ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) ||
- ((inode->i_size >> sb->s_blocksize_bits) == 1))) {
+ if (is_dx_dir(inode)) {
err = ext3_dx_readdir(filp, dirent, filldir);
if (err != ERR_BAD_DX_DIR) {
ret = err;
@@ -232,22 +231,87 @@ out:
return ret;
}
+static inline int is_32bit_api(void)
+{
+#ifdef CONFIG_COMPAT
+ return is_compat_task();
+#else
+ return (BITS_PER_LONG == 32);
+#endif
+}
+
/*
* These functions convert from the major/minor hash to an f_pos
- * value.
+ * value for dx directories
*
- * Currently we only use major hash numer. This is unfortunate, but
- * on 32-bit machines, the same VFS interface is used for lseek and
- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
- * lseek/telldir/seekdir will blow out spectacularly, and from within
- * the ext2 low-level routine, we don't know if we're being called by
- * a 64-bit version of the system call or the 32-bit version of the
- * system call. Worse yet, NFSv2 only allows for a 32-bit readdir
- * cookie. Sigh.
+ * Upper layer (for example NFS) should specify FMODE_32BITHASH or
+ * FMODE_64BITHASH explicitly. On the other hand, we allow ext3 to be mounted
+ * directly on both 32-bit and 64-bit nodes, in which case neither
+ * FMODE_32BITHASH nor FMODE_64BITHASH is specified.
*/
-#define hash2pos(major, minor) (major >> 1)
-#define pos2maj_hash(pos) ((pos << 1) & 0xffffffff)
-#define pos2min_hash(pos) (0)
+static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return major >> 1;
+ else
+ return ((__u64)(major >> 1) << 32) | (__u64)minor;
+}
+
+static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return (pos << 1) & 0xffffffff;
+ else
+ return ((pos >> 32) << 1) & 0xffffffff;
+}
+
+static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return 0;
+ else
+ return pos & 0xffffffff;
+}
+
+/*
+ * Return 32- or 64-bit end-of-file for dx directories
+ */
+static inline loff_t ext3_get_htree_eof(struct file *filp)
+{
+ if ((filp->f_mode & FMODE_32BITHASH) ||
+ (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
+ return EXT3_HTREE_EOF_32BIT;
+ else
+ return EXT3_HTREE_EOF_64BIT;
+}
+
+
+/*
+ * ext3_dir_llseek() calls generic_file_llseek[_size]() to handle both
+ * non-htree and htree directories, where the "offset" is in terms
+ * of the filename hash value instead of the byte offset.
+ *
+ * Because we may return a 64-bit hash that is well beyond s_maxbytes,
+ * we need to pass the max hash as the maximum allowable offset in
+ * the htree directory case.
+ *
+ * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX)
+ * will be invalid once the directory has been converted into a dx directory
+ */
+loff_t ext3_dir_llseek(struct file *file, loff_t offset, int origin)
+{
+ struct inode *inode = file->f_mapping->host;
+ int dx_dir = is_dx_dir(inode);
+
+ if (likely(dx_dir))
+ return generic_file_llseek_size(file, offset, origin,
+ ext3_get_htree_eof(file));
+ else
+ return generic_file_llseek(file, offset, origin);
+}
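
The 64-bit cookie layout introduced here packs the (already even) major hash into the high 32 bits and the minor hash into the low 32 bits, and the pos2*_hash() helpers invert it exactly. A standalone round-trip check of the 64-bit variants, reimplemented in userspace:

#include <stdint.h>
#include <stdio.h>

/* 64-bit branches of hash2pos()/pos2maj_hash()/pos2min_hash() from the
 * patch, reproduced here for a round-trip check. */
static uint64_t hash2pos64(uint32_t major, uint32_t minor)
{
	return ((uint64_t)(major >> 1) << 32) | minor;
}

static uint32_t pos2maj_hash64(uint64_t pos)
{
	return ((pos >> 32) << 1) & 0xffffffff;
}

static uint32_t pos2min_hash64(uint64_t pos)
{
	return pos & 0xffffffff;
}

int main(void)
{
	/* ext3fs_dirhash() always clears bit 0 of the major hash, so the
	 * right shift in hash2pos64() loses no information. */
	uint32_t major = 0xdeadbeee, minor = 0x12345678;
	uint64_t pos = hash2pos64(major, minor);

	printf("major ok: %d, minor ok: %d\n",
	       pos2maj_hash64(pos) == major, pos2min_hash64(pos) == minor);
	return 0;
}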
/*
* This structure holds the nodes of the red-black tree used to store
@@ -308,15 +372,16 @@ static void free_rb_tree_fname(struct rb_root *root)
}
-static struct dir_private_info *ext3_htree_create_dir_info(loff_t pos)
+static struct dir_private_info *ext3_htree_create_dir_info(struct file *filp,
+ loff_t pos)
{
struct dir_private_info *p;
p = kzalloc(sizeof(struct dir_private_info), GFP_KERNEL);
if (!p)
return NULL;
- p->curr_hash = pos2maj_hash(pos);
- p->curr_minor_hash = pos2min_hash(pos);
+ p->curr_hash = pos2maj_hash(filp, pos);
+ p->curr_minor_hash = pos2min_hash(filp, pos);
return p;
}
@@ -406,7 +471,7 @@ static int call_filldir(struct file * filp, void * dirent,
printk("call_filldir: called with null fname?!?\n");
return 0;
}
- curr_pos = hash2pos(fname->hash, fname->minor_hash);
+ curr_pos = hash2pos(filp, fname->hash, fname->minor_hash);
while (fname) {
error = filldir(dirent, fname->name,
fname->name_len, curr_pos,
@@ -431,13 +496,13 @@ static int ext3_dx_readdir(struct file * filp,
int ret;
if (!info) {
- info = ext3_htree_create_dir_info(filp->f_pos);
+ info = ext3_htree_create_dir_info(filp, filp->f_pos);
if (!info)
return -ENOMEM;
filp->private_data = info;
}
- if (filp->f_pos == EXT3_HTREE_EOF)
+ if (filp->f_pos == ext3_get_htree_eof(filp))
return 0; /* EOF */
/* Some one has messed with f_pos; reset the world */
@@ -445,8 +510,8 @@ static int ext3_dx_readdir(struct file * filp,
free_rb_tree_fname(&info->root);
info->curr_node = NULL;
info->extra_fname = NULL;
- info->curr_hash = pos2maj_hash(filp->f_pos);
- info->curr_minor_hash = pos2min_hash(filp->f_pos);
+ info->curr_hash = pos2maj_hash(filp, filp->f_pos);
+ info->curr_minor_hash = pos2min_hash(filp, filp->f_pos);
}
/*
@@ -478,7 +543,7 @@ static int ext3_dx_readdir(struct file * filp,
if (ret < 0)
return ret;
if (ret == 0) {
- filp->f_pos = EXT3_HTREE_EOF;
+ filp->f_pos = ext3_get_htree_eof(filp);
break;
}
info->curr_node = rb_first(&info->root);
@@ -498,7 +563,7 @@ static int ext3_dx_readdir(struct file * filp,
info->curr_minor_hash = fname->minor_hash;
} else {
if (info->next_hash == ~0) {
- filp->f_pos = EXT3_HTREE_EOF;
+ filp->f_pos = ext3_get_htree_eof(filp);
break;
}
info->curr_hash = info->next_hash;
@@ -517,3 +582,15 @@ static int ext3_release_dir (struct inode * inode, struct file * filp)
return 0;
}
+
+const struct file_operations ext3_dir_operations = {
+ .llseek = ext3_dir_llseek,
+ .read = generic_read_dir,
+ .readdir = ext3_readdir,
+ .unlocked_ioctl = ext3_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = ext3_compat_ioctl,
+#endif
+ .fsync = ext3_sync_file,
+ .release = ext3_release_dir,
+};
diff --git a/fs/ext3/file.c b/fs/ext3/file.c
index f55df0e..724df69 100644
--- a/fs/ext3/file.c
+++ b/fs/ext3/file.c
@@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = {
};
const struct inode_operations ext3_file_inode_operations = {
- .truncate = ext3_truncate,
.setattr = ext3_setattr,
#ifdef CONFIG_EXT3_FS_XATTR
.setxattr = generic_setxattr,
@@ -79,7 +78,7 @@ const struct inode_operations ext3_file_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .get_acl = ext3_get_acl,
.fiemap = ext3_fiemap,
};
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c
index 09b13bb..1860ed3 100644
--- a/fs/ext3/fsync.c
+++ b/fs/ext3/fsync.c
@@ -30,6 +30,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
#include <linux/ext3_jbd.h>
+#include <trace/events/ext3.h>
/*
* akpm: A new design for ext3_sync_file().
@@ -43,7 +44,7 @@
* inode to disk.
*/
-int ext3_sync_file(struct file *file, int datasync)
+int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct inode *inode = file->f_mapping->host;
struct ext3_inode_info *ei = EXT3_I(inode);
@@ -51,9 +52,15 @@ int ext3_sync_file(struct file *file, int datasync)
int ret, needs_barrier = 0;
tid_t commit_tid;
+ trace_ext3_sync_file_enter(file, datasync);
+
if (inode->i_sb->s_flags & MS_RDONLY)
return 0;
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ goto out;
+
J_ASSERT(ext3_journal_current_handle() == NULL);
/*
@@ -70,8 +77,10 @@ int ext3_sync_file(struct file *file, int datasync)
* (they were dirtied by commit). But that's OK - the blocks are
* safe in-journal, which is all fsync() needs to ensure.
*/
- if (ext3_should_journal_data(inode))
- return ext3_force_commit(inode->i_sb);
+ if (ext3_should_journal_data(inode)) {
+ ret = ext3_force_commit(inode->i_sb);
+ goto out;
+ }
if (datasync)
commit_tid = atomic_read(&ei->i_datasync_tid);
@@ -91,5 +100,7 @@ int ext3_sync_file(struct file *file, int datasync)
*/
if (needs_barrier)
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+out:
+ trace_ext3_sync_file_exit(inode, ret);
return ret;
}
diff --git a/fs/ext3/hash.c b/fs/ext3/hash.c
index 7d215b4..d4d3ade 100644
--- a/fs/ext3/hash.c
+++ b/fs/ext3/hash.c
@@ -200,8 +200,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
return -1;
}
hash = hash & ~1;
- if (hash == (EXT3_HTREE_EOF << 1))
- hash = (EXT3_HTREE_EOF-1) << 1;
+ if (hash == (EXT3_HTREE_EOF_32BIT << 1))
+ hash = (EXT3_HTREE_EOF_32BIT - 1) << 1;
hinfo->hash = hash;
hinfo->minor_hash = minor_hash;
return 0;
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 0b3da7c..adae962 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -23,6 +23,7 @@
#include <linux/buffer_head.h>
#include <linux/random.h>
#include <linux/bitops.h>
+#include <trace/events/ext3.h>
#include <asm/byteorder.h>
@@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode)
ino = inode->i_ino;
ext3_debug ("freeing inode %lu\n", ino);
+ trace_ext3_free_inode(inode);
is_directory = S_ISDIR(inode->i_mode);
@@ -176,42 +178,6 @@ error_return:
}
/*
- * There are two policies for allocating an inode. If the new inode is
- * a directory, then a forward search is made for a block group with both
- * free space and a low directory-to-inode ratio; if that fails, then of
- * the groups with above-average free space, that group with the fewest
- * directories already is chosen.
- *
- * For other inodes, search forward from the parent directory\'s block
- * group to find a free inode.
- */
-static int find_group_dir(struct super_block *sb, struct inode *parent)
-{
- int ngroups = EXT3_SB(sb)->s_groups_count;
- unsigned int freei, avefreei;
- struct ext3_group_desc *desc, *best_desc = NULL;
- int group, best_group = -1;
-
- freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
- avefreei = freei / ngroups;
-
- for (group = 0; group < ngroups; group++) {
- desc = ext3_get_group_desc (sb, group, NULL);
- if (!desc || !desc->bg_free_inodes_count)
- continue;
- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
- continue;
- if (!best_desc ||
- (le16_to_cpu(desc->bg_free_blocks_count) >
- le16_to_cpu(best_desc->bg_free_blocks_count))) {
- best_group = group;
- best_desc = desc;
- }
- }
- return best_group;
-}
-
-/*
* Orlov's allocator for directories.
*
* We always try to spread first-level directories.
@@ -426,6 +392,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
return ERR_PTR(-EPERM);
sb = dir->i_sb;
+ trace_ext3_request_inode(dir, mode);
inode = new_inode(sb);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -433,12 +400,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir,
sbi = EXT3_SB(sb);
es = sbi->s_es;
- if (S_ISDIR(mode)) {
- if (test_opt (sb, OLDALLOC))
- group = find_group_dir(sb, dir);
- else
- group = find_group_orlov(sb, dir);
- } else
+ if (S_ISDIR(mode))
+ group = find_group_orlov(sb, dir);
+ else
group = find_group_other(sb, dir);
err = -ENOSPC;
@@ -605,6 +569,7 @@ got:
}
ext3_debug("allocating inode %lu\n", inode->i_ino);
+ trace_ext3_allocate_inode(inode, dir, mode);
goto really_out;
fail:
ext3_std_error(sb, err);
@@ -621,7 +586,7 @@ fail_free_drop:
fail_drop:
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
- inode->i_nlink = 0;
+ clear_nlink(inode);
unlock_new_inode(inode);
iput(inode);
brelse(bitmap_bh);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 0aedb27..71b263f 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -38,10 +38,12 @@
#include <linux/bio.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#include <trace/events/ext3.h>
#include "xattr.h"
#include "acl.h"
static int ext3_writepage_trans_blocks(struct inode *inode);
+static int ext3_block_truncate_page(struct inode *inode, loff_t from);
/*
* Test whether an inode is a fast symlink.
@@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode,
might_sleep();
+ trace_ext3_forget(inode, is_metadata, blocknr);
BUFFER_TRACE(bh, "enter");
jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
@@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode)
*/
void ext3_evict_inode (struct inode *inode)
{
+ struct ext3_inode_info *ei = EXT3_I(inode);
struct ext3_block_alloc_info *rsv;
handle_t *handle;
int want_delete = 0;
+ trace_ext3_evict_inode(inode);
if (!inode->i_nlink && !is_bad_inode(inode)) {
dquot_initialize(inode);
want_delete = 1;
}
+ /*
+ * When journalling data, dirty buffers are tracked only in the journal.
+ * So although mm thinks everything is clean and ready for reaping the
+ * inode might still have some pages to write in the running
+ * transaction or waiting to be checkpointed. Thus calling
+ * journal_invalidatepage() (via truncate_inode_pages()) to discard
+ * these buffers can cause data loss. Also even if we did not discard
+ * these buffers, we would have no way to find them after the inode
+ * is reaped and thus user could see stale data if he tries to read
+ * them before the transaction is checkpointed. So be careful and
+ * force everything to disk here... We use ei->i_datasync_tid to
+ * store the newest transaction containing inode's data.
+ *
+ * Note that directories do not have this problem because they don't
+ * use page cache.
+ */
+ if (inode->i_nlink && ext3_should_journal_data(inode) &&
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
+ tid_t commit_tid = atomic_read(&ei->i_datasync_tid);
+ journal_t *journal = EXT3_SB(inode->i_sb)->s_journal;
+
+ log_start_commit(journal, commit_tid);
+ log_wait_commit(journal, commit_tid);
+ filemap_write_and_wait(&inode->i_data);
+ }
truncate_inode_pages(&inode->i_data, 0);
ext3_discard_reservation(inode);
- rsv = EXT3_I(inode)->i_block_alloc_info;
- EXT3_I(inode)->i_block_alloc_info = NULL;
+ rsv = ei->i_block_alloc_info;
+ ei->i_block_alloc_info = NULL;
if (unlikely(rsv))
kfree(rsv);
@@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode)
if (inode->i_blocks)
ext3_truncate(inode);
/*
- * Kill off the orphan record which ext3_truncate created.
- * AKPM: I think this can be inside the above `if'.
- * Note that ext3_orphan_del() has to be able to cope with the
- * deletion of a non-existent orphan - this is because we don't
- * know if ext3_truncate() actually created an orphan record.
- * (Well, we could do this if we need to, but heck - it works)
+ * Kill off the orphan record created when the inode lost the last
+ * link. Note that ext3_orphan_del() has to be able to cope with the
+ * deletion of a non-existent orphan - ext3_truncate() could
+ * have removed the record.
*/
ext3_orphan_del(handle, inode);
- EXT3_I(inode)->i_dtime = get_seconds();
+ ei->i_dtime = get_seconds();
/*
* One subtle ordering requirement: if anything has gone wrong
@@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
ext3_fsblk_t first_block = 0;
+ trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create);
J_ASSERT(handle != NULL || create == 0);
depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
@@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
if (!create || err == -EIO)
goto cleanup;
+ /*
+ * Block out ext3_truncate while we alter the tree
+ */
mutex_lock(&ei->truncate_mutex);
/*
@@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode,
*/
count = ext3_blks_to_allocate(partial, indirect_blks,
maxblocks, blocks_to_boundary);
- /*
- * Block out ext3_truncate while we alter the tree
- */
err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal,
offsets + (partial - chain), partial);
@@ -970,6 +999,9 @@ cleanup:
}
BUFFER_TRACE(bh_result, "returned");
out:
+ trace_ext3_get_blocks_exit(inode, iblock,
+ depth ? le32_to_cpu(chain[depth-1].key) : 0,
+ count, err);
return err;
}
@@ -1102,7 +1134,7 @@ struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode,
return bh;
if (buffer_uptodate(bh))
return bh;
- ll_rw_block(READ_META, 1, &bh);
+ ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (buffer_uptodate(bh))
return bh;
@@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode)
ext3_truncate(inode);
}
+/*
+ * Truncate blocks that were not used by direct IO write. We have to zero out
+ * the last file block as well because direct IO might have written to it.
+ */
+static void ext3_truncate_failed_direct_write(struct inode *inode)
+{
+ ext3_block_truncate_page(inode, inode->i_size);
+ ext3_truncate(inode);
+}
+
static int ext3_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping,
* we allocate blocks but write fails for some reason */
int needed_blocks = ext3_writepage_trans_blocks(inode) + 1;
+ trace_ext3_write_begin(inode, pos, len, flags);
+
index = pos >> PAGE_CACHE_SHIFT;
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file,
unsigned from, to;
int ret = 0, ret2;
+ trace_ext3_ordered_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file,
struct inode *inode = file->f_mapping->host;
int ret;
+ trace_ext3_writeback_write_end(inode, pos, len, copied);
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
update_file_sizes(inode, pos, copied);
/*
@@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file,
{
handle_t *handle = ext3_journal_current_handle();
struct inode *inode = mapping->host;
+ struct ext3_inode_info *ei = EXT3_I(inode);
int ret = 0, ret2;
int partial = 0;
unsigned from, to;
+ trace_ext3_journalled_write_end(inode, pos, len, copied);
from = pos & (PAGE_CACHE_SIZE - 1);
to = from + len;
@@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file,
if (pos + len > inode->i_size && ext3_can_truncate(inode))
ext3_orphan_add(handle, inode);
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
- if (inode->i_size > EXT3_I(inode)->i_disksize) {
- EXT3_I(inode)->i_disksize = inode->i_size;
+ atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid);
+ if (inode->i_size > ei->i_disksize) {
+ ei->i_disksize = inode->i_size;
ret2 = ext3_mark_inode_dirty(handle, inode);
if (!ret)
ret = ret2;
@@ -1583,6 +1632,7 @@ static int ext3_ordered_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_ordered_writepage(page);
if (!page_has_buffers(page)) {
create_empty_buffers(page, inode->i_sb->s_blocksize,
(1 << BH_Dirty)|(1 << BH_Uptodate));
@@ -1659,6 +1709,7 @@ static int ext3_writeback_writepage(struct page *page,
if (ext3_journal_current_handle())
goto out_fail;
+ trace_ext3_writeback_writepage(page);
if (page_has_buffers(page)) {
if (!walk_page_buffers(NULL, page_buffers(page), 0,
PAGE_CACHE_SIZE, NULL, buffer_unmapped)) {
@@ -1707,6 +1758,7 @@ static int ext3_journalled_writepage(struct page *page,
if (ext3_journal_current_handle())
goto no_write;
+ trace_ext3_journalled_writepage(page);
handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
@@ -1733,6 +1785,8 @@ static int ext3_journalled_writepage(struct page *page,
if (ret == 0)
ret = err;
ext3_set_inode_state(inode, EXT3_STATE_JDATA);
+ atomic_set(&EXT3_I(inode)->i_datasync_tid,
+ handle->h_transaction->t_tid);
unlock_page(page);
} else {
/*
@@ -1757,6 +1811,7 @@ out_unlock:
static int ext3_readpage(struct file *file, struct page *page)
{
+ trace_ext3_readpage(page);
return mpage_readpage(page, ext3_get_block);
}
@@ -1771,6 +1826,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_invalidatepage(page, offset);
+
/*
* If it's a full truncate we just forget about the pending dirtying
*/
@@ -1784,6 +1841,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait)
{
journal_t *journal = EXT3_JOURNAL(page->mapping->host);
+ trace_ext3_releasepage(page);
WARN_ON(PageChecked(page));
if (!page_has_buffers(page))
return 0;
@@ -1812,6 +1870,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
size_t count = iov_length(iov, nr_segs);
int retries = 0;
+ trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw);
+
if (rw == WRITE) {
loff_t final_size = offset + count;
@@ -1834,9 +1894,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb,
}
retry:
- ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
- offset, nr_segs,
- ext3_get_block, NULL);
+ ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs,
+ ext3_get_block);
/*
* In case of error extending write may have instantiated a few
* blocks outside i_size. Trim these off again.
@@ -1846,7 +1905,7 @@ retry:
loff_t end = offset + iov_length(iov, nr_segs);
if (end > isize)
- vmtruncate(inode, isize);
+ ext3_truncate_failed_direct_write(inode);
}
if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -1860,7 +1919,7 @@ retry:
/* This is really bad luck. We've written the data
* but cannot extend i_size. Truncate allocated blocks
* and pretend the write failed... */
- ext3_truncate(inode);
+ ext3_truncate_failed_direct_write(inode);
ret = PTR_ERR(handle);
goto out;
}
@@ -1886,6 +1945,8 @@ retry:
ret = err;
}
out:
+ trace_ext3_direct_IO_exit(inode, offset,
+ iov_length(iov, nr_segs), rw, ret);
return ret;
}
@@ -1968,17 +2029,24 @@ void ext3_set_aops(struct inode *inode)
* This required during truncate. We need to physically zero the tail end
* of that block so it doesn't yield old data if the file is later grown.
*/
-static int ext3_block_truncate_page(handle_t *handle, struct page *page,
- struct address_space *mapping, loff_t from)
+static int ext3_block_truncate_page(struct inode *inode, loff_t from)
{
ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned offset = from & (PAGE_CACHE_SIZE - 1);
unsigned blocksize, iblock, length, pos;
- struct inode *inode = mapping->host;
+ struct page *page;
+ handle_t *handle = NULL;
struct buffer_head *bh;
int err = 0;
+ /* Truncated on block boundary - nothing to do */
blocksize = inode->i_sb->s_blocksize;
+ if ((from & (blocksize - 1)) == 0)
+ return 0;
+
+ page = grab_cache_page(inode->i_mapping, index);
+ if (!page)
+ return -ENOMEM;
length = blocksize - (offset & (blocksize - 1));
iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
@@ -2023,11 +2091,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
goto unlock;
}
+ /* data=writeback mode doesn't need transaction to zero-out data */
+ if (!ext3_should_writeback_data(inode)) {
+ /* We journal at most one block */
+ handle = ext3_journal_start(inode, 1);
+ if (IS_ERR(handle)) {
+ clear_highpage(page);
+ flush_dcache_page(page);
+ err = PTR_ERR(handle);
+ goto unlock;
+ }
+ }
+
if (ext3_should_journal_data(inode)) {
BUFFER_TRACE(bh, "get write access");
err = ext3_journal_get_write_access(handle, bh);
if (err)
- goto unlock;
+ goto stop;
}
zero_user(page, offset, length);
@@ -2041,6 +2121,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page,
err = ext3_journal_dirty_data(handle, bh);
mark_buffer_dirty(bh);
}
+stop:
+ if (handle)
+ ext3_journal_stop(handle);
unlock:
unlock_page(page);
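
The offset arithmetic in ext3_block_truncate_page() is worth seeing with concrete numbers. A tiny userspace sketch, simplified by assuming blocksize == page size, of the range that must be zeroed for a non-block-aligned i_size:

#include <stdio.h>

/* Tail-zeroing arithmetic from ext3_block_truncate_page(): given a new
 * i_size ("from"), compute the byte range within the last block that
 * must be zeroed so stale data is not exposed if the file grows later. */
int main(void)
{
	unsigned blocksize = 4096;
	unsigned long long from = 10000;	/* new i_size */
	unsigned offset = from & (blocksize - 1);
	unsigned length = blocksize - (offset & (blocksize - 1));

	if ((from & (blocksize - 1)) == 0)
		printf("block aligned: nothing to zero\n");
	else
		printf("zero %u bytes at offset %u in the last block\n",
		       length, offset);	/* zero 2288 bytes at offset 1808 */
	return 0;
}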
@@ -2409,8 +2492,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode,
int ext3_can_truncate(struct inode *inode)
{
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
- return 0;
if (S_ISREG(inode->i_mode))
return 1;
if (S_ISDIR(inode->i_mode))
@@ -2454,7 +2535,6 @@ void ext3_truncate(struct inode *inode)
struct ext3_inode_info *ei = EXT3_I(inode);
__le32 *i_data = ei->i_data;
int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
- struct address_space *mapping = inode->i_mapping;
int offsets[4];
Indirect chain[4];
Indirect *partial;
@@ -2462,7 +2542,8 @@ void ext3_truncate(struct inode *inode)
int n;
long last_block;
unsigned blocksize = inode->i_sb->s_blocksize;
- struct page *page;
+
+ trace_ext3_truncate_enter(inode);
if (!ext3_can_truncate(inode))
goto out_notrans;
@@ -2470,37 +2551,12 @@ void ext3_truncate(struct inode *inode)
if (inode->i_size == 0 && ext3_should_writeback_data(inode))
ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE);
- /*
- * We have to lock the EOF page here, because lock_page() nests
- * outside journal_start().
- */
- if ((inode->i_size & (blocksize - 1)) == 0) {
- /* Block boundary? Nothing to do */
- page = NULL;
- } else {
- page = grab_cache_page(mapping,
- inode->i_size >> PAGE_CACHE_SHIFT);
- if (!page)
- goto out_notrans;
- }
-
handle = start_transaction(inode);
- if (IS_ERR(handle)) {
- if (page) {
- clear_highpage(page);
- flush_dcache_page(page);
- unlock_page(page);
- page_cache_release(page);
- }
+ if (IS_ERR(handle))
goto out_notrans;
- }
last_block = (inode->i_size + blocksize-1)
>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
-
- if (page)
- ext3_block_truncate_page(handle, page, mapping, inode->i_size);
-
n = ext3_block_to_path(inode, last_block, offsets, NULL);
if (n == 0)
goto out_stop; /* error */
@@ -2615,6 +2671,7 @@ out_stop:
ext3_orphan_del(handle, inode);
ext3_journal_stop(handle);
+ trace_ext3_truncate_exit(inode);
return;
out_notrans:
/*
@@ -2623,6 +2680,7 @@ out_notrans:
*/
if (inode->i_nlink)
ext3_orphan_del(NULL, inode);
+ trace_ext3_truncate_exit(inode);
}
static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb,
@@ -2764,9 +2822,10 @@ make_io:
* has in-inode xattrs, or we don't have this inode in memory.
* Read the block from disk.
*/
+ trace_ext3_load_inode(inode);
get_bh(bh);
bh->b_end_io = end_buffer_read_sync;
- submit_bh(READ_META, bh);
+ submit_bh(READ | REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh)) {
ext3_error(inode->i_sb, "ext3_get_inode_loc",
@@ -2858,7 +2917,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino)
inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16;
inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16;
}
- inode->i_nlink = le16_to_cpu(raw_inode->i_links_count);
+ set_nlink(inode, le16_to_cpu(raw_inode->i_links_count));
inode->i_size = le32_to_cpu(raw_inode->i_size);
inode->i_atime.tv_sec = (signed)le32_to_cpu(raw_inode->i_atime);
inode->i_ctime.tv_sec = (signed)le32_to_cpu(raw_inode->i_ctime);
@@ -3245,6 +3304,9 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
ext3_journal_stop(handle);
}
+ if (attr->ia_valid & ATTR_SIZE)
+ inode_dio_wait(inode);
+
if (S_ISREG(inode->i_mode) &&
attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) {
handle_t *handle;
@@ -3256,18 +3318,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
}
error = ext3_orphan_add(handle, inode);
+ if (error) {
+ ext3_journal_stop(handle);
+ goto err_out;
+ }
EXT3_I(inode)->i_disksize = attr->ia_size;
- rc = ext3_mark_inode_dirty(handle, inode);
- if (!error)
- error = rc;
+ error = ext3_mark_inode_dirty(handle, inode);
ext3_journal_stop(handle);
+ if (error) {
+ /* Some hard fs error must have happened. Bail out. */
+ ext3_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ rc = ext3_block_truncate_page(inode, attr->ia_size);
+ if (rc) {
+ /* Cleanup orphan list and exit */
+ handle = ext3_journal_start(inode, 3);
+ if (IS_ERR(handle)) {
+ ext3_orphan_del(NULL, inode);
+ goto err_out;
+ }
+ ext3_orphan_del(handle, inode);
+ ext3_journal_stop(handle);
+ goto err_out;
+ }
}
if ((attr->ia_valid & ATTR_SIZE) &&
attr->ia_size != i_size_read(inode)) {
- rc = vmtruncate(inode, attr->ia_size);
- if (rc)
- goto err_out;
+ truncate_setsize(inode, attr->ia_size);
+ ext3_truncate(inode);
}
setattr_copy(inode, attr);
@@ -3401,6 +3481,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
int err;
might_sleep();
+ trace_ext3_mark_inode_dirty(inode, _RET_IP_);
err = ext3_reserve_inode_write(handle, inode, &iloc);
if (!err)
err = ext3_mark_iloc_dirty(handle, inode, &iloc);
diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c
index f4090bd..ba1b54e 100644
--- a/fs/ext3/ioctl.c
+++ b/fs/ext3/ioctl.c
@@ -150,30 +150,6 @@ setversion_out:
mnt_drop_write(filp->f_path.mnt);
return err;
}
-#ifdef CONFIG_JBD_DEBUG
- case EXT3_IOC_WAIT_FOR_READONLY:
- /*
- * This is racy - by the time we're woken up and running,
- * the superblock could be released. And the module could
- * have been unloaded. So sue me.
- *
- * Returns 1 if it slept, else zero.
- */
- {
- struct super_block *sb = inode->i_sb;
- DECLARE_WAITQUEUE(wait, current);
- int ret = 0;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
- if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
- schedule();
- ret = 1;
- }
- remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
- return ret;
- }
-#endif
case EXT3_IOC_GETRSVSZ:
if (test_opt(inode->i_sb, RESERVATION)
&& S_ISREG(inode->i_mode)
@@ -285,7 +261,7 @@ group_add_out:
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (copy_from_user(&range, (struct fstrim_range *)arg,
+ if (copy_from_user(&range, (struct fstrim_range __user *)arg,
sizeof(range)))
return -EFAULT;
@@ -293,7 +269,7 @@ group_add_out:
if (ret < 0)
return ret;
- if (copy_to_user((struct fstrim_range *)arg, &range,
+ if (copy_to_user((struct fstrim_range __user *)arg, &range,
sizeof(range)))
return -EFAULT;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 8c9f82d..1272dfb 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -36,6 +36,7 @@
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/bio.h>
+#include <trace/events/ext3.h>
#include "namei.h"
#include "xattr.h"
@@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent
while (len--) printk("%c", *name++);
ext3fs_dirhash(de->name, de->name_len, &h);
printk(":%x.%u ", h.hash,
- ((char *) de - base));
+ (unsigned) ((char *) de - base));
}
space += EXT3_DIR_REC_LEN(de->name_len);
names++;
@@ -918,7 +919,8 @@ restart:
bh = ext3_getblk(NULL, dir, b++, 0, &err);
bh_use[ra_max] = bh;
if (bh)
- ll_rw_block(READ_META, 1, &bh);
+ ll_rw_block(READ | REQ_META | REQ_PRIO,
+ 1, &bh);
}
}
if ((bh = bh_use[ra_ptr++]) == NULL)
@@ -1010,7 +1012,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir,
*err = -ENOENT;
errout:
- dxtrace(printk("%s not found\n", name));
+ dxtrace(printk("%s not found\n", entry->name));
dx_release (frames);
return NULL;
}
@@ -1035,15 +1037,11 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str
return ERR_PTR(-EIO);
}
inode = ext3_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
- if (PTR_ERR(inode) == -ESTALE) {
- ext3_error(dir->i_sb, __func__,
- "deleted inode referenced: %lu",
- ino);
- return ERR_PTR(-EIO);
- } else {
- return ERR_CAST(inode);
- }
+ if (inode == ERR_PTR(-ESTALE)) {
+ ext3_error(dir->i_sb, __func__,
+ "deleted inode referenced: %lu",
+ ino);
+ return ERR_PTR(-EIO);
}
}
return d_splice_alias(inode, dentry);
@@ -1820,7 +1818,7 @@ retry:
de->name_len = 2;
strcpy (de->name, "..");
ext3_set_de_type(dir->i_sb, de, S_IFDIR);
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
err = ext3_journal_dirty_metadata(handle, dir_block);
if (err)
@@ -1832,7 +1830,7 @@ retry:
if (err) {
out_clear_inode:
- inode->i_nlink = 0;
+ clear_nlink(inode);
unlock_new_inode(inode);
ext3_mark_inode_dirty(handle, inode);
iput (inode);
@@ -2141,6 +2139,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
struct ext3_dir_entry_2 * de;
handle_t *handle;
+ trace_ext3_unlink_enter(dir, dentry);
/* Initialize quotas before so that eventual writes go
* in separate transaction */
dquot_initialize(dir);
@@ -2168,7 +2167,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
ext3_warning (inode->i_sb, "ext3_unlink",
"Deleting nonexistent file (%lu), %d",
inode->i_ino, inode->i_nlink);
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
}
retval = ext3_delete_entry(handle, dir, de, bh);
if (retval)
@@ -2186,6 +2185,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry)
end_unlink:
ext3_journal_stop(handle);
brelse (bh);
+ trace_ext3_unlink_exit(dentry, retval);
return retval;
}
@@ -2532,7 +2532,7 @@ const struct inode_operations ext3_dir_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .get_acl = ext3_get_acl,
};
const struct inode_operations ext3_special_inode_operations = {
@@ -2543,5 +2543,5 @@ const struct inode_operations ext3_special_inode_operations = {
.listxattr = ext3_listxattr,
.removexattr = generic_removexattr,
#endif
- .check_acl = ext3_check_acl,
+ .get_acl = ext3_get_acl,
};
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index ba57a63..562ede3 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -44,6 +44,9 @@
#include "acl.h"
#include "namei.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/ext3.h>
+
#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
#define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
#else
@@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb)
return &ei->vfs_inode;
}
+static int ext3_drop_inode(struct inode *inode)
+{
+ int drop = generic_drop_inode(inode);
+
+ trace_ext3_drop_inode(inode, drop);
+ return drop;
+}
+
static void ext3_i_callback(struct rcu_head *head)
{
struct inode *inode = container_of(head, struct inode, i_rcu);
@@ -641,8 +652,6 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",nouid32");
if (test_opt(sb, DEBUG))
seq_puts(seq, ",debug");
- if (test_opt(sb, OLDALLOC))
- seq_puts(seq, ",oldalloc");
#ifdef CONFIG_EXT3_FS_XATTR
if (test_opt(sb, XATTR_USER))
seq_puts(seq, ",user_xattr");
@@ -788,6 +797,7 @@ static const struct super_operations ext3_sops = {
.destroy_inode = ext3_destroy_inode,
.write_inode = ext3_write_inode,
.dirty_inode = ext3_dirty_inode,
+ .drop_inode = ext3_drop_inode,
.evict_inode = ext3_evict_inode,
.put_super = ext3_put_super,
.sync_fs = ext3_sync_fs,
@@ -1037,10 +1047,12 @@ static int parse_options (char *options, struct super_block *sb,
set_opt (sbi->s_mount_opt, DEBUG);
break;
case Opt_oldalloc:
- set_opt (sbi->s_mount_opt, OLDALLOC);
+ ext3_msg(sb, KERN_WARNING,
+ "Ignoring deprecated oldalloc option");
break;
case Opt_orlov:
- clear_opt (sbi->s_mount_opt, OLDALLOC);
+ ext3_msg(sb, KERN_WARNING,
+ "Ignoring deprecated orlov option");
break;
#ifdef CONFIG_EXT3_FS_XATTR
case Opt_user_xattr:
@@ -1291,13 +1303,6 @@ set_qf_format:
"not specified.");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext3_msg(sb, KERN_ERR, "error: journaled quota format "
- "specified with no journaling "
- "enabled.");
- return 0;
- }
}
#endif
return 1;
@@ -1718,6 +1723,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
+ /* enable barriers by default */
+ set_opt(sbi->s_mount_opt, BARRIER);
set_opt(sbi->s_mount_opt, RESERVATION);
if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
@@ -2507,6 +2514,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait)
{
tid_t target;
+ trace_ext3_sync_fs(sb, wait);
if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
if (wait)
log_wait_commit(EXT3_SB(sb)->s_journal, target);
@@ -2654,13 +2662,13 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
/*
* If we have an unprocessed orphan list hanging
* around from a previously readonly bdev mount,
- * require a full umount/remount for now.
+ * require a full umount & mount for now.
*/
if (es->s_last_orphan) {
ext3_msg(sb, KERN_WARNING, "warning: couldn't "
"remount RDWR because of unprocessed "
"orphan inode list. Please "
- "umount/remount instead.");
+ "umount & mount instead.");
err = -EINVAL;
goto restore_opts;
}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index b8d9f83..3c218b8 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -48,28 +48,32 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name,
name, value, size, flags);
}
-int
-ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
- const struct qstr *qstr)
+int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *fs_info)
{
- int err;
- size_t len;
- void *value;
- char *name;
+ const struct xattr *xattr;
+ handle_t *handle = fs_info;
+ int err = 0;
- err = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ err = ext3_xattr_set_handle(handle, inode,
+ EXT3_XATTR_INDEX_SECURITY,
+ xattr->name, xattr->value,
+ xattr->value_len, 0);
+ if (err < 0)
+ break;
}
- err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY,
- name, value, len, 0);
- kfree(name);
- kfree(value);
return err;
}
+int
+ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
+{
+ return security_inode_init_security(inode, dir, qstr,
+ &ext3_initxattrs, handle);
+}
+
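For context, a rough sketch of the caller side (an assumption about security_inode_init_security(), not code from this patch): the LSM collects its attributes into a NULL-terminated xattr array and hands the whole array to the filesystem callback, so every attribute is written under the one journal handle passed in fs_info:

    /* Heavily simplified; the real LSM code also handles EVM, -EOPNOTSUPP, etc. */
    int security_inode_init_security(struct inode *inode, struct inode *dir,
                                     const struct qstr *qstr,
                                     initxattrs initxattrs, void *fs_info)
    {
            struct xattr xattr_array[2] = { };  /* [1].name == NULL terminates */

            /* ... active LSM fills xattr_array[0].name/value/value_len ... */
            return initxattrs(inode, xattr_array, fs_info); /* fs_info == handle */
    }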
const struct xattr_handler ext3_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
.list = ext3_xattr_security_list,
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 1a43114..7b2af5a 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -227,7 +227,7 @@ vxfs_iinit(struct inode *ip, struct vxfs_inode_info *vip)
ip->i_uid = (uid_t)vip->vii_uid;
ip->i_gid = (gid_t)vip->vii_gid;
- ip->i_nlink = vip->vii_nlink;
+ set_nlink(ip, vip->vii_nlink);
ip->i_size = vip->vii_size;
ip->i_atime.tv_sec = vip->vii_atime;
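set_nlink() and its relatives replace open-coded stores to i_nlink, keeping the VFS link-count bookkeeping (and its sanity warnings) in one place. The substitutions this patch applies throughout:

    set_nlink(inode, n);    /* was: inode->i_nlink = n; */
    clear_nlink(inode);     /* was: inode->i_nlink = 0; */
    drop_nlink(inode);      /* was: inode->i_nlink--;   */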
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index cbc0715..65978d7 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -67,39 +67,12 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type)
return acl;
}
-/**
- * gfs2_check_acl - Check an ACL to see if we're allowed to do something
- * @inode: the file we want to do something to
- * @mask: what we want to do
- *
- * Returns: errno
- */
-
-int gfs2_check_acl(struct inode *inode, int mask, unsigned int flags)
+struct posix_acl *gfs2_get_acl(struct inode *inode, int type)
{
- struct posix_acl *acl;
- int error;
-
- if (flags & IPERM_FLAG_RCU) {
- if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
- return -ECHILD;
- return -EAGAIN;
- }
-
- acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
-
- if (acl) {
- error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- return error;
- }
-
- return -EAGAIN;
+ return gfs2_acl_get(GFS2_I(inode), type);
}
-static int gfs2_set_mode(struct inode *inode, mode_t mode)
+static int gfs2_set_mode(struct inode *inode, umode_t mode)
{
int error = 0;
@@ -109,7 +82,7 @@ static int gfs2_set_mode(struct inode *inode, mode_t mode)
iattr.ia_valid = ATTR_MODE;
iattr.ia_mode = mode;
- error = gfs2_setattr_simple(GFS2_I(inode), &iattr);
+ error = gfs2_setattr_simple(inode, &iattr);
}
return error;
@@ -143,8 +116,8 @@ out:
int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- struct posix_acl *acl, *clone;
- mode_t mode = inode->i_mode;
+ struct posix_acl *acl;
+ umode_t mode = inode->i_mode;
int error = 0;
if (!sdp->sd_args.ar_posix_acl)
@@ -168,16 +141,10 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode)
goto out;
}
- clone = posix_acl_clone(acl, GFP_NOFS);
- error = -ENOMEM;
- if (!clone)
- goto out;
- posix_acl_release(acl);
- acl = clone;
-
- error = posix_acl_create_masq(acl, &mode);
+ error = posix_acl_create(&acl, GFP_NOFS, &mode);
if (error < 0)
- goto out;
+ return error;
+
if (error == 0)
goto munge;
@@ -193,7 +160,8 @@ out:
int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
{
- struct posix_acl *acl, *clone;
+ struct inode *inode = &ip->i_inode;
+ struct posix_acl *acl;
char *data;
unsigned int len;
int error;
@@ -202,27 +170,21 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr)
if (IS_ERR(acl))
return PTR_ERR(acl);
if (!acl)
- return gfs2_setattr_simple(ip, attr);
+ return gfs2_setattr_simple(inode, attr);
- clone = posix_acl_clone(acl, GFP_NOFS);
+ error = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode);
+ if (error)
+ return error;
+
+ len = posix_acl_to_xattr(acl, NULL, 0);
+ data = kmalloc(len, GFP_NOFS);
error = -ENOMEM;
- if (!clone)
+ if (data == NULL)
goto out;
- posix_acl_release(acl);
- acl = clone;
-
- error = posix_acl_chmod_masq(acl, attr->ia_mode);
- if (!error) {
- len = posix_acl_to_xattr(acl, NULL, 0);
- data = kmalloc(len, GFP_NOFS);
- error = -ENOMEM;
- if (data == NULL)
- goto out;
- posix_acl_to_xattr(acl, data, len);
- error = gfs2_xattr_acl_chmod(ip, attr, data);
- kfree(data);
- set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl);
- }
+ posix_acl_to_xattr(acl, data, len);
+ error = gfs2_xattr_acl_chmod(ip, attr, data);
+ kfree(data);
+ set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl);
out:
posix_acl_release(acl);
@@ -315,7 +277,7 @@ static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
goto out_release;
if (type == ACL_TYPE_ACCESS) {
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
error = posix_acl_equiv_mode(acl, &mode);
if (error <= 0) {
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h
index a93907c..0da38dc 100644
--- a/fs/gfs2/acl.h
+++ b/fs/gfs2/acl.h
@@ -16,7 +16,7 @@
#define GFS2_POSIX_ACL_DEFAULT "posix_acl_default"
#define GFS2_ACL_MAX_ENTRIES 25
-extern int gfs2_check_acl(struct inode *inode, int mask, unsigned int);
+extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode);
extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr);
extern const struct xattr_handler gfs2_xattr_system_handler;
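With the ->check_acl to ->get_acl switch, the permission arithmetic moves out of each filesystem: generic code fetches the ACL through the new hook and evaluates it itself. Roughly what the VFS side now does, reconstructed from the gfs2_check_acl() body deleted above (the ACL cache lookup that generic code also performs is omitted):

    struct posix_acl *acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS);

    if (IS_ERR(acl))
            return PTR_ERR(acl);
    if (acl) {
            int error = posix_acl_permission(inode, acl, mask);
            posix_acl_release(acl);
            return error;
    }
    /* no ACL: fall through to the classic mode-bit check */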
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index f9fbbe9..4858e1f 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -663,7 +663,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
if (&ip->i_inode == sdp->sd_rindex)
rblocks += 2 * RES_STATFS;
if (alloc_required)
- rblocks += gfs2_rg_blocks(al);
+ rblocks += gfs2_rg_blocks(ip);
error = gfs2_trans_begin(sdp, rblocks,
PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
@@ -787,7 +787,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
u64 to = pos + copied;
void *kaddr;
unsigned char *buf = dibh->b_data + sizeof(struct gfs2_dinode);
- struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
BUG_ON((pos + len) > (dibh->b_size - sizeof(struct gfs2_dinode)));
kaddr = kmap_atomic(page, KM_USER0);
@@ -804,7 +803,6 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
if (copied) {
if (inode->i_size < to)
i_size_write(inode, to);
- gfs2_dinode_out(ip, di);
mark_inode_dirty(inode);
}
@@ -873,10 +871,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
gfs2_page_add_databufs(ip, page, from, to);
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
- if (ret > 0) {
- gfs2_dinode_out(ip, dibh->b_data);
- mark_inode_dirty(inode);
- }
if (inode == sdp->sd_rindex) {
adjust_fs_space(inode);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index e65493a..41d494d 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -10,6 +10,7 @@
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
@@ -36,11 +37,6 @@ struct metapath {
__u16 mp_list[GFS2_MAX_META_HEIGHT];
};
-typedef int (*block_call_t) (struct gfs2_inode *ip, struct buffer_head *dibh,
- struct buffer_head *bh, __be64 *top,
- __be64 *bottom, unsigned int height,
- void *data);
-
struct strip_mine {
int sm_first;
unsigned int sm_height;
@@ -273,6 +269,30 @@ static inline __be64 *metapointer(unsigned int height, const struct metapath *mp
return ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
}
+static void gfs2_metapath_ra(struct gfs2_glock *gl,
+ const struct buffer_head *bh, const __be64 *pos)
+{
+ struct buffer_head *rabh;
+ const __be64 *endp = (const __be64 *)(bh->b_data + bh->b_size);
+ const __be64 *t;
+
+ for (t = pos; t < endp; t++) {
+ if (!*t)
+ continue;
+
+ rabh = gfs2_getbuf(gl, be64_to_cpu(*t), CREATE);
+ if (trylock_buffer(rabh)) {
+ if (!buffer_uptodate(rabh)) {
+ rabh->b_end_io = end_buffer_read_sync;
+ submit_bh(READA | REQ_META, rabh);
+ continue;
+ }
+ unlock_buffer(rabh);
+ }
+ brelse(rabh);
+ }
+}
+
/**
* lookup_metapath - Walk the metadata tree to a specific point
* @ip: The inode
@@ -432,12 +452,14 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
{
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
+ struct super_block *sb = sdp->sd_vfs;
struct buffer_head *dibh = mp->mp_bh[0];
u64 bn, dblock = 0;
unsigned n, i, blks, alloced = 0, iblks = 0, branch_start = 0;
unsigned dblks = 0;
unsigned ptrs_per_blk;
const unsigned end_of_metadata = height - 1;
+ int ret;
int eob = 0;
enum alloc_state state;
__be64 *ptr;
@@ -540,6 +562,15 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
dblock = bn;
while (n-- > 0)
*ptr++ = cpu_to_be64(bn++);
+ if (buffer_zeronew(bh_map)) {
+ ret = sb_issue_zeroout(sb, dblock, dblks,
+ GFP_NOFS);
+ if (ret) {
+ fs_err(sdp,
+ "Failed to zero data buffers\n");
+ clear_buffer_zeronew(bh_map);
+ }
+ }
break;
}
} while ((state != ALLOC_DATA) || !dblock);
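The new BH_Zeronew flag records a promise that freshly allocated blocks will read back as zeros; sb_issue_zeroout() (a thin wrapper that converts filesystem block numbers to sectors for blkdev_issue_zeroout()) pushes that zeroing down to the block layer. The contract, condensed from the hunk above:

    if (buffer_zeronew(bh_map)) {
            if (sb_issue_zeroout(sb, dblock, dblks, GFP_NOFS))
                    clear_buffer_zeronew(bh_map); /* promise broken: callers
                                                     must treat this as an error */
    }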
@@ -668,76 +699,6 @@ int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsi
}
/**
- * recursive_scan - recursively scan through the end of a file
- * @ip: the inode
- * @dibh: the dinode buffer
- * @mp: the path through the metadata to the point to start
- * @height: the height the recursion is at
- * @block: the indirect block to look at
- * @first: 1 if this is the first block
- * @bc: the call to make for each piece of metadata
- * @data: data opaque to this function to pass to @bc
- *
- * When this is first called @height and @block should be zero and
- * @first should be 1.
- *
- * Returns: errno
- */
-
-static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
- struct metapath *mp, unsigned int height,
- u64 block, int first, block_call_t bc,
- void *data)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct buffer_head *bh = NULL;
- __be64 *top, *bottom;
- u64 bn;
- int error;
- int mh_size = sizeof(struct gfs2_meta_header);
-
- if (!height) {
- error = gfs2_meta_inode_buffer(ip, &bh);
- if (error)
- return error;
- dibh = bh;
-
- top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
- bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
- } else {
- error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
- if (error)
- return error;
-
- top = (__be64 *)(bh->b_data + mh_size) +
- (first ? mp->mp_list[height] : 0);
-
- bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
- }
-
- error = bc(ip, dibh, bh, top, bottom, height, data);
- if (error)
- goto out;
-
- if (height < ip->i_height - 1)
- for (; top < bottom; top++, first = 0) {
- if (!*top)
- continue;
-
- bn = be64_to_cpu(*top);
-
- error = recursive_scan(ip, dibh, mp, height + 1, bn,
- first, bc, data);
- if (error)
- break;
- }
-
-out:
- brelse(bh);
- return error;
-}
-
-/**
 * do_strip - Look for a particular layer of the file and strip it off
* @ip: the inode
* @dibh: the dinode buffer
@@ -752,9 +713,8 @@ out:
static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
struct buffer_head *bh, __be64 *top, __be64 *bottom,
- unsigned int height, void *data)
+ unsigned int height, struct strip_mine *sm)
{
- struct strip_mine *sm = data;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrp_list rlist;
u64 bn, bstart;
@@ -783,11 +743,6 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
else if (ip->i_depth)
revokes = sdp->sd_inptrs;
- if (ip != GFS2_I(sdp->sd_rindex))
- error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
- else if (!sdp->sd_rgrps)
- error = gfs2_ri_update(ip);
-
if (error)
return error;
@@ -805,7 +760,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
blen++;
else {
if (bstart)
- gfs2_rlist_add(sdp, &rlist, bstart);
+ gfs2_rlist_add(ip, &rlist, bstart);
bstart = bn;
blen = 1;
@@ -813,7 +768,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
}
if (bstart)
- gfs2_rlist_add(sdp, &rlist, bstart);
+ gfs2_rlist_add(ip, &rlist, bstart);
else
goto out; /* Nothing to do */
@@ -854,11 +809,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
blen++;
else {
if (bstart) {
- if (metadata)
- __gfs2_free_meta(ip, bstart, blen);
- else
- __gfs2_free_data(ip, bstart, blen);
-
+ __gfs2_free_blocks(ip, bstart, blen, metadata);
btotal += blen;
}
@@ -870,11 +821,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
gfs2_add_inode_blocks(&ip->i_inode, -1);
}
if (bstart) {
- if (metadata)
- __gfs2_free_meta(ip, bstart, blen);
- else
- __gfs2_free_data(ip, bstart, blen);
-
+ __gfs2_free_blocks(ip, bstart, blen, metadata);
btotal += blen;
}
@@ -895,12 +842,82 @@ out_rg_gunlock:
out_rlist:
gfs2_rlist_free(&rlist);
out:
- if (ip != GFS2_I(sdp->sd_rindex))
- gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
return error;
}
/**
+ * recursive_scan - recursively scan through the end of a file
+ * @ip: the inode
+ * @dibh: the dinode buffer
+ * @mp: the path through the metadata to the point to start
+ * @height: the height the recursion is at
+ * @block: the indirect block to look at
+ * @first: 1 if this is the first block
+ * @sm: the strip_mine structure to pass to do_strip()
+ *
+ * When this is first called @height and @block should be zero and
+ * @first should be 1.
+ *
+ * Returns: errno
+ */
+
+static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
+ struct metapath *mp, unsigned int height,
+ u64 block, int first, struct strip_mine *sm)
+{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct buffer_head *bh = NULL;
+ __be64 *top, *bottom;
+ u64 bn;
+ int error;
+ int mh_size = sizeof(struct gfs2_meta_header);
+
+ if (!height) {
+ error = gfs2_meta_inode_buffer(ip, &bh);
+ if (error)
+ return error;
+ dibh = bh;
+
+ top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
+ bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
+ } else {
+ error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
+ if (error)
+ return error;
+
+ top = (__be64 *)(bh->b_data + mh_size) +
+ (first ? mp->mp_list[height] : 0);
+
+ bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
+ }
+
+ error = do_strip(ip, dibh, bh, top, bottom, height, sm);
+ if (error)
+ goto out;
+
+ if (height < ip->i_height - 1) {
+
+ gfs2_metapath_ra(ip->i_gl, bh, top);
+
+ for (; top < bottom; top++, first = 0) {
+ if (!*top)
+ continue;
+
+ bn = be64_to_cpu(*top);
+
+ error = recursive_scan(ip, dibh, mp, height + 1, bn,
+ first, sm);
+ if (error)
+ break;
+ }
+ }
+out:
+ brelse(bh);
+ return error;
+}
+
+
+/**
* gfs2_block_truncate_page - Deal with zeroing out data for truncate
*
* This is partly borrowed from ext3.
@@ -1039,7 +1056,7 @@ static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
sm.sm_first = !!size;
sm.sm_height = height;
- error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
+ error = recursive_scan(ip, NULL, &mp, 0, 0, 1, &sm);
if (error)
break;
}
@@ -1224,6 +1241,8 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
if (ret)
return ret;
+ inode_dio_wait(inode);
+
oldsize = inode->i_size;
if (newsize >= oldsize)
return do_grow(inode, newsize);
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 091ee47..8ccad24 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -240,16 +240,15 @@ fail:
return error;
}
-static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
- u64 offset, unsigned int size)
+static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, __be64 *buf,
+ unsigned int size)
{
struct buffer_head *dibh;
int error;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (!error) {
- offset += sizeof(struct gfs2_dinode);
- memcpy(buf, dibh->b_data + offset, size);
+ memcpy(buf, dibh->b_data + sizeof(struct gfs2_dinode), size);
brelse(dibh);
}
@@ -261,13 +260,12 @@ static int gfs2_dir_read_stuffed(struct gfs2_inode *ip, char *buf,
 * gfs2_dir_read_data - Read data from a directory inode
* @ip: The GFS2 Inode
* @buf: The buffer to place result into
- * @offset: File offset to begin jdata_readng from
* @size: Amount of data to transfer
*
* Returns: The amount of data actually copied or the error
*/
-static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
- unsigned int size, unsigned ra)
+static int gfs2_dir_read_data(struct gfs2_inode *ip, __be64 *buf,
+ unsigned int size)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
u64 lblock, dblock;
@@ -275,24 +273,14 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
unsigned int o;
int copied = 0;
int error = 0;
- u64 disksize = i_size_read(&ip->i_inode);
-
- if (offset >= disksize)
- return 0;
-
- if (offset + size > disksize)
- size = disksize - offset;
-
- if (!size)
- return 0;
if (gfs2_is_stuffed(ip))
- return gfs2_dir_read_stuffed(ip, buf, offset, size);
+ return gfs2_dir_read_stuffed(ip, buf, size);
if (gfs2_assert_warn(sdp, gfs2_is_jdata(ip)))
return -EINVAL;
- lblock = offset;
+ lblock = 0;
o = do_div(lblock, sdp->sd_jbsize) + sizeof(struct gfs2_meta_header);
while (copied < size) {
@@ -311,8 +299,6 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
if (error || !dblock)
goto fail;
BUG_ON(extlen < 1);
- if (!ra)
- extlen = 1;
bh = gfs2_meta_ra(ip->i_gl, dblock, extlen);
} else {
error = gfs2_meta_read(ip->i_gl, dblock, DIO_WAIT, &bh);
@@ -328,7 +314,7 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset,
extlen--;
memcpy(buf, bh->b_data + o, amount);
brelse(bh);
- buf += amount;
+ buf += (amount/sizeof(__be64));
copied += amount;
lblock++;
o = sizeof(struct gfs2_meta_header);
@@ -339,6 +325,67 @@ fail:
return (copied) ? copied : error;
}
+/**
+ * gfs2_dir_get_hash_table - Get pointer to the dir hash table
+ * @ip: The inode in question
+ *
+ * Returns: The hash table or an error
+ */
+
+static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip)
+{
+ struct inode *inode = &ip->i_inode;
+ int ret;
+ u32 hsize;
+ __be64 *hc;
+
+ BUG_ON(!(ip->i_diskflags & GFS2_DIF_EXHASH));
+
+ hc = ip->i_hash_cache;
+ if (hc)
+ return hc;
+
+ hsize = 1 << ip->i_depth;
+ hsize *= sizeof(__be64);
+ if (hsize != i_size_read(&ip->i_inode)) {
+ gfs2_consist_inode(ip);
+ return ERR_PTR(-EIO);
+ }
+
+ hc = kmalloc(hsize, GFP_NOFS);
+ ret = -ENOMEM;
+ if (hc == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ ret = gfs2_dir_read_data(ip, hc, hsize);
+ if (ret < 0) {
+ kfree(hc);
+ return ERR_PTR(ret);
+ }
+
+ spin_lock(&inode->i_lock);
+ if (ip->i_hash_cache)
+ kfree(hc);
+ else
+ ip->i_hash_cache = hc;
+ spin_unlock(&inode->i_lock);
+
+ return ip->i_hash_cache;
+}
+
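gfs2_dir_get_hash_table() builds the table with no locks held and takes i_lock only to publish it; a racing builder simply frees its copy. The generic shape of the idiom, with hypothetical names (a sketch, not patch code):

    static struct table *get_cached_table(struct obj *o)
    {
            struct table *t = expensive_build();   /* no locks held */

            spin_lock(&o->lock);
            if (o->cached)
                    kfree(t);          /* lost the race; keep the winner's */
            else
                    o->cached = t;     /* first publisher wins */
            spin_unlock(&o->lock);
            return o->cached;
    }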
+/**
+ * gfs2_dir_hash_inval - Invalidate dir hash
+ * @ip: The directory inode
+ *
+ * Must be called with an exclusive glock, or during glock invalidation.
+ */
+void gfs2_dir_hash_inval(struct gfs2_inode *ip)
+{
+ __be64 *hc = ip->i_hash_cache;
+ ip->i_hash_cache = NULL;
+ kfree(hc);
+}
+
static inline int gfs2_dirent_sentinel(const struct gfs2_dirent *dent)
{
return dent->de_inum.no_addr == 0 || dent->de_inum.no_formal_ino == 0;
@@ -686,17 +733,12 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no,
static int get_leaf_nr(struct gfs2_inode *dip, u32 index,
u64 *leaf_out)
{
- __be64 leaf_no;
- int error;
-
- error = gfs2_dir_read_data(dip, (char *)&leaf_no,
- index * sizeof(__be64),
- sizeof(__be64), 0);
- if (error != sizeof(u64))
- return (error < 0) ? error : -EIO;
-
- *leaf_out = be64_to_cpu(leaf_no);
+ __be64 *hash;
+ hash = gfs2_dir_get_hash_table(dip);
+ if (IS_ERR(hash))
+ return PTR_ERR(hash);
+ *leaf_out = be64_to_cpu(*(hash + index));
return 0;
}
@@ -966,6 +1008,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
for (x = 0; x < half_len; x++)
lp[x] = cpu_to_be64(bn);
+ gfs2_dir_hash_inval(dip);
+
error = gfs2_dir_write_data(dip, (char *)lp, start * sizeof(u64),
half_len * sizeof(u64));
if (error != half_len * sizeof(u64)) {
@@ -1052,70 +1096,54 @@ fail_brelse:
static int dir_double_exhash(struct gfs2_inode *dip)
{
- struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct buffer_head *dibh;
u32 hsize;
- u64 *buf;
- u64 *from, *to;
- u64 block;
- u64 disksize = i_size_read(&dip->i_inode);
+ u32 hsize_bytes;
+ __be64 *hc;
+ __be64 *hc2, *h;
int x;
int error = 0;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != disksize) {
- gfs2_consist_inode(dip);
- return -EIO;
- }
+ hsize_bytes = hsize * sizeof(__be64);
- /* Allocate both the "from" and "to" buffers in one big chunk */
+ hc = gfs2_dir_get_hash_table(dip);
+ if (IS_ERR(hc))
+ return PTR_ERR(hc);
- buf = kcalloc(3, sdp->sd_hash_bsize, GFP_NOFS);
- if (!buf)
+ h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS);
+ if (!hc2)
return -ENOMEM;
- for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) {
- error = gfs2_dir_read_data(dip, (char *)buf,
- block * sdp->sd_hash_bsize,
- sdp->sd_hash_bsize, 1);
- if (error != sdp->sd_hash_bsize) {
- if (error >= 0)
- error = -EIO;
- goto fail;
- }
-
- from = buf;
- to = (u64 *)((char *)buf + sdp->sd_hash_bsize);
-
- for (x = sdp->sd_hash_ptrs; x--; from++) {
- *to++ = *from; /* No endianess worries */
- *to++ = *from;
- }
+ error = gfs2_meta_inode_buffer(dip, &dibh);
+ if (error)
+ goto out_kfree;
- error = gfs2_dir_write_data(dip,
- (char *)buf + sdp->sd_hash_bsize,
- block * sdp->sd_sb.sb_bsize,
- sdp->sd_sb.sb_bsize);
- if (error != sdp->sd_sb.sb_bsize) {
- if (error >= 0)
- error = -EIO;
- goto fail;
- }
+ for (x = 0; x < hsize; x++) {
+ *h++ = *hc;
+ *h++ = *hc;
+ hc++;
}
- kfree(buf);
+ error = gfs2_dir_write_data(dip, (char *)hc2, 0, hsize_bytes * 2);
+ if (error != (hsize_bytes * 2))
+ goto fail;
- error = gfs2_meta_inode_buffer(dip, &dibh);
- if (!gfs2_assert_withdraw(sdp, !error)) {
- dip->i_depth++;
- gfs2_dinode_out(dip, dibh->b_data);
- brelse(dibh);
- }
-
- return error;
+ gfs2_dir_hash_inval(dip);
+ dip->i_hash_cache = hc2;
+ dip->i_depth++;
+ gfs2_dinode_out(dip, dibh->b_data);
+ brelse(dibh);
+ return 0;
fail:
- kfree(buf);
+ /* Replace original hash table & size */
+ gfs2_dir_write_data(dip, (char *)hc, 0, hsize_bytes);
+ i_size_write(&dip->i_inode, hsize_bytes);
+ gfs2_dinode_out(dip, dibh->b_data);
+ brelse(dibh);
+out_kfree:
+ kfree(hc2);
return error;
}
@@ -1348,6 +1376,7 @@ out:
return error;
}
+
/**
* dir_e_read - Reads the entries from a directory into a filldir buffer
* @dip: dinode pointer
@@ -1362,9 +1391,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
filldir_t filldir)
{
struct gfs2_inode *dip = GFS2_I(inode);
- struct gfs2_sbd *sdp = GFS2_SB(inode);
u32 hsize, len = 0;
- u32 ht_offset, lp_offset, ht_offset_cur = -1;
u32 hash, index;
__be64 *lp;
int copied = 0;
@@ -1372,37 +1399,17 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
unsigned depth = 0;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != i_size_read(inode)) {
- gfs2_consist_inode(dip);
- return -EIO;
- }
-
hash = gfs2_dir_offset2hash(*offset);
index = hash >> (32 - dip->i_depth);
- lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
- if (!lp)
- return -ENOMEM;
+ lp = gfs2_dir_get_hash_table(dip);
+ if (IS_ERR(lp))
+ return PTR_ERR(lp);
while (index < hsize) {
- lp_offset = index & (sdp->sd_hash_ptrs - 1);
- ht_offset = index - lp_offset;
-
- if (ht_offset_cur != ht_offset) {
- error = gfs2_dir_read_data(dip, (char *)lp,
- ht_offset * sizeof(__be64),
- sdp->sd_hash_bsize, 1);
- if (error != sdp->sd_hash_bsize) {
- if (error >= 0)
- error = -EIO;
- goto out;
- }
- ht_offset_cur = ht_offset;
- }
-
error = gfs2_dir_read_leaf(inode, offset, opaque, filldir,
&copied, &depth,
- be64_to_cpu(lp[lp_offset]));
+ be64_to_cpu(lp[index]));
if (error)
break;
@@ -1410,8 +1417,6 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque,
index = (index & ~(len - 1)) + len;
}
-out:
- kfree(lp);
if (error > 0)
error = 0;
return error;
@@ -1676,7 +1681,6 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
const struct qstr *name = &dentry->d_name;
struct gfs2_dirent *dent, *prev = NULL;
struct buffer_head *bh;
- int error;
/* Returns _either_ the entry (if it's first in block) or the
previous entry otherwise */
@@ -1705,22 +1709,15 @@ int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry)
}
brelse(bh);
- error = gfs2_meta_inode_buffer(dip, &bh);
- if (error)
- return error;
-
if (!dip->i_entries)
gfs2_consist_inode(dip);
- gfs2_trans_add_bh(dip->i_gl, bh, 1);
dip->i_entries--;
dip->i_inode.i_mtime = dip->i_inode.i_ctime = CURRENT_TIME;
if (S_ISDIR(dentry->d_inode->i_mode))
drop_nlink(&dip->i_inode);
- gfs2_dinode_out(dip, bh->b_data);
- brelse(bh);
mark_inode_dirty(&dip->i_inode);
- return error;
+ return 0;
}
/**
@@ -1810,10 +1807,6 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
if (error)
goto out_put;
- error = gfs2_rindex_hold(sdp, &dip->i_alloc->al_ri_gh);
- if (error)
- goto out_qs;
-
/* Count the number of leaves */
bh = leaf_bh;
@@ -1828,7 +1821,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
if (blk != leaf_no)
brelse(bh);
- gfs2_rlist_add(sdp, &rlist, blk);
+ gfs2_rlist_add(dip, &rlist, blk);
l_blocks++;
}
@@ -1892,8 +1885,6 @@ out_rg_gunlock:
gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
out_rlist:
gfs2_rlist_free(&rlist);
- gfs2_glock_dq_uninit(&dip->i_alloc->al_ri_gh);
-out_qs:
gfs2_quota_unhold(dip);
out_put:
gfs2_alloc_put(dip);
@@ -1914,43 +1905,22 @@ out:
int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
{
- struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct buffer_head *bh;
struct gfs2_leaf *leaf;
u32 hsize, len;
- u32 ht_offset, lp_offset, ht_offset_cur = -1;
u32 index = 0, next_index;
__be64 *lp;
u64 leaf_no;
int error = 0, last;
hsize = 1 << dip->i_depth;
- if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) {
- gfs2_consist_inode(dip);
- return -EIO;
- }
- lp = kmalloc(sdp->sd_hash_bsize, GFP_NOFS);
- if (!lp)
- return -ENOMEM;
+ lp = gfs2_dir_get_hash_table(dip);
+ if (IS_ERR(lp))
+ return PTR_ERR(lp);
while (index < hsize) {
- lp_offset = index & (sdp->sd_hash_ptrs - 1);
- ht_offset = index - lp_offset;
-
- if (ht_offset_cur != ht_offset) {
- error = gfs2_dir_read_data(dip, (char *)lp,
- ht_offset * sizeof(__be64),
- sdp->sd_hash_bsize, 1);
- if (error != sdp->sd_hash_bsize) {
- if (error >= 0)
- error = -EIO;
- goto out;
- }
- ht_offset_cur = ht_offset;
- }
-
- leaf_no = be64_to_cpu(lp[lp_offset]);
+ leaf_no = be64_to_cpu(lp[index]);
if (leaf_no) {
error = get_leaf(dip, leaf_no, &bh);
if (error)
@@ -1976,7 +1946,6 @@ int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip)
}
out:
- kfree(lp);
return error;
}
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h
index e686af1..ff5772f 100644
--- a/fs/gfs2/dir.h
+++ b/fs/gfs2/dir.h
@@ -35,6 +35,7 @@ extern int gfs2_diradd_alloc_required(struct inode *dir,
const struct qstr *filename);
extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block,
struct buffer_head **bhp);
+extern void gfs2_dir_hash_inval(struct gfs2_inode *ip);
static inline u32 gfs2_disk_hash(const char *data, int len)
{
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index a9f5cbe..ce36a56 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -59,15 +59,24 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int origin)
struct gfs2_holder i_gh;
loff_t error;
- if (origin == 2) {
+ switch (origin) {
+ case SEEK_END: /* These reference inode->i_size */
+ case SEEK_DATA:
+ case SEEK_HOLE:
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY,
&i_gh);
if (!error) {
- error = generic_file_llseek_unlocked(file, offset, origin);
+ error = generic_file_llseek(file, offset, origin);
gfs2_glock_dq_uninit(&i_gh);
}
- } else
- error = generic_file_llseek_unlocked(file, offset, origin);
+ break;
+ case SEEK_CUR:
+ case SEEK_SET:
+ error = generic_file_llseek(file, offset, origin);
+ break;
+ default:
+ error = -EINVAL;
+ }
return error;
}
@@ -174,7 +183,9 @@ void gfs2_set_inode_flags(struct inode *inode)
struct gfs2_inode *ip = GFS2_I(inode);
unsigned int flags = inode->i_flags;
- flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
+ flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_NOSEC);
+ if ((ip->i_eattr == 0) && !is_sxid(inode->i_mode))
+ inode->i_flags |= S_NOSEC;
if (ip->i_diskflags & GFS2_DIF_IMMUTABLE)
flags |= S_IMMUTABLE;
if (ip->i_diskflags & GFS2_DIF_APPENDONLY)
@@ -243,7 +254,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask)
!capable(CAP_LINUX_IMMUTABLE))
goto out;
if (!IS_IMMUTABLE(inode)) {
- error = gfs2_permission(inode, MAY_WRITE, 0);
+ error = gfs2_permission(inode, MAY_WRITE);
if (error)
goto out;
}
@@ -355,8 +366,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
unsigned int data_blocks, ind_blocks, rblocks;
struct gfs2_holder gh;
struct gfs2_alloc *al;
+ loff_t size;
int ret;
+ /* Wait if the fs is frozen. This is racy, so we check again later on
+ * and retry if the fs has been frozen after the page lock has
+ * been acquired.
+ */
+ vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret)
@@ -365,8 +383,15 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
- if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE))
+ if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
+ lock_page(page);
+ if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
+ ret = -EAGAIN;
+ unlock_page(page);
+ }
goto out_unlock;
+ }
+
ret = -ENOMEM;
al = gfs2_alloc_get(ip);
if (al == NULL)
@@ -386,7 +411,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
rblocks += data_blocks ? data_blocks : 1;
if (ind_blocks || data_blocks) {
rblocks += RES_STATFS + RES_QUOTA;
- rblocks += gfs2_rg_blocks(al);
+ rblocks += gfs2_rg_blocks(ip);
}
ret = gfs2_trans_begin(sdp, rblocks, 0);
if (ret)
@@ -394,21 +419,29 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
lock_page(page);
ret = -EINVAL;
- last_index = ip->i_inode.i_size >> PAGE_CACHE_SHIFT;
- if (page->index > last_index)
- goto out_unlock_page;
+ size = i_size_read(inode);
+ last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+ /* Check page index against inode size */
+ if (size == 0 || (page->index > last_index))
+ goto out_trans_end;
+
+ ret = -EAGAIN;
+ /* If truncated, we must retry the operation, as we may have raced
+ * with the glock demotion code.
+ */
+ if (!PageUptodate(page) || page->mapping != inode->i_mapping)
+ goto out_trans_end;
+
+ /* Unstuff, if required, and allocate backing blocks for page */
ret = 0;
- if (!PageUptodate(page) || page->mapping != ip->i_inode.i_mapping)
- goto out_unlock_page;
- if (gfs2_is_stuffed(ip)) {
+ if (gfs2_is_stuffed(ip))
ret = gfs2_unstuff_dinode(ip, page);
- if (ret)
- goto out_unlock_page;
- }
- ret = gfs2_allocate_page_backing(page);
+ if (ret == 0)
+ ret = gfs2_allocate_page_backing(page);
-out_unlock_page:
- unlock_page(page);
+out_trans_end:
+ if (ret)
+ unlock_page(page);
gfs2_trans_end(sdp);
out_trans_fail:
gfs2_inplace_release(ip);
@@ -420,11 +453,17 @@ out_unlock:
gfs2_glock_dq(&gh);
out:
gfs2_holder_uninit(&gh);
- if (ret == -ENOMEM)
- ret = VM_FAULT_OOM;
- else if (ret)
- ret = VM_FAULT_SIGBUS;
- return ret;
+ if (ret == 0) {
+ set_page_dirty(page);
+ /* This check must happen after the transaction lock is dropped */
+ if (inode->i_sb->s_frozen == SB_UNFROZEN) {
+ wait_on_page_writeback(page);
+ } else {
+ ret = -EAGAIN;
+ unlock_page(page);
+ }
+ }
+ return block_page_mkwrite_return(ret);
}
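block_page_mkwrite_return() translates the errno convention used above into the VM_FAULT_* codes the fault handler expects. From memory, so treat as approximate, the mapping is:

    if (err == 0)
            return VM_FAULT_LOCKED;  /* page left locked and dirty */
    if (err == -ENOMEM)
            return VM_FAULT_OOM;
    if (err == -EAGAIN)
            return VM_FAULT_RETRY;   /* e.g. the frozen-fs case above */
    return VM_FAULT_SIGBUS;          /* -EIO, -ENOSPC, ... */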
static const struct vm_operations_struct gfs2_vm_ops = {
@@ -544,21 +583,39 @@ static int gfs2_close(struct inode *inode, struct file *file)
/**
* gfs2_fsync - sync the dirty data for a file (across the cluster)
- * @file: the file that points to the dentry (we ignore this)
+ * @file: the file that points to the dentry
+ * @start: the start position in the file to sync
+ * @end: the end position in the file to sync
* @datasync: set if we can ignore timestamp changes
*
- * The VFS will flush data for us. We only need to worry
- * about metadata here.
+ * We split the data flushing here so that we don't wait for the data
+ * until after we've also sent the metadata to disk. Note that for
+ * data=ordered, we will write & wait for the data at the log flush
+ * stage anyway, so this is unlikely to make much of a difference
+ * except in the data=writeback case.
+ *
+ * If the fdatawrite fails due to any reason except -EIO, we will
+ * continue the remainder of the fsync, although we'll still report
+ * the error at the end. This is to match filemap_write_and_wait_range()
+ * behaviour.
*
* Returns: errno
*/
-static int gfs2_fsync(struct file *file, int datasync)
+static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
+ int datasync)
{
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
int sync_state = inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC);
struct gfs2_inode *ip = GFS2_I(inode);
- int ret;
+ int ret, ret1 = 0;
+
+ if (mapping->nrpages) {
+ ret1 = filemap_fdatawrite_range(mapping, start, end);
+ if (ret1 == -EIO)
+ return ret1;
+ }
if (datasync)
sync_state &= ~I_DIRTY_SYNC;
@@ -567,10 +624,15 @@ static int gfs2_fsync(struct file *file, int datasync)
ret = sync_inode_metadata(inode, 1);
if (ret)
return ret;
- gfs2_ail_flush(ip->i_gl);
+ if (gfs2_is_jdata(ip))
+ filemap_write_and_wait(mapping);
+ gfs2_ail_flush(ip->i_gl, 1);
}
- return 0;
+ if (mapping->nrpages)
+ ret = filemap_fdatawait_range(mapping, start, end);
+
+ return ret ? ret : ret1;
}
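The result is a three-phase fsync that overlaps data writeback with the metadata flush. The skeleton, stripped of the error handling and the jdata special case:

    ret1 = filemap_fdatawrite_range(mapping, start, end); /* start data I/O   */
    sync_inode_metadata(inode, 1);                        /* metadata, blocks */
    gfs2_ail_flush(ip->i_gl, 1);                          /* journal revokes  */
    ret = filemap_fdatawait_range(mapping, start, end);   /* now wait on data */
    return ret ? ret : ret1;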
/**
@@ -607,135 +669,18 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return generic_file_aio_write(iocb, iov, nr_segs, pos);
}
-static int empty_write_end(struct page *page, unsigned from,
- unsigned to, int mode)
-{
- struct inode *inode = page->mapping->host;
- struct gfs2_inode *ip = GFS2_I(inode);
- struct buffer_head *bh;
- unsigned offset, blksize = 1 << inode->i_blkbits;
- pgoff_t end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
-
- zero_user(page, from, to-from);
- mark_page_accessed(page);
-
- if (page->index < end_index || !(mode & FALLOC_FL_KEEP_SIZE)) {
- if (!gfs2_is_writeback(ip))
- gfs2_page_add_databufs(ip, page, from, to);
-
- block_commit_write(page, from, to);
- return 0;
- }
-
- offset = 0;
- bh = page_buffers(page);
- while (offset < to) {
- if (offset >= from) {
- set_buffer_uptodate(bh);
- mark_buffer_dirty(bh);
- clear_buffer_new(bh);
- write_dirty_buffer(bh, WRITE);
- }
- offset += blksize;
- bh = bh->b_this_page;
- }
-
- offset = 0;
- bh = page_buffers(page);
- while (offset < to) {
- if (offset >= from) {
- wait_on_buffer(bh);
- if (!buffer_uptodate(bh))
- return -EIO;
- }
- offset += blksize;
- bh = bh->b_this_page;
- }
- return 0;
-}
-
-static int needs_empty_write(sector_t block, struct inode *inode)
-{
- int error;
- struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
-
- bh_map.b_size = 1 << inode->i_blkbits;
- error = gfs2_block_map(inode, block, &bh_map, 0);
- if (unlikely(error))
- return error;
- return !buffer_mapped(&bh_map);
-}
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to,
- int mode)
-{
- struct inode *inode = page->mapping->host;
- unsigned start, end, next, blksize;
- sector_t block = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- int ret;
-
- blksize = 1 << inode->i_blkbits;
- next = end = 0;
- while (next < from) {
- next += blksize;
- block++;
- }
- start = next;
- do {
- next += blksize;
- ret = needs_empty_write(block, inode);
- if (unlikely(ret < 0))
- return ret;
- if (ret == 0) {
- if (end) {
- ret = __block_write_begin(page, start, end - start,
- gfs2_block_map);
- if (unlikely(ret))
- return ret;
- ret = empty_write_end(page, start, end, mode);
- if (unlikely(ret))
- return ret;
- end = 0;
- }
- start = next;
- }
- else
- end = next;
- block++;
- } while (next < to);
-
- if (end) {
- ret = __block_write_begin(page, start, end - start, gfs2_block_map);
- if (unlikely(ret))
- return ret;
- ret = empty_write_end(page, start, end, mode);
- if (unlikely(ret))
- return ret;
- }
-
- return 0;
-}
-
static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
int mode)
{
struct gfs2_inode *ip = GFS2_I(inode);
struct buffer_head *dibh;
int error;
- u64 start = offset >> PAGE_CACHE_SHIFT;
- unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
- u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
- pgoff_t curr;
- struct page *page;
- unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
- unsigned int from, to;
-
- if (!end_offset)
- end_offset = PAGE_CACHE_SIZE;
+ unsigned int nr_blks;
+ sector_t lblock = offset >> inode->i_blkbits;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (unlikely(error))
- goto out;
+ return error;
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
@@ -745,40 +690,31 @@ static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
goto out;
}
- curr = start;
- offset = start << PAGE_CACHE_SHIFT;
- from = start_offset;
- to = PAGE_CACHE_SIZE;
- while (curr <= end) {
- page = grab_cache_page_write_begin(inode->i_mapping, curr,
- AOP_FLAG_NOFS);
- if (unlikely(!page)) {
- error = -ENOMEM;
- goto out;
- }
+ while (len) {
+ struct buffer_head bh_map = { .b_state = 0, .b_blocknr = 0 };
+ bh_map.b_size = len;
+ set_buffer_zeronew(&bh_map);
- if (curr == end)
- to = end_offset;
- error = write_empty_blocks(page, from, to, mode);
- if (!error && offset + to > inode->i_size &&
- !(mode & FALLOC_FL_KEEP_SIZE)) {
- i_size_write(inode, offset + to);
- }
- unlock_page(page);
- page_cache_release(page);
- if (error)
+ error = gfs2_block_map(inode, lblock, &bh_map, 1);
+ if (unlikely(error))
goto out;
- curr++;
- offset += PAGE_CACHE_SIZE;
- from = 0;
+ len -= bh_map.b_size;
+ nr_blks = bh_map.b_size >> inode->i_blkbits;
+ lblock += nr_blks;
+ if (!buffer_new(&bh_map))
+ continue;
+ if (unlikely(!buffer_zeronew(&bh_map))) {
+ error = -EIO;
+ goto out;
+ }
}
+ if (offset + len > inode->i_size && !(mode & FALLOC_FL_KEEP_SIZE))
+ i_size_write(inode, offset + len);
- gfs2_dinode_out(ip, dibh->b_data);
mark_inode_dirty(inode);
- brelse(dibh);
-
out:
+ brelse(dibh);
return error;
}
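The rewritten fallocate_chunk() exploits a gfs2_block_map() property: on return, bh.b_size is reduced to the length of the extent actually mapped, so each iteration covers a whole extent rather than one page. The loop in isolation:

    while (len) {
            struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };

            bh.b_size = len;                  /* ask for everything remaining */
            set_buffer_zeronew(&bh);          /* new blocks must read as zeros */
            error = gfs2_block_map(inode, lblock, &bh, 1 /* create */);
            if (error)
                    break;
            len    -= bh.b_size;              /* b_size == extent mapped */
            lblock += bh.b_size >> inode->i_blkbits;
    }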
@@ -786,7 +722,7 @@ static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
unsigned int *data_blocks, unsigned int *ind_blocks)
{
const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
+ unsigned int max_blocks = ip->i_rgd->rd_free_clone;
unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
for (tmp = max_data; tmp > sdp->sd_diptrs;) {
@@ -818,6 +754,7 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
int error;
loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1);
loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
+ loff_t max_chunk_size = UINT_MAX & bsize_mask;
next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
/* We only support the FALLOC_FL_KEEP_SIZE mode */
@@ -871,11 +808,12 @@ retry:
goto out_qunlock;
}
max_bytes = bytes;
- calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
+ calc_max_reserv(ip, (len > max_chunk_size)? max_chunk_size: len,
+ &max_bytes, &data_blocks, &ind_blocks);
al->al_requested = data_blocks + ind_blocks;
rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
- RES_RG_HDR + gfs2_rg_blocks(al);
+ RES_RG_HDR + gfs2_rg_blocks(ip);
if (gfs2_is_jdata(ip))
rblocks += data_blocks ? data_blocks : 1;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1c1336e..88e8a23 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -409,6 +409,10 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state)
if (held1 && held2 && list_empty(&gl->gl_holders))
clear_bit(GLF_QUEUED, &gl->gl_flags);
+ if (new_state != gl->gl_target)
+ /* shorten our minimum hold time */
+ gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
+ GL_GLOCK_MIN_HOLD);
gl->gl_state = new_state;
gl->gl_tchange = jiffies;
}
@@ -668,7 +672,7 @@ static void glock_work_func(struct work_struct *work)
gl->gl_demote_state != LM_ST_EXCLUSIVE) {
unsigned long holdtime, now = jiffies;
- holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
+ holdtime = gl->gl_tchange + gl->gl_hold_time;
if (time_before(now, holdtime))
delay = holdtime - now;
@@ -679,9 +683,14 @@ static void glock_work_func(struct work_struct *work)
}
run_queue(gl, 0);
spin_unlock(&gl->gl_spin);
- if (!delay ||
- queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+ if (!delay)
gfs2_glock_put(gl);
+ else {
+ if (gl->gl_name.ln_type != LM_TYPE_INODE)
+ delay = 0;
+ if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
+ gfs2_glock_put(gl);
+ }
if (drop_ref)
gfs2_glock_put(gl);
}
@@ -743,6 +752,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
gl->gl_tchange = jiffies;
gl->gl_object = NULL;
gl->gl_sbd = sdp;
+ gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
INIT_WORK(&gl->gl_delete, delete_work_func);
@@ -855,8 +865,15 @@ static int gfs2_glock_demote_wait(void *word)
static void wait_on_holder(struct gfs2_holder *gh)
{
+ unsigned long time1 = jiffies;
+
might_sleep();
wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE);
+ if (time_after(jiffies, time1 + HZ)) /* have we waited > a second? */
+ /* Lengthen the minimum hold time. */
+ gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time +
+ GL_GLOCK_HOLD_INCR,
+ GL_GLOCK_MAX_HOLD);
}
static void wait_on_demote(struct gfs2_glock *gl)
@@ -1093,8 +1110,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
gfs2_glock_hold(gl);
if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
- !test_bit(GLF_DEMOTE, &gl->gl_flags))
- delay = gl->gl_ops->go_min_hold_time;
+ !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
+ gl->gl_name.ln_type == LM_TYPE_INODE)
+ delay = gl->gl_hold_time;
if (queue_delayed_work(glock_workqueue, &gl->gl_work, delay) == 0)
gfs2_glock_put(gl);
}
@@ -1273,12 +1291,13 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
unsigned long now = jiffies;
gfs2_glock_hold(gl);
- holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time;
- if (test_bit(GLF_QUEUED, &gl->gl_flags)) {
+ holdtime = gl->gl_tchange + gl->gl_hold_time;
+ if (test_bit(GLF_QUEUED, &gl->gl_flags) &&
+ gl->gl_name.ln_type == LM_TYPE_INODE) {
if (time_before(now, holdtime))
delay = holdtime - now;
if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
- delay = gl->gl_ops->go_min_hold_time;
+ delay = gl->gl_hold_time;
}
spin_lock(&gl->gl_spin);
@@ -1667,7 +1686,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
dtime *= 1000000/HZ; /* demote time in uSec */
if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
dtime = 0;
- gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d\n",
+ gfs2_print_dbg(seq, "G: s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d v:%d r:%d m:%ld\n",
state2str(gl->gl_state),
gl->gl_name.ln_type,
(unsigned long long)gl->gl_name.ln_number,
@@ -1676,7 +1695,7 @@ static int __dump_glock(struct seq_file *seq, const struct gfs2_glock *gl)
state2str(gl->gl_demote_state), dtime,
atomic_read(&gl->gl_ail_count),
atomic_read(&gl->gl_revokes),
- atomic_read(&gl->gl_ref));
+ atomic_read(&gl->gl_ref), gl->gl_hold_time);
list_for_each_entry(gh, &gl->gl_holders, gh_list) {
error = dump_holder(seq, gh);
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 6b2f757..2553b85 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -113,6 +113,12 @@ enum {
#define GLR_TRYFAILED 13
+#define GL_GLOCK_MAX_HOLD (long)(HZ / 5)
+#define GL_GLOCK_DFT_HOLD (long)(HZ / 5)
+#define GL_GLOCK_MIN_HOLD (long)(10)
+#define GL_GLOCK_HOLD_INCR (long)(HZ / 20)
+#define GL_GLOCK_HOLD_DECR (long)(HZ / 40)
+
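Together these constants implement a small additive-increase/additive-decrease controller per glock, replacing the fixed go_min_hold_time: demote requests shorten the hold time, and a holder that waited over a second lengthens it. The two adjustments, extracted from the glock.c hunks above:

    /* demote observed (another node wants the lock): back off quickly */
    gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
                           GL_GLOCK_MIN_HOLD);

    /* a local holder waited > 1s: keep the lock longer next time */
    gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
                           GL_GLOCK_MAX_HOLD);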
struct lm_lockops {
const char *lm_proto_name;
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
@@ -195,7 +201,7 @@ int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs);
void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs);
-__attribute__ ((format(printf, 2, 3)))
+__printf(2, 3)
void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...);
/**
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 2cca293..1656df7 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -26,41 +26,57 @@
#include "rgrp.h"
#include "util.h"
#include "trans.h"
+#include "dir.h"
+
+static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
+{
+ fs_err(gl->gl_sbd, "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page state 0x%lx\n",
+ bh, (unsigned long long)bh->b_blocknr, bh->b_state,
+ bh->b_page->mapping, bh->b_page->flags);
+ fs_err(gl->gl_sbd, "AIL glock %u:%llu mapping %p\n",
+ gl->gl_name.ln_type, gl->gl_name.ln_number,
+ gfs2_glock2aspace(gl));
+ gfs2_lm_withdraw(gl->gl_sbd, "AIL error\n");
+}
/**
* __gfs2_ail_flush - remove all buffers for a given lock from the AIL
* @gl: the glock
+ * @fsync: set when called from fsync (not all buffers will be clean)
*
* None of the buffers should be dirty, locked, or pinned.
*/
-static void __gfs2_ail_flush(struct gfs2_glock *gl)
+static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
struct list_head *head = &gl->gl_ail_list;
- struct gfs2_bufdata *bd;
+ struct gfs2_bufdata *bd, *tmp;
struct buffer_head *bh;
+ const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock);
+ sector_t blocknr;
+ gfs2_log_lock(sdp);
spin_lock(&sdp->sd_ail_lock);
- while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata,
- bd_ail_gl_list);
+ list_for_each_entry_safe(bd, tmp, head, bd_ail_gl_list) {
bh = bd->bd_bh;
- gfs2_remove_from_ail(bd);
- bd->bd_bh = NULL;
+ if (bh->b_state & b_state) {
+ if (fsync)
+ continue;
+ gfs2_ail_error(gl, bh);
+ }
+ blocknr = bh->b_blocknr;
bh->b_private = NULL;
- spin_unlock(&sdp->sd_ail_lock);
+ gfs2_remove_from_ail(bd); /* drops ref on bh */
- bd->bd_blkno = bh->b_blocknr;
- gfs2_log_lock(sdp);
- gfs2_assert_withdraw(sdp, !buffer_busy(bh));
- gfs2_trans_add_revoke(sdp, bd);
- gfs2_log_unlock(sdp);
+ bd->bd_bh = NULL;
+ bd->bd_blkno = blocknr;
- spin_lock(&sdp->sd_ail_lock);
+ gfs2_trans_add_revoke(sdp, bd);
}
- gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
+ BUG_ON(!fsync && atomic_read(&gl->gl_ail_count));
spin_unlock(&sdp->sd_ail_lock);
+ gfs2_log_unlock(sdp);
}
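The fsync flag changes what a busy buffer means here: on the journal-flush path it indicates corruption and the node withdraws, while during fsync in-flight writeback is normal and the buffer is skipped. The decision point, isolated from the loop above:

    if (bh->b_state & ((1UL << BH_Dirty) | (1UL << BH_Pinned) | (1UL << BH_Lock))) {
            if (fsync)
                    continue;           /* expected: writeback still in flight */
            gfs2_ail_error(gl, bh);     /* journal path: withdraw the fs */
    }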
@@ -83,13 +99,13 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
BUG_ON(current->journal_info);
current->journal_info = &tr;
- __gfs2_ail_flush(gl);
+ __gfs2_ail_flush(gl, 0);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
}
-void gfs2_ail_flush(struct gfs2_glock *gl)
+void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
struct gfs2_sbd *sdp = gl->gl_sbd;
unsigned int revokes = atomic_read(&gl->gl_ail_count);
@@ -101,7 +117,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl)
ret = gfs2_trans_begin(sdp, 0, revokes);
if (ret)
return;
- __gfs2_ail_flush(gl);
+ __gfs2_ail_flush(gl, fsync);
gfs2_trans_end(sdp);
gfs2_log_flush(sdp, NULL);
}
@@ -118,6 +134,7 @@ void gfs2_ail_flush(struct gfs2_glock *gl)
static void rgrp_go_sync(struct gfs2_glock *gl)
{
struct address_space *metamapping = gfs2_glock2aspace(gl);
+ struct gfs2_rgrpd *rgd;
int error;
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
@@ -129,6 +146,12 @@ static void rgrp_go_sync(struct gfs2_glock *gl)
error = filemap_fdatawait(metamapping);
mapping_set_error(metamapping, error);
gfs2_ail_empty_gl(gl);
+
+ spin_lock(&gl->gl_spin);
+ rgd = gl->gl_object;
+ if (rgd)
+ gfs2_free_clones(rgd);
+ spin_unlock(&gl->gl_spin);
}
/**
@@ -218,6 +241,7 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
if (ip) {
set_bit(GIF_INVALID, &ip->i_flags);
forget_all_cached_acls(&ip->i_inode);
+ gfs2_dir_hash_inval(ip);
}
}
@@ -275,7 +299,7 @@ static void gfs2_set_nlink(struct inode *inode, u32 nlink)
if (nlink == 0)
clear_nlink(inode);
else
- inode->i_nlink = nlink;
+ set_nlink(inode, nlink);
}
}
@@ -316,6 +340,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
ip->i_generation = be64_to_cpu(str->di_generation);
ip->i_diskflags = be32_to_cpu(str->di_flags);
+ ip->i_eattr = be64_to_cpu(str->di_eattr);
+ /* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
gfs2_set_inode_flags(&ip->i_inode);
height = be16_to_cpu(str->di_height);
if (unlikely(height > GFS2_MAX_META_HEIGHT))
@@ -328,7 +354,6 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
ip->i_depth = (u8)depth;
ip->i_entries = be32_to_cpu(str->di_entries);
- ip->i_eattr = be64_to_cpu(str->di_eattr);
if (S_ISREG(ip->i_inode.i_mode))
gfs2_set_aops(&ip->i_inode);
@@ -427,33 +452,6 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl)
}
/**
- * rgrp_go_lock - operation done after an rgrp lock is locked by
- * a first holder on this node.
- * @gl: the glock
- * @flags:
- *
- * Returns: errno
- */
-
-static int rgrp_go_lock(struct gfs2_holder *gh)
-{
- return gfs2_rgrp_bh_get(gh->gh_gl->gl_object);
-}
-
-/**
- * rgrp_go_unlock - operation done before an rgrp lock is unlocked by
- * a last holder on this node.
- * @gl: the glock
- * @flags:
- *
- */
-
-static void rgrp_go_unlock(struct gfs2_holder *gh)
-{
- gfs2_rgrp_bh_put(gh->gh_gl->gl_object);
-}
-
-/**
* trans_go_sync - promote/demote the transaction glock
* @gl: the glock
* @state: the requested state
@@ -549,18 +547,16 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
.go_lock = inode_go_lock,
.go_dump = inode_go_dump,
.go_type = LM_TYPE_INODE,
- .go_min_hold_time = HZ / 5,
.go_flags = GLOF_ASPACE,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_xmote_th = rgrp_go_sync,
.go_inval = rgrp_go_inval,
- .go_lock = rgrp_go_lock,
- .go_unlock = rgrp_go_unlock,
+ .go_lock = gfs2_rgrp_go_lock,
+ .go_unlock = gfs2_rgrp_go_unlock,
.go_dump = gfs2_rgrp_dump,
.go_type = LM_TYPE_RGRP,
- .go_min_hold_time = HZ / 5,
.go_flags = GLOF_ASPACE,
};
diff --git a/fs/gfs2/glops.h b/fs/gfs2/glops.h
index 6fce409..bf95a2d 100644
--- a/fs/gfs2/glops.h
+++ b/fs/gfs2/glops.h
@@ -23,6 +23,6 @@ extern const struct gfs2_glock_operations gfs2_quota_glops;
extern const struct gfs2_glock_operations gfs2_journal_glops;
extern const struct gfs2_glock_operations *gfs2_glops_list[];
-extern void gfs2_ail_flush(struct gfs2_glock *gl);
+extern void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync);
#endif /* __GLOPS_DOT_H__ */
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 81206e7..7389dfd 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -18,6 +18,7 @@
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/completion.h>
+#include <linux/rbtree.h>
#define DIO_WAIT 0x00000010
#define DIO_METADATA 0x00000020
@@ -78,8 +79,7 @@ struct gfs2_bitmap {
};
struct gfs2_rgrpd {
- struct list_head rd_list; /* Link with superblock */
- struct list_head rd_list_mru;
+ struct rb_node rd_node; /* Link with superblock */
struct gfs2_glock *rd_gl; /* Glock for this rgrp */
u64 rd_addr; /* grp block disk address */
u64 rd_data0; /* first data location */
@@ -91,10 +91,7 @@ struct gfs2_rgrpd {
u32 rd_dinodes;
u64 rd_igeneration;
struct gfs2_bitmap *rd_bits;
- struct mutex rd_mutex;
- struct gfs2_log_element rd_le;
struct gfs2_sbd *rd_sbd;
- unsigned int rd_bh_count;
u32 rd_last_alloc;
u32 rd_flags;
#define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */
@@ -106,12 +103,15 @@ struct gfs2_rgrpd {
enum gfs2_state_bits {
BH_Pinned = BH_PrivateStart,
BH_Escaped = BH_PrivateStart + 1,
+ BH_Zeronew = BH_PrivateStart + 2,
};
BUFFER_FNS(Pinned, pinned)
TAS_BUFFER_FNS(Pinned, pinned)
BUFFER_FNS(Escaped, escaped)
TAS_BUFFER_FNS(Escaped, escaped)
+BUFFER_FNS(Zeronew, zeronew)
+TAS_BUFFER_FNS(Zeronew, zeronew)
struct gfs2_bufdata {
struct buffer_head *bd_bh;
@@ -163,7 +163,6 @@ struct gfs2_glock_operations {
int (*go_dump)(struct seq_file *seq, const struct gfs2_glock *gl);
void (*go_callback) (struct gfs2_glock *gl);
const int go_type;
- const unsigned long go_min_hold_time;
const unsigned long go_flags;
#define GLOF_ASPACE 1
};
@@ -221,6 +220,7 @@ struct gfs2_glock {
unsigned int gl_hash;
unsigned long gl_demote_time; /* time of first demote request */
+ long gl_hold_time;
struct list_head gl_holders;
const struct gfs2_glock_operations *gl_ops;
@@ -246,7 +246,6 @@ struct gfs2_glock {
struct gfs2_alloc {
/* Quota stuff */
-
struct gfs2_quota_data *al_qd[2*MAXQUOTAS];
struct gfs2_holder al_qd_ghs[2*MAXQUOTAS];
unsigned int al_qd_num;
@@ -255,18 +254,13 @@ struct gfs2_alloc {
u32 al_alloced; /* Filled in by gfs2_alloc_*() */
/* Filled in by gfs2_inplace_reserve() */
-
- unsigned int al_line;
- char *al_file;
- struct gfs2_holder al_ri_gh;
struct gfs2_holder al_rgd_gh;
- struct gfs2_rgrpd *al_rgd;
-
};
enum {
GIF_INVALID = 0,
GIF_QD_LOCKED = 1,
+ GIF_ALLOC_FAILED = 2,
GIF_SW_PAGED = 3,
};
@@ -282,9 +276,11 @@ struct gfs2_inode {
struct gfs2_holder i_iopen_gh;
struct gfs2_holder i_gh; /* for prepare/commit_write only */
struct gfs2_alloc *i_alloc;
+ struct gfs2_rgrpd *i_rgd;
u64 i_goal; /* goal block for allocations */
struct rw_semaphore i_rw_mutex;
struct list_head i_trunc_list;
+ __be64 *i_hash_cache;
u32 i_entries;
u32 i_diskflags;
u8 i_height;
@@ -573,9 +569,7 @@ struct gfs2_sbd {
int sd_rindex_uptodate;
spinlock_t sd_rindex_spin;
struct mutex sd_rindex_mutex;
- struct list_head sd_rindex_list;
- struct list_head sd_rindex_mru_list;
- struct gfs2_rgrpd *sd_rindex_forward;
+ struct rb_root sd_rindex_tree;
unsigned int sd_rgrps;
unsigned int sd_max_rg_data;
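
The BUFFER_FNS()/TAS_BUFFER_FNS() lines above stamp out set/clear/test (and test-and-set) helpers for the new BH_Zeronew bit. A rough user-space analogue of what such a macro expands to (simplified; the real kernel macros use atomic bitops on bh->b_state):

#include <stdio.h>

/* one macro stamps out set/clear/test helpers for a named bit */
#define DEFINE_FLAG_FNS(bit, name)                                          \
static void set_##name(unsigned long *f)   { *f |=  (1UL << (bit)); }       \
static void clear_##name(unsigned long *f) { *f &= ~(1UL << (bit)); }       \
static int  test_##name(unsigned long *f)  { return !!(*f & (1UL << (bit))); }

DEFINE_FLAG_FNS(2, buffer_zeronew)   /* cf. BH_PrivateStart + 2 */

int main(void)
{
	unsigned long flags = 0;

	set_buffer_zeronew(&flags);
	printf("%d\n", test_buffer_zeronew(&flags));  /* 1 */
	clear_buffer_zeronew(&flags);
	printf("%d\n", test_buffer_zeronew(&flags));  /* 0 */
	return 0;
}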
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 03e0c52..cfd4959 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -307,7 +307,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
}
if (!is_root) {
- error = gfs2_permission(dir, MAY_EXEC, 0);
+ error = gfs2_permission(dir, MAY_EXEC);
if (error)
goto out;
}
@@ -337,7 +337,7 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
{
int error;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
+ error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -583,7 +583,7 @@ static int link_dinode(struct gfs2_inode *dip, const struct qstr *name,
goto fail_quota_locks;
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- al->al_rgd->rd_length +
+ dip->i_rgd->rd_length +
2 * RES_DINODE +
RES_STATFS + RES_QUOTA, 0);
if (error)
@@ -613,8 +613,7 @@ fail_end_trans:
gfs2_trans_end(sdp);
fail_ipreserv:
- if (dip->i_alloc->al_rgd)
- gfs2_inplace_release(dip);
+ gfs2_inplace_release(dip);
fail_quota_locks:
gfs2_quota_unlock(dip);
@@ -624,31 +623,29 @@ fail:
return error;
}
-static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
- const struct qstr *qstr)
+int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *fs_info)
{
- int err;
- size_t len;
- void *value;
- char *name;
-
- err = security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
- &name, &value, &len);
-
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
+ const struct xattr *xattr;
+ int err = 0;
+
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ err = __gfs2_xattr_set(inode, xattr->name, xattr->value,
+ xattr->value_len, 0,
+ GFS2_EATYPE_SECURITY);
+ if (err < 0)
+ break;
}
-
- err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
- GFS2_EATYPE_SECURITY);
- kfree(value);
- kfree(name);
-
return err;
}
+static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
+ const struct qstr *qstr)
+{
+ return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr,
+ &gfs2_initxattrs, NULL);
+}
+
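The rewritten security path above relies on the callback form of security_inode_init_security(): the LSM assembles a NULL-name-terminated xattr array and hands it to gfs2_initxattrs(), which writes each entry. A small stand-alone sketch of that iteration pattern (walk_xattrs and print_one are hypothetical names; only the loop shape mirrors the patch):

#include <stddef.h>

struct xattr { const char *name; const void *value; size_t value_len; };

/* walk a NULL-name-terminated array, as gfs2_initxattrs() does */
static int walk_xattrs(void *inode, const struct xattr *array,
		       int (*set_one)(void *inode, const struct xattr *x))
{
	const struct xattr *x;
	int err = 0;

	for (x = array; x->name != NULL; x++) {
		err = set_one(inode, x);
		if (err < 0)
			break;          /* first failure stops the walk */
	}
	return err;
}

static int print_one(void *inode, const struct xattr *x)
{
	(void)inode;
	return x->value_len > 0 ? 0 : -1;
}

int main(void)
{
	static const struct xattr arr[] = {
		{ "security.selinux", "ctx", 3 },
		{ NULL, NULL, 0 },              /* sentinel */
	};
	return walk_xattrs(NULL, arr, print_one);
}
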
/**
* gfs2_create_inode - Create a new inode
* @dir: The parent directory
@@ -663,7 +660,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip,
static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
unsigned int mode, dev_t dev, const char *symname,
- unsigned int size)
+ unsigned int size, int excl)
{
const struct qstr *name = &dentry->d_name;
struct gfs2_holder ghs[2];
@@ -683,6 +680,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
goto fail;
error = create_ok(dip, name, mode);
+ if ((error == -EEXIST) && S_ISREG(mode) && !excl) {
+ inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+ gfs2_glock_dq_uninit(ghs);
+ d_instantiate(dentry, inode);
+ return IS_ERR(inode) ? PTR_ERR(inode) : 0;
+ }
if (error)
goto fail_gunlock;
@@ -725,21 +728,22 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
brelse(bh);
gfs2_trans_end(sdp);
- if (dip->i_alloc->al_rgd)
- gfs2_inplace_release(dip);
+ gfs2_inplace_release(dip);
gfs2_quota_unlock(dip);
gfs2_alloc_put(dip);
- gfs2_glock_dq_uninit_m(2, ghs);
mark_inode_dirty(inode);
+ gfs2_glock_dq_uninit_m(2, ghs);
d_instantiate(dentry, inode);
return 0;
fail_gunlock2:
gfs2_glock_dq_uninit(ghs + 1);
- if (inode && !IS_ERR(inode))
- iput(inode);
fail_gunlock:
gfs2_glock_dq_uninit(ghs);
+ if (inode && !IS_ERR(inode)) {
+ set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags);
+ iput(inode);
+ }
fail:
if (bh)
brelse(bh);
@@ -758,24 +762,10 @@ fail:
static int gfs2_create(struct inode *dir, struct dentry *dentry,
int mode, struct nameidata *nd)
{
- struct inode *inode;
- int ret;
-
- for (;;) {
- ret = gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0);
- if (ret != -EEXIST || (nd && (nd->flags & LOOKUP_EXCL)))
- return ret;
-
- inode = gfs2_lookupi(dir, &dentry->d_name, 0);
- if (inode) {
- if (!IS_ERR(inode))
- break;
- return PTR_ERR(inode);
- }
- }
-
- d_instantiate(dentry, inode);
- return 0;
+ int excl = 0;
+ if (nd && (nd->flags & LOOKUP_EXCL))
+ excl = 1;
+ return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl);
}
/**
@@ -792,13 +782,8 @@ static int gfs2_create(struct inode *dir, struct dentry *dentry,
static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
struct nameidata *nd)
{
- struct inode *inode = NULL;
-
- inode = gfs2_lookupi(dir, &dentry->d_name, 0);
- if (inode && IS_ERR(inode))
- return ERR_CAST(inode);
-
- if (inode) {
+ struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0);
+ if (inode && !IS_ERR(inode)) {
struct gfs2_glock *gl = GFS2_I(inode)->i_gl;
struct gfs2_holder gh;
int error;
@@ -808,11 +793,8 @@ static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry,
return ERR_PTR(error);
}
gfs2_glock_dq_uninit(&gh);
- return d_splice_alias(inode, dentry);
}
- d_add(dentry, inode);
-
- return NULL;
+ return d_splice_alias(inode, dentry);
}
/**
@@ -857,7 +839,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink == 0)
goto out_gunlock;
- error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC, 0);
+ error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -910,7 +892,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
goto out_gunlock_q;
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- gfs2_rg_blocks(al) +
+ gfs2_rg_blocks(dip) +
2 * RES_DINODE + RES_STATFS +
RES_QUOTA, 0);
if (error)
@@ -932,8 +914,9 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
gfs2_trans_add_bh(ip->i_gl, dibh, 1);
inc_nlink(&ip->i_inode);
ip->i_inode.i_ctime = CURRENT_TIME;
- gfs2_dinode_out(ip, dibh->b_data);
- mark_inode_dirty(&ip->i_inode);
+ ihold(inode);
+ d_instantiate(dentry, inode);
+ mark_inode_dirty(inode);
out_brelse:
brelse(dibh);
@@ -955,11 +938,6 @@ out_child:
out_parent:
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
- if (!error) {
- ihold(inode);
- d_instantiate(dentry, inode);
- mark_inode_dirty(inode);
- }
return error;
}
@@ -990,7 +968,7 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
if (IS_APPEND(&dip->i_inode))
return -EPERM;
- error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC, 0);
+ error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
if (error)
return error;
@@ -1032,8 +1010,6 @@ static int gfs2_unlink_inode(struct gfs2_inode *dip,
clear_nlink(inode);
else
drop_nlink(inode);
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- gfs2_dinode_out(ip, bh->b_data);
mark_inode_dirty(inode);
if (inode->i_nlink == 0)
gfs2_unlink_di(inode);
@@ -1061,13 +1037,8 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
struct buffer_head *bh;
struct gfs2_holder ghs[3];
struct gfs2_rgrpd *rgd;
- struct gfs2_holder ri_gh;
int error;
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- return error;
-
gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1);
@@ -1124,7 +1095,6 @@ out_child:
gfs2_glock_dq(ghs);
out_parent:
gfs2_holder_uninit(ghs);
- gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -1147,7 +1117,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1)
return -ENAMETOOLONG;
- return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size);
+ return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size, 0);
}
/**
@@ -1161,7 +1131,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
{
- return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0);
+ return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, 0, 0);
}
/**
@@ -1176,7 +1146,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode)
static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode,
dev_t dev)
{
- return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0);
+ return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0);
}
/*
@@ -1242,7 +1212,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_inode *ip = GFS2_I(odentry->d_inode);
struct gfs2_inode *nip = NULL;
struct gfs2_sbd *sdp = GFS2_SB(odir);
- struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh;
+ struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, };
struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
@@ -1256,10 +1226,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
return 0;
}
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- return error;
-
if (odip != ndip) {
error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE,
0, &r_gh);
@@ -1336,7 +1302,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
}
}
} else {
- error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC, 0);
+ error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
if (error)
goto out_gunlock;
@@ -1371,7 +1337,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
/* Check out the dir to be renamed */
if (dir_rename) {
- error = gfs2_permission(odentry->d_inode, MAY_WRITE, 0);
+ error = gfs2_permission(odentry->d_inode, MAY_WRITE);
if (error)
goto out_gunlock;
}
@@ -1396,12 +1362,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
al->al_requested = sdp->sd_max_dirres;
- error = gfs2_inplace_reserve_ri(ndip);
+ error = gfs2_inplace_reserve(ndip);
if (error)
goto out_gunlock_q;
error = gfs2_trans_begin(sdp, sdp->sd_max_dirres +
- gfs2_rg_blocks(al) +
+ gfs2_rg_blocks(ndip) +
4 * RES_DINODE + 4 * RES_LEAF +
RES_STATFS + RES_QUOTA + 4, 0);
if (error)
@@ -1467,7 +1433,6 @@ out_gunlock_r:
if (r_gh.gh_gl)
gfs2_glock_dq_uninit(&r_gh);
out:
- gfs2_glock_dq_uninit(&ri_gh);
return error;
}
@@ -1543,7 +1508,7 @@ static void gfs2_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
* Returns: errno
*/
-int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
+int gfs2_permission(struct inode *inode, int mask)
{
struct gfs2_inode *ip;
struct gfs2_holder i_gh;
@@ -1553,7 +1518,7 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
ip = GFS2_I(inode);
if (gfs2_glock_is_locked_by_me(ip->i_gl) == NULL) {
- if (flags & IPERM_FLAG_RCU)
+ if (mask & MAY_NOT_BLOCK)
return -ECHILD;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
if (error)
@@ -1564,28 +1529,17 @@ int gfs2_permission(struct inode *inode, int mask, unsigned int flags)
if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
error = -EACCES;
else
- error = generic_permission(inode, mask, flags, gfs2_check_acl);
+ error = generic_permission(inode, mask);
if (unlock)
gfs2_glock_dq_uninit(&i_gh);
return error;
}
-static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
{
- struct inode *inode = &ip->i_inode;
- struct buffer_head *dibh;
- int error;
-
- error = gfs2_meta_inode_buffer(ip, &dibh);
- if (error)
- return error;
-
setattr_copy(inode, attr);
mark_inode_dirty(inode);
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(ip, dibh->b_data);
- brelse(dibh);
return 0;
}
@@ -1597,19 +1551,19 @@ static int __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
* Returns: errno
*/
-int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
+int gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
{
int error;
if (current->journal_info)
- return __gfs2_setattr_simple(ip, attr);
+ return __gfs2_setattr_simple(inode, attr);
- error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE, 0);
+ error = gfs2_trans_begin(GFS2_SB(inode), RES_DINODE, 0);
if (error)
return error;
- error = __gfs2_setattr_simple(ip, attr);
- gfs2_trans_end(GFS2_SB(&ip->i_inode));
+ error = __gfs2_setattr_simple(inode, attr);
+ gfs2_trans_end(GFS2_SB(inode));
return error;
}
@@ -1647,7 +1601,7 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (error)
goto out_gunlock_q;
- error = gfs2_setattr_simple(ip, attr);
+ error = gfs2_setattr_simple(inode, attr);
if (error)
goto out_end_trans;
@@ -1703,12 +1657,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode))
error = gfs2_acl_chmod(ip, attr);
else
- error = gfs2_setattr_simple(ip, attr);
+ error = gfs2_setattr_simple(inode, attr);
out:
- gfs2_glock_dq_uninit(&i_gh);
if (!error)
mark_inode_dirty(inode);
+ gfs2_glock_dq_uninit(&i_gh);
return error;
}
@@ -1854,6 +1808,7 @@ const struct inode_operations gfs2_file_iops = {
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
.fiemap = gfs2_fiemap,
+ .get_acl = gfs2_get_acl,
};
const struct inode_operations gfs2_dir_iops = {
@@ -1874,6 +1829,7 @@ const struct inode_operations gfs2_dir_iops = {
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
.fiemap = gfs2_fiemap,
+ .get_acl = gfs2_get_acl,
};
const struct inode_operations gfs2_symlink_iops = {
@@ -1888,5 +1844,6 @@ const struct inode_operations gfs2_symlink_iops = {
.listxattr = gfs2_listxattr,
.removexattr = gfs2_removexattr,
.fiemap = gfs2_fiemap,
+ .get_acl = gfs2_get_acl,
};
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 3160607..276e7b5 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -108,8 +108,8 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip);
extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
int is_root);
-extern int gfs2_permission(struct inode *inode, int mask, unsigned int flags);
-extern int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr);
+extern int gfs2_permission(struct inode *inode, int mask);
+extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr);
extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 85c6292..5986464 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -624,9 +624,9 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags))
- submit_bh(WRITE_SYNC | REQ_META, bh);
+ submit_bh(WRITE_SYNC | REQ_META | REQ_PRIO, bh);
else
- submit_bh(WRITE_FLUSH_FUA | REQ_META, bh);
+ submit_bh(WRITE_FLUSH_FUA | REQ_META | REQ_PRIO, bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c133253..465e49a 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -60,6 +60,29 @@ static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
trace_gfs2_pin(bd, 1);
}
+static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
+{
+ return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
+}
+
+static void maybe_release_space(struct gfs2_bufdata *bd)
+{
+ struct gfs2_glock *gl = bd->bd_gl;
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct gfs2_rgrpd *rgd = gl->gl_object;
+ unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
+ struct gfs2_bitmap *bi = rgd->rd_bits + index;
+
+ if (bi->bi_clone == NULL)
+ return;
+ if (sdp->sd_args.ar_discard)
+ gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi);
+ memcpy(bi->bi_clone + bi->bi_offset,
+ bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
+ clear_bit(GBF_FULL, &bi->bi_flags);
+ rgd->rd_free_clone = rgd->rd_free;
+}
+
/**
* gfs2_unpin - Unpin a buffer
* @sdp: the filesystem the buffer belongs to
@@ -81,6 +104,9 @@ static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
mark_buffer_dirty(bh);
clear_buffer_pinned(bh);
+ if (buffer_is_rgrp(bd))
+ maybe_release_space(bd);
+
spin_lock(&sdp->sd_ail_lock);
if (bd->bd_ail) {
list_del(&bd->bd_ail_st_list);
@@ -464,42 +490,6 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
gfs2_revoke_clean(sdp);
}
-static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
-{
- struct gfs2_rgrpd *rgd;
- struct gfs2_trans *tr = current->journal_info;
-
- tr->tr_touched = 1;
-
- rgd = container_of(le, struct gfs2_rgrpd, rd_le);
-
- gfs2_log_lock(sdp);
- if (!list_empty(&le->le_list)){
- gfs2_log_unlock(sdp);
- return;
- }
- gfs2_rgrp_bh_hold(rgd);
- sdp->sd_log_num_rg++;
- list_add(&le->le_list, &sdp->sd_log_le_rg);
- gfs2_log_unlock(sdp);
-}
-
-static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
-{
- struct list_head *head = &sdp->sd_log_le_rg;
- struct gfs2_rgrpd *rgd;
-
- while (!list_empty(head)) {
- rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list);
- list_del_init(&rgd->rd_le.le_list);
- sdp->sd_log_num_rg--;
-
- gfs2_rgrp_repolish_clones(rgd);
- gfs2_rgrp_bh_put(rgd);
- }
- gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
-}
-
/**
* databuf_lo_add - Add a databuf to the transaction.
*
@@ -695,8 +685,6 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
brelse(bh_log);
brelse(bh_ip);
- if (error)
- break;
sdp->sd_replayed_blocks++;
}
@@ -761,8 +749,6 @@ const struct gfs2_log_operations gfs2_revoke_lops = {
};
const struct gfs2_log_operations gfs2_rg_lops = {
- .lo_add = rg_lo_add,
- .lo_after_commit = rg_lo_after_commit,
.lo_name = "rg",
};
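
maybe_release_space() above works because, at unpin time, the journalled copy of the rgrp bitmap block is stable, so the allocation-side clone can be overwritten with the real bitmap and rd_free_clone reset, making freed-but-held blocks usable again. A toy illustration of the resync step (illustrative struct, no journalling or discard handling):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

struct bitmap {
	uint8_t *real;   /* journalled, now-stable copy (bi_bh data) */
	uint8_t *clone;  /* allocator's working copy (bi_clone) */
	size_t len;
};

/* once the journalled buffer is unpinned, resync the clone from it */
static void resync_clone(struct bitmap *bi)
{
	if (bi->clone == NULL)
		return;
	memcpy(bi->clone, bi->real, bi->len);
}

int main(void)
{
	uint8_t real[4]  = { 0, 0, 0, 0 };   /* blocks freed on disk */
	uint8_t clone[4] = { 1, 1, 1, 1 };   /* still busy in the clone */
	struct bitmap bi = { real, clone, sizeof(real) };

	resync_clone(&bi);
	printf("%d\n", clone[0]);            /* prints 0: space released */
	return 0;
}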
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c
index c2b34cd..8a139ff 100644
--- a/fs/gfs2/main.c
+++ b/fs/gfs2/main.c
@@ -16,7 +16,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include "gfs2.h"
#include "incore.h"
@@ -41,6 +41,7 @@ static void gfs2_init_inode_once(void *foo)
init_rwsem(&ip->i_rw_mutex);
INIT_LIST_HEAD(&ip->i_trunc_list);
ip->i_alloc = NULL;
+ ip->i_hash_cache = NULL;
}
static void gfs2_init_glock_once(void *foo)
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 747238c..be29858 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -37,7 +37,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
{
struct buffer_head *bh, *head;
int nr_underway = 0;
- int write_op = REQ_META |
+ int write_op = REQ_META | REQ_PRIO |
(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
BUG_ON(!PageLocked(page));
@@ -225,7 +225,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
}
bh->b_end_io = end_buffer_read_sync;
get_bh(bh);
- submit_bh(READ_SYNC | REQ_META, bh);
+ submit_bh(READ_SYNC | REQ_META | REQ_PRIO, bh);
if (!(flags & DIO_WAIT))
return 0;
@@ -435,7 +435,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen)
if (buffer_uptodate(first_bh))
goto out;
if (!buffer_locked(first_bh))
- ll_rw_block(READ_SYNC | REQ_META, 1, &first_bh);
+ ll_rw_block(READ_SYNC | REQ_META | REQ_PRIO, 1, &first_bh);
dblock++;
extlen--;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index fa780e6..cb23c2b 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -14,6 +14,7 @@
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/kthread.h>
+#include <linux/export.h>
#include <linux/namei.h>
#include <linux/mount.h>
#include <linux/gfs2_ondisk.h>
@@ -77,8 +78,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
spin_lock_init(&sdp->sd_rindex_spin);
mutex_init(&sdp->sd_rindex_mutex);
- INIT_LIST_HEAD(&sdp->sd_rindex_list);
- INIT_LIST_HEAD(&sdp->sd_rindex_mru_list);
+ sdp->sd_rindex_tree.rb_node = NULL;
INIT_LIST_HEAD(&sdp->sd_jindex_list);
spin_lock_init(&sdp->sd_jindex_spin);
@@ -224,7 +224,7 @@ static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector, int silent)
bio->bi_end_io = end_bio_io_page;
bio->bi_private = page;
- submit_bio(READ_SYNC | REQ_META, bio);
+ submit_bio(READ_SYNC | REQ_META | REQ_PRIO, bio);
wait_on_page_locked(page);
bio_put(bio);
if (!PageUptodate(page)) {
@@ -652,7 +652,6 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
fs_err(sdp, "can't lookup journal index: %d\n", error);
return PTR_ERR(sdp->sd_jindex);
}
- ip = GFS2_I(sdp->sd_jindex);
/* Load in the journal index special file */
@@ -764,7 +763,6 @@ fail:
static int init_inodes(struct gfs2_sbd *sdp, int undo)
{
int error = 0;
- struct gfs2_inode *ip;
struct inode *master = sdp->sd_master_dir->d_inode;
if (undo)
@@ -789,7 +787,6 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo)
fs_err(sdp, "can't get resource index inode: %d\n", error);
goto fail_statfs;
}
- ip = GFS2_I(sdp->sd_rindex);
sdp->sd_rindex_uptodate = 0;
/* Read in the quota inode */
@@ -1094,6 +1091,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent
if (sdp->sd_args.ar_nobarrier)
set_bit(SDF_NOBARRIERS, &sdp->sd_flags);
+ sb->s_flags |= MS_NOSEC;
sb->s_magic = GFS2_MAGIC;
sb->s_op = &gfs2_super_ops;
sb->s_d_op = &gfs2_dops;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 42e8d23..7e528dc 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -638,15 +638,18 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
unsigned long index = loc >> PAGE_CACHE_SHIFT;
unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
unsigned blocksize, iblock, pos;
- struct buffer_head *bh, *dibh;
+ struct buffer_head *bh;
struct page *page;
void *kaddr, *ptr;
struct gfs2_quota q, *qp;
int err, nbytes;
u64 size;
- if (gfs2_is_stuffed(ip))
- gfs2_unstuff_dinode(ip, NULL);
+ if (gfs2_is_stuffed(ip)) {
+ err = gfs2_unstuff_dinode(ip, NULL);
+ if (err)
+ return err;
+ }
memset(&q, 0, sizeof(struct gfs2_quota));
err = gfs2_internal_read(ip, NULL, (char *)&q, &loc, sizeof(q));
@@ -709,7 +712,7 @@ get_a_page:
set_buffer_uptodate(bh);
if (!buffer_uptodate(bh)) {
- ll_rw_block(READ_META, 1, &bh);
+ ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
goto unlock_out;
@@ -736,22 +739,13 @@ get_a_page:
goto get_a_page;
}
- /* Update the disk inode timestamp and size (if extended) */
- err = gfs2_meta_inode_buffer(ip, &dibh);
- if (err)
- goto out;
-
size = loc + sizeof(struct gfs2_quota);
if (size > inode->i_size)
i_size_write(inode, size);
inode->i_mtime = inode->i_atime = CURRENT_TIME;
- gfs2_trans_add_bh(ip->i_gl, dibh, 1);
- gfs2_dinode_out(ip, dibh->b_data);
- brelse(dibh);
mark_inode_dirty(inode);
-
-out:
return err;
+
unlock_out:
unlock_page(page);
page_cache_release(page);
@@ -822,7 +816,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
goto out_alloc;
if (nalloc)
- blocks += gfs2_rg_blocks(al) + nalloc * ind_blocks + RES_STATFS;
+ blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS;
error = gfs2_trans_begin(sdp, blocks, 0);
if (error)
@@ -936,7 +930,9 @@ int gfs2_quota_lock(struct gfs2_inode *ip, u32 uid, u32 gid)
unsigned int x;
int error = 0;
- gfs2_quota_hold(ip, uid, gid);
+ error = gfs2_quota_hold(ip, uid, gid);
+ if (error)
+ return error;
if (capable(CAP_SYS_RESOURCE) ||
sdp->sd_args.ar_quota != GFS2_QUOTA_ON)
@@ -1607,7 +1603,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
error = gfs2_inplace_reserve(ip);
if (error)
goto out_alloc;
- blocks += gfs2_rg_blocks(al);
+ blocks += gfs2_rg_blocks(ip);
}
/* Some quotas span block boundaries and can update two blocks,
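
Several of the quota.c hunks share one shape: a call whose result used to be ignored, such as gfs2_unstuff_dinode() or gfs2_quota_hold(), is now checked and its error propagated. A trivial sketch of the fixed pattern (stand-in functions, not the real call chain):

#include <errno.h>
#include <stdio.h>

static int unstuff(void) { return -EIO; }   /* stand-in that fails */
static int adjust(void)  { return 0; }

/* propagate the first failure instead of falling through regardless */
static int adjust_quota(void)
{
	int err = unstuff();
	if (err)
		return err;
	return adjust();
}

int main(void)
{
	printf("%d\n", adjust_quota());      /* prints -5 (-EIO) */
	return 0;
}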
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 9b780df..96bd6d7 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -15,6 +15,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/prefetch.h>
#include <linux/blkdev.h>
+#include <linux/rbtree.h>
#include "gfs2.h"
#include "incore.h"
@@ -328,18 +329,22 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
{
- struct gfs2_rgrpd *rgd;
+ struct rb_node **newn;
+ struct gfs2_rgrpd *cur;
spin_lock(&sdp->sd_rindex_spin);
-
- list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) {
- if (rgrp_contains_block(rgd, blk)) {
- list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
+ newn = &sdp->sd_rindex_tree.rb_node;
+ while (*newn) {
+ cur = rb_entry(*newn, struct gfs2_rgrpd, rd_node);
+ if (blk < cur->rd_addr)
+ newn = &((*newn)->rb_left);
+ else if (blk >= cur->rd_data0 + cur->rd_data)
+ newn = &((*newn)->rb_right);
+ else {
spin_unlock(&sdp->sd_rindex_spin);
- return rgd;
+ return cur;
}
}
-
spin_unlock(&sdp->sd_rindex_spin);
return NULL;
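
The hunk above replaces the MRU-list scan with a range search keyed on rd_addr: descend left when the block is below the group's start, right when it is at or past the end of its data area, and otherwise the current group contains the block. A user-space sketch of the same descent rule (a plain unbalanced BST stands in for the kernel's rb_node; field names are illustrative):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

struct rg {
	uint64_t addr;    /* tree key: rgrp header block (rd_addr) */
	uint64_t data0;   /* first data block (rd_data0) */
	uint32_t data;    /* data blocks in the group (rd_data) */
	struct rg *left, *right;
};

/* same descent rule as the new gfs2_blk2rgrpd() */
static struct rg *blk2rg(struct rg *root, uint64_t blk)
{
	while (root) {
		if (blk < root->addr)
			root = root->left;
		else if (blk >= root->data0 + root->data)
			root = root->right;
		else
			return root;          /* blk falls inside this rgrp */
	}
	return NULL;
}

int main(void)
{
	struct rg b = { 1000, 1010, 500, NULL, NULL };
	struct rg a = { 17, 20, 900, NULL, &b };   /* a is the root */
	struct rg *hit = blk2rg(&a, 1200);

	printf("%llu\n", hit ? (unsigned long long)hit->addr : 0ULL); /* 1000 */
	return 0;
}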
@@ -354,8 +359,15 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)
struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
{
- gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list));
- return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list);
+ const struct rb_node *n;
+ struct gfs2_rgrpd *rgd;
+
+ spin_lock(&sdp->sd_rindex_spin);
+ n = rb_first(&sdp->sd_rindex_tree);
+ rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
+ spin_unlock(&sdp->sd_rindex_spin);
+
+ return rgd;
}
/**
@@ -367,47 +379,60 @@ struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)
struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)
{
- if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list)
+ struct gfs2_sbd *sdp = rgd->rd_sbd;
+ const struct rb_node *n;
+
+ spin_lock(&sdp->sd_rindex_spin);
+ n = rb_next(&rgd->rd_node);
+ if (n == NULL)
+ n = rb_first(&sdp->sd_rindex_tree);
+
+ if (unlikely(&rgd->rd_node == n)) {
+ spin_unlock(&sdp->sd_rindex_spin);
return NULL;
- return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list);
+ }
+ rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
+ spin_unlock(&sdp->sd_rindex_spin);
+ return rgd;
+}
+
+void gfs2_free_clones(struct gfs2_rgrpd *rgd)
+{
+ int x;
+
+ for (x = 0; x < rgd->rd_length; x++) {
+ struct gfs2_bitmap *bi = rgd->rd_bits + x;
+ kfree(bi->bi_clone);
+ bi->bi_clone = NULL;
+ }
}
-static void clear_rgrpdi(struct gfs2_sbd *sdp)
+void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
{
- struct list_head *head;
+ struct rb_node *n;
struct gfs2_rgrpd *rgd;
struct gfs2_glock *gl;
- spin_lock(&sdp->sd_rindex_spin);
- sdp->sd_rindex_forward = NULL;
- spin_unlock(&sdp->sd_rindex_spin);
-
- head = &sdp->sd_rindex_list;
- while (!list_empty(head)) {
- rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list);
+ while ((n = rb_first(&sdp->sd_rindex_tree))) {
+ rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);
gl = rgd->rd_gl;
- list_del(&rgd->rd_list);
- list_del(&rgd->rd_list_mru);
+ rb_erase(n, &sdp->sd_rindex_tree);
if (gl) {
+ spin_lock(&gl->gl_spin);
gl->gl_object = NULL;
+ spin_unlock(&gl->gl_spin);
gfs2_glock_add_to_lru(gl);
gfs2_glock_put(gl);
}
+ gfs2_free_clones(rgd);
kfree(rgd->rd_bits);
kmem_cache_free(gfs2_rgrpd_cachep, rgd);
}
}
-void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
-{
- mutex_lock(&sdp->sd_rindex_mutex);
- clear_rgrpdi(sdp);
- mutex_unlock(&sdp->sd_rindex_mutex);
-}
-
static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
{
printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
@@ -524,22 +549,34 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)
return total_data;
}
-static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf)
+static void rgd_insert(struct gfs2_rgrpd *rgd)
{
- const struct gfs2_rindex *str = buf;
+ struct gfs2_sbd *sdp = rgd->rd_sbd;
+ struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*newn) {
+ struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd,
+ rd_node);
+
+ parent = *newn;
+ if (rgd->rd_addr < cur->rd_addr)
+ newn = &((*newn)->rb_left);
+ else if (rgd->rd_addr > cur->rd_addr)
+ newn = &((*newn)->rb_right);
+ else
+ return;
+ }
- rgd->rd_addr = be64_to_cpu(str->ri_addr);
- rgd->rd_length = be32_to_cpu(str->ri_length);
- rgd->rd_data0 = be64_to_cpu(str->ri_data0);
- rgd->rd_data = be32_to_cpu(str->ri_data);
- rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes);
+ rb_link_node(&rgd->rd_node, parent, newn);
+ rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree);
}
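
rgd_insert() above is the standard rbtree insertion idiom: walk down recording the parent and link point, then splice with rb_link_node() and rebalance with rb_insert_color(). A plain-BST sketch of the same walk-then-link shape (no recolouring; names are illustrative):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

struct rg { uint64_t addr; struct rg *left, *right; };

static void rg_insert(struct rg **rootp, struct rg *new)
{
	struct rg **newn = rootp;

	/* walk down to the correct leaf, as rgd_insert() does */
	while (*newn) {
		struct rg *cur = *newn;
		if (new->addr < cur->addr)
			newn = &cur->left;
		else if (new->addr > cur->addr)
			newn = &cur->right;
		else
			return;   /* duplicate rd_addr: dropped, as in the patch */
	}
	new->left = new->right = NULL;
	*newn = new;   /* stands in for rb_link_node() + rb_insert_color() */
}

int main(void)
{
	struct rg *root = NULL;
	struct rg a = { 100, NULL, NULL }, b = { 50, NULL, NULL };

	rg_insert(&root, &a);
	rg_insert(&root, &b);
	printf("%llu\n", (unsigned long long)root->left->addr);  /* 50 */
	return 0;
}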
/**
* read_rindex_entry - Pull in a new resource index entry from the disk
* @gl: The glock covering the rindex inode
*
- * Returns: 0 on success, error code otherwise
+ * Returns: 0 on success, > 0 on EOF, error code otherwise
*/
static int read_rindex_entry(struct gfs2_inode *ip,
@@ -547,44 +584,53 @@ static int read_rindex_entry(struct gfs2_inode *ip,
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex);
- char buf[sizeof(struct gfs2_rindex)];
+ struct gfs2_rindex buf;
int error;
struct gfs2_rgrpd *rgd;
- error = gfs2_internal_read(ip, ra_state, buf, &pos,
+ if (pos >= i_size_read(&ip->i_inode))
+ return 1;
+
+ error = gfs2_internal_read(ip, ra_state, (char *)&buf, &pos,
sizeof(struct gfs2_rindex));
- if (!error)
- return 0;
- if (error != sizeof(struct gfs2_rindex)) {
- if (error > 0)
- error = -EIO;
- return error;
- }
+
+ if (error != sizeof(struct gfs2_rindex))
+ return (error == 0) ? 1 : error;
rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);
error = -ENOMEM;
if (!rgd)
return error;
- mutex_init(&rgd->rd_mutex);
- lops_init_le(&rgd->rd_le, &gfs2_rg_lops);
rgd->rd_sbd = sdp;
+ rgd->rd_addr = be64_to_cpu(buf.ri_addr);
+ rgd->rd_length = be32_to_cpu(buf.ri_length);
+ rgd->rd_data0 = be64_to_cpu(buf.ri_data0);
+ rgd->rd_data = be32_to_cpu(buf.ri_data);
+ rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
- list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list);
- list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
-
- gfs2_rindex_in(rgd, buf);
error = compute_bitstructs(rgd);
if (error)
- return error;
+ goto fail;
error = gfs2_glock_get(sdp, rgd->rd_addr,
&gfs2_rgrp_glops, CREATE, &rgd->rd_gl);
if (error)
- return error;
+ goto fail;
rgd->rd_gl->gl_object = rgd;
rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
+ if (rgd->rd_data > sdp->sd_max_rg_data)
+ sdp->sd_max_rg_data = rgd->rd_data;
+ spin_lock(&sdp->sd_rindex_spin);
+ rgd_insert(rgd);
+ sdp->sd_rgrps++;
+ spin_unlock(&sdp->sd_rindex_spin);
+ return error;
+
+fail:
+ kfree(rgd->rd_bits);
+ kmem_cache_free(gfs2_rgrpd_cachep, rgd);
return error;
}
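
The reworked read_rindex_entry() treats a position at or past i_size as EOF and reports it as a positive return value, so the caller's loop can stop cleanly without special-casing short reads. A user-space sketch of reading fixed-size records with that convention (hypothetical record layout; stdio stands in for gfs2_internal_read()):

#include <stdio.h>
#include <stdint.h>

struct rindex_rec { uint64_t addr; uint32_t length; };

/* return 0 on success, 1 on EOF, -1 on error, as the new convention does */
static int read_entry(FILE *f, long fsize, unsigned n, struct rindex_rec *out)
{
	long pos = (long)n * sizeof(*out);

	if (pos >= fsize)
		return 1;               /* EOF, not an error */
	if (fseek(f, pos, SEEK_SET) != 0)
		return -1;
	if (fread(out, sizeof(*out), 1, f) != 1)
		return -1;              /* short read */
	return 0;
}

int main(void)
{
	FILE *f = tmpfile();
	struct rindex_rec r = { 42, 8 }, out;
	int err;

	fwrite(&r, sizeof(r), 1, f);
	fflush(f);
	err = read_entry(f, sizeof(r), 0, &out);
	printf("err=%d addr=%llu\n", err, (unsigned long long)out.addr);
	err = read_entry(f, sizeof(r), 1, &out);
	printf("err=%d (1 == EOF)\n", err);
	fclose(f);
	return 0;
}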
@@ -595,40 +641,28 @@ static int read_rindex_entry(struct gfs2_inode *ip,
* Returns: 0 on successful update, error code otherwise
*/
-int gfs2_ri_update(struct gfs2_inode *ip)
+static int gfs2_ri_update(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct inode *inode = &ip->i_inode;
struct file_ra_state ra_state;
- u64 rgrp_count = i_size_read(inode);
- struct gfs2_rgrpd *rgd;
- unsigned int max_data = 0;
int error;
- do_div(rgrp_count, sizeof(struct gfs2_rindex));
- clear_rgrpdi(sdp);
-
file_ra_state_init(&ra_state, inode->i_mapping);
- for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) {
+ do {
error = read_rindex_entry(ip, &ra_state);
- if (error) {
- clear_rgrpdi(sdp);
- return error;
- }
- }
+ } while (error == 0);
+
+ if (error < 0)
+ return error;
- list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list)
- if (rgd->rd_data > max_data)
- max_data = rgd->rd_data;
- sdp->sd_max_rg_data = max_data;
sdp->sd_rindex_uptodate = 1;
return 0;
}
/**
- * gfs2_rindex_hold - Grab a lock on the rindex
+ * gfs2_rindex_update - Update the rindex if required
* @sdp: The GFS2 superblock
- * @ri_gh: the glock holder
*
* We grab a lock on the rindex inode to make sure that it doesn't
* change whilst we are performing an operation. We keep this lock
@@ -640,30 +674,29 @@ int gfs2_ri_update(struct gfs2_inode *ip)
* special file, which might have been updated if someone expanded the
* filesystem (via gfs2_grow utility), which adds new resource groups.
*
- * Returns: 0 on success, error code otherwise
+ * Returns: 0 on success, error code otherwise
*/
-int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh)
+int gfs2_rindex_update(struct gfs2_sbd *sdp)
{
struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
struct gfs2_glock *gl = ip->i_gl;
- int error;
-
- error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh);
- if (error)
- return error;
+ struct gfs2_holder ri_gh;
+ int error = 0;
/* Read new copy from disk if we don't have the latest */
if (!sdp->sd_rindex_uptodate) {
mutex_lock(&sdp->sd_rindex_mutex);
- if (!sdp->sd_rindex_uptodate) {
+ error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);
+ if (error)
+ return error;
+ if (!sdp->sd_rindex_uptodate)
error = gfs2_ri_update(ip);
- if (error)
- gfs2_glock_dq_uninit(ri_gh);
- }
+ gfs2_glock_dq_uninit(&ri_gh);
mutex_unlock(&sdp->sd_rindex_mutex);
}
+
return error;
}
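
gfs2_rindex_update() above is double-checked locking: a cheap unlocked test of sd_rindex_uptodate, then a re-test under sd_rindex_mutex (with the rindex glock held) so only one task re-reads the rindex after a gfs2_grow-style resize. A pthreads sketch of the shape (stand-in flag and re-read function; the glock step is omitted):

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t ri_mutex = PTHREAD_MUTEX_INITIALIZER;
static bool ri_uptodate;

static int expensive_reread(void) { return 0; } /* gfs2_ri_update() stand-in */

static int rindex_update(void)
{
	int err = 0;

	if (!ri_uptodate) {                    /* cheap unlocked check */
		pthread_mutex_lock(&ri_mutex);
		if (!ri_uptodate) {            /* re-check under the lock */
			err = expensive_reread();
			if (!err)
				ri_uptodate = true;
		}
		pthread_mutex_unlock(&ri_mutex);
	}
	return err;
}

int main(void)
{
	return rindex_update();
}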
@@ -694,7 +727,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
}
/**
- * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
+ * gfs2_rgrp_go_lock - Read in a RG's header and bitmaps
* @rgd: the struct gfs2_rgrpd describing the RG to read in
*
* Read in all of a Resource Group's header and bitmap blocks.
@@ -703,8 +736,9 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)
* Returns: errno
*/
-int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
+int gfs2_rgrp_go_lock(struct gfs2_holder *gh)
{
+ struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
struct gfs2_sbd *sdp = rgd->rd_sbd;
struct gfs2_glock *gl = rgd->rd_gl;
unsigned int length = rgd->rd_length;
@@ -712,17 +746,6 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
unsigned int x, y;
int error;
- mutex_lock(&rgd->rd_mutex);
-
- spin_lock(&sdp->sd_rindex_spin);
- if (rgd->rd_bh_count) {
- rgd->rd_bh_count++;
- spin_unlock(&sdp->sd_rindex_spin);
- mutex_unlock(&rgd->rd_mutex);
- return 0;
- }
- spin_unlock(&sdp->sd_rindex_spin);
-
for (x = 0; x < length; x++) {
bi = rgd->rd_bits + x;
error = gfs2_meta_read(gl, rgd->rd_addr + x, 0, &bi->bi_bh);
@@ -747,15 +770,9 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
+ rgd->rd_free_clone = rgd->rd_free;
}
- spin_lock(&sdp->sd_rindex_spin);
- rgd->rd_free_clone = rgd->rd_free;
- rgd->rd_bh_count++;
- spin_unlock(&sdp->sd_rindex_spin);
-
- mutex_unlock(&rgd->rd_mutex);
-
return 0;
fail:
@@ -765,52 +782,32 @@ fail:
bi->bi_bh = NULL;
gfs2_assert_warn(sdp, !bi->bi_clone);
}
- mutex_unlock(&rgd->rd_mutex);
return error;
}
-void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd)
-{
- struct gfs2_sbd *sdp = rgd->rd_sbd;
-
- spin_lock(&sdp->sd_rindex_spin);
- gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
- rgd->rd_bh_count++;
- spin_unlock(&sdp->sd_rindex_spin);
-}
-
/**
- * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get()
+ * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_go_lock()
* @rgd: the struct gfs2_rgrpd describing the RG to read in
*
*/
-void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd)
+void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
{
- struct gfs2_sbd *sdp = rgd->rd_sbd;
+ struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
int x, length = rgd->rd_length;
- spin_lock(&sdp->sd_rindex_spin);
- gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count);
- if (--rgd->rd_bh_count) {
- spin_unlock(&sdp->sd_rindex_spin);
- return;
- }
-
for (x = 0; x < length; x++) {
struct gfs2_bitmap *bi = rgd->rd_bits + x;
- kfree(bi->bi_clone);
- bi->bi_clone = NULL;
brelse(bi->bi_bh);
bi->bi_bh = NULL;
}
- spin_unlock(&sdp->sd_rindex_spin);
}
-static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
- const struct gfs2_bitmap *bi)
+void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
+ struct buffer_head *bh,
+ const struct gfs2_bitmap *bi)
{
struct super_block *sb = sdp->sd_vfs;
struct block_device *bdev = sb->s_bdev;
@@ -823,7 +820,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
unsigned int x;
for (x = 0; x < bi->bi_len; x++) {
- const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x;
+ const u8 *orig = bh->b_data + bi->bi_offset + x;
const u8 *clone = bi->bi_clone + bi->bi_offset + x;
u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1));
diff &= 0x55;
@@ -862,28 +859,6 @@ fail:
sdp->sd_args.ar_discard = 0;
}
-void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
-{
- struct gfs2_sbd *sdp = rgd->rd_sbd;
- unsigned int length = rgd->rd_length;
- unsigned int x;
-
- for (x = 0; x < length; x++) {
- struct gfs2_bitmap *bi = rgd->rd_bits + x;
- if (!bi->bi_clone)
- continue;
- if (sdp->sd_args.ar_discard)
- gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi);
- clear_bit(GBF_FULL, &bi->bi_flags);
- memcpy(bi->bi_clone + bi->bi_offset,
- bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
- }
-
- spin_lock(&sdp->sd_rindex_spin);
- rgd->rd_free_clone = rgd->rd_free;
- spin_unlock(&sdp->sd_rindex_spin);
-}
-
/**
* gfs2_alloc_get - get the struct gfs2_alloc structure for an inode
* @ip: the incore GFS2 inode structure
@@ -893,38 +868,35 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ int error;
BUG_ON(ip->i_alloc != NULL);
ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS);
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ fs_warn(sdp, "rindex update returns %d\n", error);
return ip->i_alloc;
}
/**
* try_rgrp_fit - See if a given reservation will fit in a given RG
* @rgd: the RG data
- * @al: the struct gfs2_alloc structure describing the reservation
+ * @ip: the inode
*
* If there's room for the requested blocks to be allocated from the RG:
- * Sets the $al_rgd field in @al.
*
* Returns: 1 on success (it fits), 0 on failure (it doesn't fit)
*/
-static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
+static int try_rgrp_fit(const struct gfs2_rgrpd *rgd, const struct gfs2_inode *ip)
{
- struct gfs2_sbd *sdp = rgd->rd_sbd;
- int ret = 0;
+ const struct gfs2_alloc *al = ip->i_alloc;
if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR))
return 0;
-
- spin_lock(&sdp->sd_rindex_spin);
- if (rgd->rd_free_clone >= al->al_requested) {
- al->al_rgd = rgd;
- ret = 1;
- }
- spin_unlock(&sdp->sd_rindex_spin);
-
- return ret;
+ if (rgd->rd_free_clone >= al->al_requested)
+ return 1;
+ return 0;
}
/**
@@ -992,76 +964,6 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
}
/**
- * recent_rgrp_next - get next RG from "recent" list
- * @cur_rgd: current rgrp
- *
- * Returns: The next rgrp in the recent list
- */
-
-static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd)
-{
- struct gfs2_sbd *sdp = cur_rgd->rd_sbd;
- struct list_head *head;
- struct gfs2_rgrpd *rgd;
-
- spin_lock(&sdp->sd_rindex_spin);
- head = &sdp->sd_rindex_mru_list;
- if (unlikely(cur_rgd->rd_list_mru.next == head)) {
- spin_unlock(&sdp->sd_rindex_spin);
- return NULL;
- }
- rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru);
- spin_unlock(&sdp->sd_rindex_spin);
- return rgd;
-}
-
-/**
- * forward_rgrp_get - get an rgrp to try next from full list
- * @sdp: The GFS2 superblock
- *
- * Returns: The rgrp to try next
- */
-
-static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp)
-{
- struct gfs2_rgrpd *rgd;
- unsigned int journals = gfs2_jindex_size(sdp);
- unsigned int rg = 0, x;
-
- spin_lock(&sdp->sd_rindex_spin);
-
- rgd = sdp->sd_rindex_forward;
- if (!rgd) {
- if (sdp->sd_rgrps >= journals)
- rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals;
-
- for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg;
- x++, rgd = gfs2_rgrpd_get_next(rgd))
- /* Do Nothing */;
-
- sdp->sd_rindex_forward = rgd;
- }
-
- spin_unlock(&sdp->sd_rindex_spin);
-
- return rgd;
-}
-
-/**
- * forward_rgrp_set - set the forward rgrp pointer
- * @sdp: the filesystem
- * @rgd: The new forward rgrp
- *
- */
-
-static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
-{
- spin_lock(&sdp->sd_rindex_spin);
- sdp->sd_rindex_forward = rgd;
- spin_unlock(&sdp->sd_rindex_spin);
-}
-
-/**
* get_local_rgrp - Choose and lock a rgrp for allocation
* @ip: the inode to reserve space for
* @rgp: the chosen and locked rgrp
@@ -1076,14 +978,18 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd, *begin = NULL;
struct gfs2_alloc *al = ip->i_alloc;
- int flags = LM_FLAG_TRY;
- int skipped = 0;
- int loops = 0;
int error, rg_locked;
+ int loops = 0;
- rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
+ if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal))
+ rgd = begin = ip->i_rgd;
+ else
+ rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal);
+
+ if (rgd == NULL)
+ return -EBADSLT;
- while (rgd) {
+ while (loops < 3) {
rg_locked = 0;
if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
@@ -1095,92 +1001,36 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
}
switch (error) {
case 0:
- if (try_rgrp_fit(rgd, al))
- goto out;
+ if (try_rgrp_fit(rgd, ip)) {
+ ip->i_rgd = rgd;
+ return 0;
+ }
if (rgd->rd_flags & GFS2_RDF_CHECK)
try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
if (!rg_locked)
gfs2_glock_dq_uninit(&al->al_rgd_gh);
/* fall through */
case GLR_TRYFAILED:
- rgd = recent_rgrp_next(rgd);
- break;
-
- default:
- return error;
- }
- }
-
- /* Go through full list of rgrps */
-
- begin = rgd = forward_rgrp_get(sdp);
-
- for (;;) {
- rg_locked = 0;
-
- if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) {
- rg_locked = 1;
- error = 0;
- } else {
- error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags,
- &al->al_rgd_gh);
- }
- switch (error) {
- case 0:
- if (try_rgrp_fit(rgd, al))
- goto out;
- if (rgd->rd_flags & GFS2_RDF_CHECK)
- try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
- if (!rg_locked)
- gfs2_glock_dq_uninit(&al->al_rgd_gh);
- break;
-
- case GLR_TRYFAILED:
- skipped++;
+ rgd = gfs2_rgrpd_get_next(rgd);
+ if (rgd == begin)
+ loops++;
break;
-
default:
return error;
}
-
- rgd = gfs2_rgrpd_get_next(rgd);
- if (!rgd)
- rgd = gfs2_rgrpd_get_first(sdp);
-
- if (rgd == begin) {
- if (++loops >= 3)
- return -ENOSPC;
- if (!skipped)
- loops++;
- flags = 0;
- if (loops == 2)
- gfs2_log_flush(sdp, NULL);
- }
- }
-
-out:
- if (begin) {
- spin_lock(&sdp->sd_rindex_spin);
- list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list);
- spin_unlock(&sdp->sd_rindex_spin);
- rgd = gfs2_rgrpd_get_next(rgd);
- if (!rgd)
- rgd = gfs2_rgrpd_get_first(sdp);
- forward_rgrp_set(sdp, rgd);
}
- return 0;
+ return -ENOSPC;
}
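
get_local_rgrp() now starts from the inode's cached rgrp (or the one containing the goal block) and walks the circular successor order from gfs2_rgrpd_get_next(), counting one loop each time it returns to the starting point and giving up with -ENOSPC after three laps. A compact sketch of that retry shape (illustrative types, no locking or try-lock handling):

#include <errno.h>
#include <stddef.h>

struct rg { struct rg *next_cyclic; int fits; };

static int pick_rg(struct rg *hint, struct rg **out)
{
	struct rg *rg = hint, *begin = hint;
	int loops = 0;

	if (rg == NULL)
		return -EINVAL;
	while (loops < 3) {
		if (rg->fits) {
			*out = rg;
			return 0;
		}
		rg = rg->next_cyclic;     /* gfs2_rgrpd_get_next() wraps */
		if (rg == begin)
			loops++;          /* completed one full lap */
	}
	return -ENOSPC;
}

int main(void)
{
	struct rg r1 = { NULL, 0 }, r2 = { NULL, 1 };
	struct rg *out = NULL;

	r1.next_cyclic = &r2;
	r2.next_cyclic = &r1;
	return pick_rg(&r1, &out);       /* returns 0, out == &r2 */
}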
/**
- * gfs2_inplace_reserve_i - Reserve space in the filesystem
+ * gfs2_inplace_reserve - Reserve space in the filesystem
* @ip: the inode to reserve space for
*
* Returns: errno
*/
-int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
- char *file, unsigned int line)
+int gfs2_inplace_reserve(struct gfs2_inode *ip)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_alloc *al = ip->i_alloc;
@@ -1191,45 +1041,22 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
if (gfs2_assert_warn(sdp, al->al_requested))
return -EINVAL;
- if (hold_rindex) {
- /* We need to hold the rindex unless the inode we're using is
- the rindex itself, in which case it's already held. */
- if (ip != GFS2_I(sdp->sd_rindex))
- error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
- else if (!sdp->sd_rgrps) /* We may not have the rindex read
- in, so: */
- error = gfs2_ri_update(ip);
- if (error)
- return error;
- }
-
-try_again:
do {
error = get_local_rgrp(ip, &last_unlinked);
- /* If there is no space, flushing the log may release some */
- if (error) {
- if (ip == GFS2_I(sdp->sd_rindex) &&
- !sdp->sd_rindex_uptodate) {
- error = gfs2_ri_update(ip);
- if (error)
- return error;
- goto try_again;
- }
- gfs2_log_flush(sdp, NULL);
+ if (error != -ENOSPC)
+ break;
+ /* Check that fs hasn't grown if writing to rindex */
+ if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
+ error = gfs2_ri_update(ip);
+ if (error)
+ break;
+ continue;
}
- } while (error && tries++ < 3);
+ /* Flushing the log may release space */
+ gfs2_log_flush(sdp, NULL);
+ } while (tries++ < 3);
- if (error) {
- if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
- gfs2_glock_dq_uninit(&al->al_ri_gh);
- return error;
- }
-
- /* no error, so we have the rgrp set in the inode's allocation. */
- al->al_file = file;
- al->al_line = line;
-
- return 0;
+ return error;
}
/**
@@ -1241,20 +1068,10 @@ try_again:
void gfs2_inplace_release(struct gfs2_inode *ip)
{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_alloc *al = ip->i_alloc;
- if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
- fs_warn(sdp, "al_alloced = %u, al_requested = %u "
- "al_file = %s, al_line = %u\n",
- al->al_alloced, al->al_requested, al->al_file,
- al->al_line);
-
- al->al_rgd = NULL;
if (al->al_rgd_gh.gh_gl)
gfs2_glock_dq_uninit(&al->al_rgd_gh);
- if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl)
- gfs2_glock_dq_uninit(&al->al_ri_gh);
}
/**
@@ -1352,6 +1169,7 @@ do_search:
/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
bitmaps, so we must search the originals for that. */
buffer = bi->bi_bh->b_data + bi->bi_offset;
+ WARN_ON(!buffer_uptodate(bi->bi_bh));
if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
buffer = bi->bi_clone + bi->bi_offset;
@@ -1371,6 +1189,7 @@ skip:
if (blk == BFITNOENT)
return blk;
+
*n = 1;
if (old_state == new_state)
goto out;
@@ -1503,7 +1322,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
if (al == NULL)
return -ECANCELED;
- rgd = al->al_rgd;
+ rgd = ip->i_rgd;
if (rgrp_contains_block(rgd, ip->i_goal))
goal = ip->i_goal - rgd->rd_data0;
@@ -1518,7 +1337,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
rgd->rd_last_alloc = blk;
block = rgd->rd_data0 + blk;
- ip->i_goal = block;
+ ip->i_goal = block + *n - 1;
error = gfs2_meta_inode_buffer(ip, &dibh);
if (error == 0) {
struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
@@ -1539,9 +1358,7 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
gfs2_statfs_change(sdp, 0, -(s64)*n, 0);
gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
- spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone -= *n;
- spin_unlock(&sdp->sd_rindex_spin);
trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
*bn = block;
return 0;
@@ -1564,7 +1381,7 @@ int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
{
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
struct gfs2_alloc *al = dip->i_alloc;
- struct gfs2_rgrpd *rgd = al->al_rgd;
+ struct gfs2_rgrpd *rgd = dip->i_rgd;
u32 blk;
u64 block;
unsigned int n = 1;
@@ -1594,9 +1411,7 @@ int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
gfs2_statfs_change(sdp, 0, -1, +1);
gfs2_trans_add_unrevoke(sdp, block, 1);
- spin_lock(&sdp->sd_rindex_spin);
rgd->rd_free_clone--;
- spin_unlock(&sdp->sd_rindex_spin);
trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE);
*bn = block;
return 0;
@@ -1607,14 +1422,15 @@ rgrp_error:
}
/**
- * gfs2_free_data - free a contiguous run of data block(s)
+ * __gfs2_free_blocks - free a contiguous run of block(s)
* @ip: the inode these blocks are being freed from
* @bstart: first block of a run of contiguous blocks
* @blen: the length of the block run
+ * @meta: 1 if the blocks represent metadata
*
*/
-void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
+void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
@@ -1628,57 +1444,12 @@ void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
- gfs2_trans_add_rg(rgd);
-
/* Directories keep their data in the metadata address space */
- if (ip->i_depth)
+ if (meta || ip->i_depth)
gfs2_meta_wipe(ip, bstart, blen);
}
/**
- * gfs2_free_data - free a contiguous run of data block(s)
- * @ip: the inode these blocks are being freed from
- * @bstart: first block of a run of contiguous blocks
- * @blen: the length of the block run
- *
- */
-
-void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-
- __gfs2_free_data(ip, bstart, blen);
- gfs2_statfs_change(sdp, 0, +blen, 0);
- gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
-}
-
-/**
- * gfs2_free_meta - free a contiguous run of data block(s)
- * @ip: the inode these blocks are being freed from
- * @bstart: first block of a run of contiguous blocks
- * @blen: the length of the block run
- *
- */
-
-void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
-{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- struct gfs2_rgrpd *rgd;
-
- rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);
- if (!rgd)
- return;
- trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE);
- rgd->rd_free += blen;
-
- gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
- gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
-
- gfs2_trans_add_rg(rgd);
- gfs2_meta_wipe(ip, bstart, blen);
-}
-
-/**
* gfs2_free_meta - free a contiguous run of data block(s)
* @ip: the inode these blocks are being freed from
* @bstart: first block of a run of contiguous blocks
@@ -1690,7 +1461,7 @@ void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- __gfs2_free_meta(ip, bstart, blen);
+ __gfs2_free_blocks(ip, bstart, blen, 1);
gfs2_statfs_change(sdp, 0, +blen, 0);
gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid);
}
@@ -1708,7 +1479,6 @@ void gfs2_unlink_di(struct inode *inode)
trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED);
gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
- gfs2_trans_add_rg(rgd);
}
static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
@@ -1730,7 +1500,6 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)
gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
gfs2_statfs_change(sdp, 0, +1, -1);
- gfs2_trans_add_rg(rgd);
}
@@ -1756,41 +1525,33 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
{
struct gfs2_rgrpd *rgd;
- struct gfs2_holder ri_gh, rgd_gh;
- struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);
- int ri_locked = 0;
+ struct gfs2_holder rgd_gh;
int error;
- if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- goto fail;
- ri_locked = 1;
- }
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ return error;
error = -EINVAL;
rgd = gfs2_blk2rgrpd(sdp, no_addr);
if (!rgd)
- goto fail_rindex;
+ goto fail;
error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);
if (error)
- goto fail_rindex;
+ goto fail;
if (gfs2_get_block_type(rgd, no_addr) != type)
error = -ESTALE;
gfs2_glock_dq_uninit(&rgd_gh);
-fail_rindex:
- if (ri_locked)
- gfs2_glock_dq_uninit(&ri_gh);
fail:
return error;
}
/**
* gfs2_rlist_add - add a RG to a list of RGs
- * @sdp: the filesystem
+ * @ip: the inode
* @rlist: the list of resource groups
* @block: the block
*
@@ -1800,9 +1561,10 @@ fail:
*
*/
-void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
+void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
u64 block)
{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_rgrpd *rgd;
struct gfs2_rgrpd **tmp;
unsigned int new_space;
@@ -1811,12 +1573,15 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
if (gfs2_assert_warn(sdp, !rlist->rl_ghs))
return;
- rgd = gfs2_blk2rgrpd(sdp, block);
+ if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block))
+ rgd = ip->i_rgd;
+ else
+ rgd = gfs2_blk2rgrpd(sdp, block);
if (!rgd) {
- if (gfs2_consist(sdp))
- fs_err(sdp, "block = %llu\n", (unsigned long long)block);
+ fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);
return;
}
+ ip->i_rgd = rgd;
for (x = 0; x < rlist->rl_rgrps; x++)
if (rlist->rl_rgd[x] == rgd)
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index a80e303..cf5c501 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -18,18 +18,15 @@ struct gfs2_holder;
extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd);
-struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
-struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
-struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
+extern struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk);
+extern struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp);
+extern struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd);
extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp);
-extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh);
-
-extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd);
-extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd);
-extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd);
-
-extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd);
+extern int gfs2_rindex_update(struct gfs2_sbd *sdp);
+extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
+extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
+extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
static inline void gfs2_alloc_put(struct gfs2_inode *ip)
@@ -39,22 +36,13 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip)
ip->i_alloc = NULL;
}
-extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
- char *file, unsigned int line);
-#define gfs2_inplace_reserve(ip) \
- gfs2_inplace_reserve_i((ip), 1, __FILE__, __LINE__)
-#define gfs2_inplace_reserve_ri(ip) \
- gfs2_inplace_reserve_i((ip), 0, __FILE__, __LINE__)
-
+extern int gfs2_inplace_reserve(struct gfs2_inode *ip);
extern void gfs2_inplace_release(struct gfs2_inode *ip);
-extern int gfs2_ri_update(struct gfs2_inode *ip);
extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n);
extern int gfs2_alloc_di(struct gfs2_inode *ip, u64 *bn, u64 *generation);
-extern void __gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
-extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen);
-extern void __gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
+extern void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen);
extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip);
extern void gfs2_unlink_di(struct inode *inode);
@@ -68,11 +56,14 @@ struct gfs2_rgrp_list {
struct gfs2_holder *rl_ghs;
};
-extern void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,
+extern void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
u64 block);
extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state);
extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist);
extern u64 gfs2_ri_total(struct gfs2_sbd *sdp);
extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl);
+extern void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
+ struct buffer_head *bh,
+ const struct gfs2_bitmap *bi);
#endif /* __RGRP_DOT_H__ */
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index fb0edf7..be2ece5 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -752,51 +752,77 @@ static int gfs2_write_inode(struct inode *inode, struct writeback_control *wbc)
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct address_space *metamapping = gfs2_glock2aspace(ip->i_gl);
struct backing_dev_info *bdi = metamapping->backing_dev_info;
- struct gfs2_holder gh;
+ int ret = 0;
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
+ if (bdi->dirty_exceeded)
+ gfs2_ail1_flush(sdp, wbc);
+ else
+ filemap_fdatawrite(metamapping);
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ ret = filemap_fdatawait(metamapping);
+ if (ret)
+ mark_inode_dirty_sync(inode);
+ return ret;
+}
+
+/**
+ * gfs2_dirty_inode - check for atime updates
+ * @inode: The inode in question
+ * @flags: The type of dirty
+ *
+ * Unfortunately it can be called under any combination of inode
+ * glock and transaction lock, so we have to check carefully.
+ *
+ * At the moment this deals only with atime - it should be possible
+ * to expand that role in future, once a review of the locking has
+ * been carried out.
+ */
+
+static void gfs2_dirty_inode(struct inode *inode, int flags)
+{
+ struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
struct buffer_head *bh;
- struct timespec atime;
- struct gfs2_dinode *di;
- int ret = -EAGAIN;
- int unlock_required = 0;
-
- /* Skip timestamp update, if this is from a memalloc */
- if (current->flags & PF_MEMALLOC)
- goto do_flush;
+ struct gfs2_holder gh;
+ int need_unlock = 0;
+ int need_endtrans = 0;
+ int ret;
+
+ if (!(flags & (I_DIRTY_DATASYNC|I_DIRTY_SYNC)))
+ return;
+
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
- if (ret)
- goto do_flush;
- unlock_required = 1;
+ if (ret) {
+ fs_err(sdp, "dirty_inode: glock %d\n", ret);
+ return;
+ }
+ need_unlock = 1;
}
- ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
- if (ret)
- goto do_unlock;
+
+ if (current->journal_info == NULL) {
+ ret = gfs2_trans_begin(sdp, RES_DINODE, 0);
+ if (ret) {
+ fs_err(sdp, "dirty_inode: gfs2_trans_begin %d\n", ret);
+ goto out;
+ }
+ need_endtrans = 1;
+ }
+
ret = gfs2_meta_inode_buffer(ip, &bh);
if (ret == 0) {
- di = (struct gfs2_dinode *)bh->b_data;
- atime.tv_sec = be64_to_cpu(di->di_atime);
- atime.tv_nsec = be32_to_cpu(di->di_atime_nsec);
- if (timespec_compare(&inode->i_atime, &atime) > 0) {
- gfs2_trans_add_bh(ip->i_gl, bh, 1);
- gfs2_dinode_out(ip, bh->b_data);
- }
+ gfs2_trans_add_bh(ip->i_gl, bh, 1);
+ gfs2_dinode_out(ip, bh->b_data);
brelse(bh);
}
- gfs2_trans_end(sdp);
-do_unlock:
- if (unlock_required)
+
+ if (need_endtrans)
+ gfs2_trans_end(sdp);
+out:
+ if (need_unlock)
gfs2_glock_dq_uninit(&gh);
-do_flush:
- if (wbc->sync_mode == WB_SYNC_ALL)
- gfs2_log_flush(GFS2_SB(inode), ip->i_gl);
- filemap_fdatawrite(metamapping);
- if (bdi->dirty_exceeded)
- gfs2_ail1_flush(sdp, wbc);
- if (!ret && (wbc->sync_mode == WB_SYNC_ALL))
- ret = filemap_fdatawait(metamapping);
- if (ret)
- mark_inode_dirty_sync(inode);
- return ret;
}
/**
@@ -1011,7 +1037,6 @@ static int statfs_slow_fill(struct gfs2_rgrpd *rgd,
static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host *sc)
{
- struct gfs2_holder ri_gh;
struct gfs2_rgrpd *rgd_next;
struct gfs2_holder *gha, *gh;
unsigned int slots = 64;
@@ -1024,10 +1049,6 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host
if (!gha)
return -ENOMEM;
- error = gfs2_rindex_hold(sdp, &ri_gh);
- if (error)
- goto out;
-
rgd_next = gfs2_rgrpd_get_first(sdp);
for (;;) {
@@ -1070,9 +1091,6 @@ static int gfs2_statfs_slow(struct gfs2_sbd *sdp, struct gfs2_statfs_change_host
yield();
}
- gfs2_glock_dq_uninit(&ri_gh);
-
-out:
kfree(gha);
return error;
}
@@ -1124,6 +1142,10 @@ static int gfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
struct gfs2_statfs_change_host sc;
int error;
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ return error;
+
if (gfs2_tune_get(sdp, gt_statfs_slow))
error = gfs2_statfs_slow(sdp, &sc);
else
@@ -1276,11 +1298,11 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir))
seq_printf(s, ",meta");
if (args->ar_lockproto[0])
- seq_printf(s, ",lockproto=%s", args->ar_lockproto);
+ seq_show_option(s, "lockproto", args->ar_lockproto);
if (args->ar_locktable[0])
- seq_printf(s, ",locktable=%s", args->ar_locktable);
+ seq_show_option(s, "locktable", args->ar_locktable);
if (args->ar_hostdata[0])
- seq_printf(s, ",hostdata=%s", args->ar_hostdata);
+ seq_show_option(s, "hostdata", args->ar_hostdata);
if (args->ar_spectator)
seq_printf(s, ",spectator");
if (args->ar_localflocks)
@@ -1394,21 +1416,17 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
if (error)
goto out;
- error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
- if (error)
- goto out_qs;
-
rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr);
if (!rgd) {
gfs2_consist_inode(ip);
error = -EIO;
- goto out_rindex_relse;
+ goto out_qs;
}
error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
&al->al_rgd_gh);
if (error)
- goto out_rindex_relse;
+ goto out_qs;
error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA,
sdp->sd_jdesc->jd_blocks);
@@ -1423,8 +1441,6 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
out_rg_gunlock:
gfs2_glock_dq_uninit(&al->al_rgd_gh);
-out_rindex_relse:
- gfs2_glock_dq_uninit(&al->al_ri_gh);
out_qs:
gfs2_quota_unhold(ip);
out:
@@ -1471,9 +1487,11 @@ static void gfs2_evict_inode(struct inode *inode)
goto out;
}
- error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
- if (error)
- goto out_truncate;
+ if (!test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
+ error = gfs2_check_blk_type(sdp, ip->i_no_addr, GFS2_BLKST_UNLINKED);
+ if (error)
+ goto out_truncate;
+ }
if (test_bit(GIF_INVALID, &ip->i_flags)) {
error = gfs2_inode_refresh(ip);
@@ -1513,6 +1531,10 @@ static void gfs2_evict_inode(struct inode *inode)
goto out_unlock;
out_truncate:
+ gfs2_log_flush(sdp, ip->i_gl);
+ write_inode_now(inode, 1);
+ gfs2_ail_flush(ip->i_gl, 0);
+
/* Case 2 starts here */
error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks);
if (error)
@@ -1533,7 +1555,7 @@ out:
/* Case 3 starts here */
truncate_inode_pages(&inode->i_data, 0);
end_writeback(inode);
-
+ gfs2_dir_hash_inval(ip);
ip->i_gl->gl_object = NULL;
gfs2_glock_add_to_lru(ip->i_gl);
gfs2_glock_put(ip->i_gl);
@@ -1552,6 +1574,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
if (ip) {
ip->i_flags = 0;
ip->i_gl = NULL;
+ ip->i_rgd = NULL;
}
return &ip->i_inode;
}
@@ -1572,6 +1595,7 @@ const struct super_operations gfs2_super_ops = {
.alloc_inode = gfs2_alloc_inode,
.destroy_inode = gfs2_destroy_inode,
.write_inode = gfs2_write_inode,
+ .dirty_inode = gfs2_dirty_inode,
.evict_inode = gfs2_evict_inode,
.put_super = gfs2_put_super,
.sync_fs = gfs2_sync_fs,
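The rewritten gfs2_dirty_inode() above relies on conditional acquisition: take the inode glock only if the caller does not already hold it, open a transaction only if one is not already in progress (current->journal_info), and release exactly what was taken. A distilled sketch of that guard pattern, assuming the dinode write-back happens at the marked point; the helper name is hypothetical.

static void update_dinode_guarded(struct gfs2_inode *ip, struct gfs2_sbd *sdp)
{
	struct gfs2_holder gh;
	int need_unlock = 0, need_endtrans = 0;

	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
		if (gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh))
			return;
		need_unlock = 1;
	}
	if (current->journal_info == NULL) {	/* not already in a transaction */
		if (gfs2_trans_begin(sdp, RES_DINODE, 0))
			goto out;
		need_endtrans = 1;
	}
	/* ... write the dinode back here ... */
	if (need_endtrans)
		gfs2_trans_end(sdp);
out:
	if (need_unlock)
		gfs2_glock_dq_uninit(&gh);
}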
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index e6453c3..6ab2a77 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -193,8 +193,3 @@ void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
gfs2_log_unlock(sdp);
}
-void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd)
-{
- lops_add(rgd->rd_sbd, &rgd->rd_le);
-}
-
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h
index fb56b78..f8f101e 100644
--- a/fs/gfs2/trans.h
+++ b/fs/gfs2/trans.h
@@ -28,20 +28,20 @@ struct gfs2_glock;
/* reserve either the number of blocks to be allocated plus the rg header
* block, or all of the blocks in the rg, whichever is smaller */
-static inline unsigned int gfs2_rg_blocks(const struct gfs2_alloc *al)
+static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip)
{
- return (al->al_requested < al->al_rgd->rd_length)?
- al->al_requested + 1 : al->al_rgd->rd_length;
+ const struct gfs2_alloc *al = ip->i_alloc;
+ if (al->al_requested < ip->i_rgd->rd_length)
+ return al->al_requested + 1;
+ return ip->i_rgd->rd_length;
}
-int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
- unsigned int revokes);
+extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
+ unsigned int revokes);
-void gfs2_trans_end(struct gfs2_sbd *sdp);
-
-void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
-void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
-void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
-void gfs2_trans_add_rg(struct gfs2_rgrpd *rgd);
+extern void gfs2_trans_end(struct gfs2_sbd *sdp);
+extern void gfs2_trans_add_bh(struct gfs2_glock *gl, struct buffer_head *bh, int meta);
+extern void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
+extern void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len);
#endif /* __TRANS_DOT_H__ */
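The new gfs2_rg_blocks() reserves either the requested blocks plus one rgrp header block, or the whole group, whichever is smaller. A self-contained restatement with worked numbers (values illustrative):

static unsigned int rg_blocks_example(unsigned int requested, unsigned int rd_length)
{
	if (requested < rd_length)
		return requested + 1;	/* data blocks + rgrp header block */
	return rd_length;		/* never more than the whole group */
}
/* e.g. rg_blocks_example(10, 100) == 11; rg_blocks_example(500, 100) == 100 */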
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 439b61c..71d7bf8 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -332,15 +332,8 @@ static int ea_remove_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
if (error)
goto out_alloc;
- error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh);
- if (error)
- goto out_quota;
-
error = ea_dealloc_unstuffed(ip, bh, ea, prev, (leave) ? &error : NULL);
- gfs2_glock_dq_uninit(&al->al_ri_gh);
-
-out_quota:
gfs2_quota_unhold(ip);
out_alloc:
gfs2_alloc_put(ip);
@@ -734,7 +727,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er,
goto out_gunlock_q;
error = gfs2_trans_begin(GFS2_SB(&ip->i_inode),
- blks + gfs2_rg_blocks(al) +
+ blks + gfs2_rg_blocks(ip) +
RES_DINODE + RES_STATFS + RES_QUOTA, 0);
if (error)
goto out_ipres;
@@ -1296,7 +1289,8 @@ fail:
int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
{
- struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+ struct inode *inode = &ip->i_inode;
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_ea_location el;
int error;
@@ -1319,7 +1313,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
if (error)
return error;
- error = gfs2_setattr_simple(ip, attr);
+ error = gfs2_setattr_simple(inode, attr);
gfs2_trans_end(sdp);
return error;
}
@@ -1362,14 +1356,14 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip)
blen++;
else {
if (bstart)
- gfs2_rlist_add(sdp, &rlist, bstart);
+ gfs2_rlist_add(ip, &rlist, bstart);
bstart = bn;
blen = 1;
}
blks++;
}
if (bstart)
- gfs2_rlist_add(sdp, &rlist, bstart);
+ gfs2_rlist_add(ip, &rlist, bstart);
else
goto out;
@@ -1501,24 +1495,18 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
if (error)
goto out_alloc;
- error = gfs2_rindex_hold(GFS2_SB(&ip->i_inode), &al->al_ri_gh);
- if (error)
- goto out_quota;
-
error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
if (error)
- goto out_rindex;
+ goto out_quota;
if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
error = ea_dealloc_indirect(ip);
if (error)
- goto out_rindex;
+ goto out_quota;
}
error = ea_dealloc_block(ip);
-out_rindex:
- gfs2_glock_dq_uninit(&al->al_ri_gh);
out_quota:
gfs2_quota_unhold(ip);
out_alloc:
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 828a0e1..926d020 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -156,7 +156,7 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size)
return ERR_PTR(-EINVAL);
}
-static struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
+struct posix_acl *jffs2_get_acl(struct inode *inode, int type)
{
struct posix_acl *acl;
char *value = NULL;
@@ -227,7 +227,7 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
case ACL_TYPE_ACCESS:
xprefix = JFFS2_XPREFIX_ACL_ACCESS;
if (acl) {
- mode_t mode = inode->i_mode;
+ umode_t mode = inode->i_mode;
rc = posix_acl_equiv_mode(acl, &mode);
if (rc < 0)
return rc;
@@ -259,30 +259,11 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
return rc;
}
-int jffs2_check_acl(struct inode *inode, int mask, unsigned int flags)
+int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, umode_t *i_mode)
{
struct posix_acl *acl;
int rc;
- if (flags & IPERM_FLAG_RCU)
- return -ECHILD;
-
- acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- rc = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- return rc;
- }
- return -EAGAIN;
-}
-
-int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
-{
- struct posix_acl *acl, *clone;
- int rc;
-
cache_no_acl(inode);
if (S_ISLNK(*i_mode))
@@ -298,18 +279,13 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode)
if (S_ISDIR(*i_mode))
set_cached_acl(inode, ACL_TYPE_DEFAULT, acl);
- clone = posix_acl_clone(acl, GFP_KERNEL);
- if (!clone)
- return -ENOMEM;
- rc = posix_acl_create_masq(clone, (mode_t *)i_mode);
- if (rc < 0) {
- posix_acl_release(clone);
+ rc = posix_acl_create(&acl, GFP_KERNEL, i_mode);
+ if (rc < 0)
return rc;
- }
if (rc > 0)
- set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
+ set_cached_acl(inode, ACL_TYPE_ACCESS, acl);
- posix_acl_release(clone);
+ posix_acl_release(acl);
}
return 0;
}
@@ -335,7 +311,7 @@ int jffs2_init_acl_post(struct inode *inode)
int jffs2_acl_chmod(struct inode *inode)
{
- struct posix_acl *acl, *clone;
+ struct posix_acl *acl;
int rc;
if (S_ISLNK(inode->i_mode))
@@ -343,14 +319,11 @@ int jffs2_acl_chmod(struct inode *inode)
acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS);
if (IS_ERR(acl) || !acl)
return PTR_ERR(acl);
- clone = posix_acl_clone(acl, GFP_KERNEL);
+ rc = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ if (rc)
+ return rc;
+ rc = jffs2_set_acl(inode, ACL_TYPE_ACCESS, acl);
posix_acl_release(acl);
- if (!clone)
- return -ENOMEM;
- rc = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!rc)
- rc = jffs2_set_acl(inode, ACL_TYPE_ACCESS, clone);
- posix_acl_release(clone);
return rc;
}
diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h
index 3119f59..9b47724 100644
--- a/fs/jffs2/acl.h
+++ b/fs/jffs2/acl.h
@@ -26,9 +26,9 @@ struct jffs2_acl_header {
#ifdef CONFIG_JFFS2_FS_POSIX_ACL
-extern int jffs2_check_acl(struct inode *, int, unsigned int);
+struct posix_acl *jffs2_get_acl(struct inode *inode, int type);
extern int jffs2_acl_chmod(struct inode *);
-extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *);
+extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *);
extern int jffs2_init_acl_post(struct inode *);
extern const struct xattr_handler jffs2_acl_access_xattr_handler;
@@ -36,7 +36,7 @@ extern const struct xattr_handler jffs2_acl_default_xattr_handler;
#else
-#define jffs2_check_acl (NULL)
+#define jffs2_get_acl (NULL)
#define jffs2_acl_chmod(inode) (0)
#define jffs2_init_acl_pre(dir_i,inode,mode) (0)
#define jffs2_init_acl_post(inode) (0)
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index de42470..5b6c9d1 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -53,6 +53,78 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this,
return 0;
}
+/*
+ * jffs2_selected_compress:
+ * @compr: Explicit compression type to use (ie, JFFS2_COMPR_ZLIB).
+ * If 0, just take the first available compression mode.
+ * @data_in: Pointer to uncompressed data
+ * @cpage_out: Pointer to returned pointer to buffer for compressed data
+ * @datalen: On entry, holds the amount of data available for compression.
+ * On exit, expected to hold the amount of data actually compressed.
+ * @cdatalen: On entry, holds the amount of space available for compressed
+ * data. On exit, expected to hold the actual size of the compressed
+ * data.
+ *
+ * Returns: the compression type used. Zero is used to show that the data
+ * could not be compressed; probably because we couldn't find the requested
+ * compression mode.
+ */
+static int jffs2_selected_compress(u8 compr, unsigned char *data_in,
+ unsigned char **cpage_out, u32 *datalen, u32 *cdatalen)
+{
+ struct jffs2_compressor *this;
+ int err, ret = JFFS2_COMPR_NONE;
+ uint32_t orig_slen, orig_dlen;
+ char *output_buf;
+
+ output_buf = kmalloc(*cdatalen, GFP_KERNEL);
+ if (!output_buf) {
+ printk(KERN_WARNING "JFFS2: No memory for compressor allocation. Compression failed.\n");
+ return ret;
+ }
+ orig_slen = *datalen;
+ orig_dlen = *cdatalen;
+ spin_lock(&jffs2_compressor_list_lock);
+ list_for_each_entry(this, &jffs2_compressor_list, list) {
+ /* Skip decompress-only and disabled modules */
+ if (!this->compress || this->disabled)
+ continue;
+
+ /* Skip if not the desired compression type */
+ if (compr && (compr != this->compr))
+ continue;
+
+ /*
+ * Either compression type was unspecified, or we found our
+ * compressor; either way, we're good to go.
+ */
+ this->usecount++;
+ spin_unlock(&jffs2_compressor_list_lock);
+
+ *datalen = orig_slen;
+ *cdatalen = orig_dlen;
+ err = this->compress(data_in, output_buf, datalen, cdatalen);
+
+ spin_lock(&jffs2_compressor_list_lock);
+ this->usecount--;
+ if (!err) {
+ /* Success */
+ ret = this->compr;
+ this->stat_compr_blocks++;
+ this->stat_compr_orig_size += *datalen;
+ this->stat_compr_new_size += *cdatalen;
+ break;
+ }
+ }
+ spin_unlock(&jffs2_compressor_list_lock);
+ if (ret == JFFS2_COMPR_NONE)
+ kfree(output_buf);
+ else
+ *cpage_out = output_buf;
+
+ return ret;
+}
+
/* jffs2_compress:
* @data_in: Pointer to uncompressed data
* @cpage_out: Pointer to returned pointer to buffer for compressed data
@@ -76,47 +148,23 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
uint32_t *datalen, uint32_t *cdatalen)
{
int ret = JFFS2_COMPR_NONE;
- int compr_ret;
+ int mode, compr_ret;
struct jffs2_compressor *this, *best=NULL;
unsigned char *output_buf = NULL, *tmp_buf;
uint32_t orig_slen, orig_dlen;
uint32_t best_slen=0, best_dlen=0;
- switch (jffs2_compression_mode) {
+ if (c->mount_opts.override_compr)
+ mode = c->mount_opts.compr;
+ else
+ mode = jffs2_compression_mode;
+
+ switch (mode) {
case JFFS2_COMPR_MODE_NONE:
break;
case JFFS2_COMPR_MODE_PRIORITY:
- output_buf = kmalloc(*cdatalen,GFP_KERNEL);
- if (!output_buf) {
- printk(KERN_WARNING "JFFS2: No memory for compressor allocation. Compression failed.\n");
- goto out;
- }
- orig_slen = *datalen;
- orig_dlen = *cdatalen;
- spin_lock(&jffs2_compressor_list_lock);
- list_for_each_entry(this, &jffs2_compressor_list, list) {
- /* Skip decompress-only backwards-compatibility and disabled modules */
- if ((!this->compress)||(this->disabled))
- continue;
-
- this->usecount++;
- spin_unlock(&jffs2_compressor_list_lock);
- *datalen = orig_slen;
- *cdatalen = orig_dlen;
- compr_ret = this->compress(data_in, output_buf, datalen, cdatalen);
- spin_lock(&jffs2_compressor_list_lock);
- this->usecount--;
- if (!compr_ret) {
- ret = this->compr;
- this->stat_compr_blocks++;
- this->stat_compr_orig_size += *datalen;
- this->stat_compr_new_size += *cdatalen;
- break;
- }
- }
- spin_unlock(&jffs2_compressor_list_lock);
- if (ret == JFFS2_COMPR_NONE)
- kfree(output_buf);
+ ret = jffs2_selected_compress(0, data_in, cpage_out, datalen,
+ cdatalen);
break;
case JFFS2_COMPR_MODE_SIZE:
case JFFS2_COMPR_MODE_FAVOURLZO:
@@ -174,22 +222,28 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
best->stat_compr_orig_size += best_slen;
best->stat_compr_new_size += best_dlen;
ret = best->compr;
+ *cpage_out = output_buf;
}
spin_unlock(&jffs2_compressor_list_lock);
break;
+ case JFFS2_COMPR_MODE_FORCELZO:
+ ret = jffs2_selected_compress(JFFS2_COMPR_LZO, data_in,
+ cpage_out, datalen, cdatalen);
+ break;
+ case JFFS2_COMPR_MODE_FORCEZLIB:
+ ret = jffs2_selected_compress(JFFS2_COMPR_ZLIB, data_in,
+ cpage_out, datalen, cdatalen);
+ break;
default:
printk(KERN_ERR "JFFS2: unknown compression mode.\n");
}
- out:
+
if (ret == JFFS2_COMPR_NONE) {
*cpage_out = data_in;
*datalen = *cdatalen;
none_stat_compr_blocks++;
none_stat_compr_size += *datalen;
}
- else {
- *cpage_out = output_buf;
- }
return ret;
}
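Assuming the kernel-doc above, a caller inside compr.c (the function is static) would drive jffs2_selected_compress() roughly as below; srclen and dstlen stand in for real buffer sizes, and in-tree callers go through jffs2_compress() instead.

	unsigned char *cpage = NULL;
	uint32_t datalen = srclen;	/* in: bytes available; out: bytes consumed */
	uint32_t cdatalen = dstlen;	/* in: room for output; out: compressed size */
	int ctype;

	ctype = jffs2_selected_compress(JFFS2_COMPR_ZLIB, data_in, &cpage,
					&datalen, &cdatalen);
	if (ctype == JFFS2_COMPR_NONE) {
		/* requested mode unavailable or failed; write data_in raw */
	} else {
		/* cpage holds cdatalen compressed bytes; kfree(cpage) when done */
	}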
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index 13bb759..5e91d57 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -40,6 +40,8 @@
#define JFFS2_COMPR_MODE_PRIORITY 1
#define JFFS2_COMPR_MODE_SIZE 2
#define JFFS2_COMPR_MODE_FAVOURLZO 3
+#define JFFS2_COMPR_MODE_FORCELZO 4
+#define JFFS2_COMPR_MODE_FORCEZLIB 5
#define FAVOUR_LZO_PERCENT 80
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 16a5047..406d9cc 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
unsigned char *cpage_out,
uint32_t *sourcelen, uint32_t *dstlen)
{
- short positions[256];
+ unsigned short positions[256];
int outpos = 0;
int pos=0;
@@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in,
unsigned char *cpage_out,
uint32_t srclen, uint32_t destlen)
{
- short positions[256];
+ unsigned short positions[256];
int outpos = 0;
int pos=0;
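The short-to-unsigned-short change above presumably keeps the positions[] offsets from wrapping negative once an input grows past 32767 bytes; that motivation is an inference, not taken from a log message. A small userspace demonstration of the difference, assuming the usual two's-complement conversion:

#include <stdio.h>
#include <limits.h>

int main(void)
{
	int off = SHRT_MAX + 1;			/* an offset past 32767 */
	short s = (short)off;			/* old type: wraps negative */
	unsigned short u = (unsigned short)off;	/* new type: value preserved */
	printf("%hd %hu\n", s, u);		/* typically "-32768 32768" */
	return 0;
}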
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index 4bca6a2..be6169b 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -56,7 +56,7 @@ const struct inode_operations jffs2_dir_inode_operations =
.rmdir = jffs2_rmdir,
.mknod = jffs2_mknod,
.rename = jffs2_rename,
- .check_acl = jffs2_check_acl,
+ .get_acl = jffs2_get_acl,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
@@ -102,10 +102,8 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target,
mutex_unlock(&dir_f->sem);
if (ino) {
inode = jffs2_iget(dir_i->i_sb, ino);
- if (IS_ERR(inode)) {
+ if (IS_ERR(inode))
printk(KERN_WARNING "iget() failed for ino #%u\n", ino);
- return ERR_CAST(inode);
- }
}
return d_splice_alias(inode, target);
@@ -247,7 +245,7 @@ static int jffs2_unlink(struct inode *dir_i, struct dentry *dentry)
ret = jffs2_do_unlink(c, dir_f, dentry->d_name.name,
dentry->d_name.len, dead_f, now);
if (dead_f->inocache)
- dentry->d_inode->i_nlink = dead_f->inocache->pino_nlink;
+ set_nlink(dentry->d_inode, dead_f->inocache->pino_nlink);
if (!ret)
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
return ret;
@@ -280,7 +278,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
if (!ret) {
mutex_lock(&f->sem);
- old_dentry->d_inode->i_nlink = ++f->inocache->pino_nlink;
+ set_nlink(old_dentry->d_inode, ++f->inocache->pino_nlink);
mutex_unlock(&f->sem);
d_instantiate(dentry, old_dentry->d_inode);
dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
@@ -499,7 +497,7 @@ static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, int mode)
f = JFFS2_INODE_INFO(inode);
/* Directories get nlink 2 at start */
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
/* but ic->pino_nlink is the parent ino# */
f->inocache->pino_nlink = dir_i->i_ino;
@@ -822,7 +820,10 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
if (victim_f) {
/* There was a victim. Kill it off nicely */
- drop_nlink(new_dentry->d_inode);
+ if (S_ISDIR(new_dentry->d_inode->i_mode))
+ clear_nlink(new_dentry->d_inode);
+ else
+ drop_nlink(new_dentry->d_inode);
/* Don't oops if the victim was a dirent pointing to an
inode which didn't exist. */
if (victim_f->inocache) {
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 1c0a08d..0095a70 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -27,13 +27,20 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page **pagep, void **fsdata);
static int jffs2_readpage (struct file *filp, struct page *pg);
-int jffs2_fsync(struct file *filp, int datasync)
+int jffs2_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
{
struct inode *inode = filp->f_mapping->host;
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+ int ret;
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
/* Trigger GC to flush any pending writes for this inode */
jffs2_flush_wbuf_gc(c, inode->i_ino);
+ mutex_unlock(&inode->i_mutex);
return 0;
}
@@ -56,7 +63,7 @@ const struct file_operations jffs2_file_operations =
const struct inode_operations jffs2_file_inode_operations =
{
- .check_acl = jffs2_check_acl,
+ .get_acl = jffs2_get_acl,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
@@ -128,33 +135,39 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page *pg;
struct inode *inode = mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
+ struct jffs2_raw_inode ri;
+ uint32_t alloc_len = 0;
pgoff_t index = pos >> PAGE_CACHE_SHIFT;
uint32_t pageofs = index << PAGE_CACHE_SHIFT;
int ret = 0;
+ D1(printk(KERN_DEBUG "%s()\n", __func__));
+
+ if (pageofs > inode->i_size) {
+ ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
+ ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
+ if (ret)
+ return ret;
+ }
+
+ mutex_lock(&f->sem);
pg = grab_cache_page_write_begin(mapping, index, flags);
- if (!pg)
+ if (!pg) {
+ if (alloc_len)
+ jffs2_complete_reservation(c);
+ mutex_unlock(&f->sem);
return -ENOMEM;
+ }
*pagep = pg;
- D1(printk(KERN_DEBUG "jffs2_write_begin()\n"));
-
- if (pageofs > inode->i_size) {
+ if (alloc_len) {
/* Make new hole frag from old EOF to new page */
- struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
- struct jffs2_raw_inode ri;
struct jffs2_full_dnode *fn;
- uint32_t alloc_len;
D1(printk(KERN_DEBUG "Writing new hole frag 0x%x-0x%x between current EOF and new page\n",
(unsigned int)inode->i_size, pageofs));
- ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len,
- ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
- if (ret)
- goto out_page;
-
- mutex_lock(&f->sem);
memset(&ri, 0, sizeof(ri));
ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
@@ -181,7 +194,6 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
if (IS_ERR(fn)) {
ret = PTR_ERR(fn);
jffs2_complete_reservation(c);
- mutex_unlock(&f->sem);
goto out_page;
}
ret = jffs2_add_full_dnode_to_inode(c, f, fn);
@@ -195,12 +207,10 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
jffs2_mark_node_obsolete(c, fn->raw);
jffs2_free_full_dnode(fn);
jffs2_complete_reservation(c);
- mutex_unlock(&f->sem);
goto out_page;
}
jffs2_complete_reservation(c);
inode->i_size = pageofs;
- mutex_unlock(&f->sem);
}
/*
@@ -209,18 +219,18 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
* case of a short-copy.
*/
if (!PageUptodate(pg)) {
- mutex_lock(&f->sem);
ret = jffs2_do_readpage_nolock(inode, pg);
- mutex_unlock(&f->sem);
if (ret)
goto out_page;
}
+ mutex_unlock(&f->sem);
D1(printk(KERN_DEBUG "end write_begin(). pg->flags %lx\n", pg->flags));
return ret;
out_page:
unlock_page(pg);
page_cache_release(pg);
+ mutex_unlock(&f->sem);
return ret;
}
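Both fsync conversions in this patch (jffs2 here, logfs below) follow the same new ->fsync contract: flush the requested byte range first, then do filesystem-specific commit work under i_mutex. A generic sketch with a hypothetical name:

static int example_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file->f_mapping->host;
	int ret;

	ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);
	/* ... commit fs-private state for this inode ... */
	mutex_unlock(&inode->i_mutex);
	return 0;
}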
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 46ad619..4b8afe3 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -80,7 +80,7 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE);
if (ret) {
jffs2_free_raw_inode(ri);
- if (S_ISLNK(inode->i_mode & S_IFMT))
+ if (S_ISLNK(inode->i_mode))
kfree(mdata);
return ret;
}
@@ -278,7 +278,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
inode->i_mtime = ITIME(je32_to_cpu(latest_node.mtime));
inode->i_ctime = ITIME(je32_to_cpu(latest_node.ctime));
- inode->i_nlink = f->inocache->pino_nlink;
+ set_nlink(inode, f->inocache->pino_nlink);
inode->i_blocks = (inode->i_size + 511) >> 9;
@@ -291,7 +291,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
case S_IFDIR:
{
struct jffs2_full_dirent *fd;
- inode->i_nlink = 2; /* parent and '.' */
+ set_nlink(inode, 2); /* parent and '.' */
for (fd=f->dents; fd; fd = fd->next) {
if (fd->type == DT_DIR && fd->ino)
@@ -379,7 +379,7 @@ void jffs2_dirty_inode(struct inode *inode, int flags)
jffs2_do_setattr(inode, &iattr);
}
-int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
+int jffs2_do_remount_fs(struct super_block *sb, int *flags, char *data)
{
struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
@@ -406,7 +406,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data)
/* jffs2_new_inode: allocate a new inode and inocache, add it to the hash,
fill in the raw_inode while you're at it. */
-struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri)
+struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_raw_inode *ri)
{
struct inode *inode;
struct super_block *sb = dir_i->i_sb;
@@ -453,7 +453,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
iput(inode);
return ERR_PTR(ret);
}
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
inode->i_ino = je32_to_cpu(ri->ino);
inode->i_mode = jemode_to_cpu(ri->mode);
inode->i_gid = je16_to_cpu(ri->gid);
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 0bc6a6c..55a0c1d 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -29,6 +29,11 @@
struct jffs2_inodirty;
+struct jffs2_mount_opts {
+ bool override_compr;
+ unsigned int compr;
+};
+
/* A struct for the overall file system control. Pointers to
jffs2_sb_info structs are named `c' in the source code.
Nee jffs_control
@@ -126,6 +131,7 @@ struct jffs2_sb_info {
#endif
struct jffs2_summary *summary; /* Summary information */
+ struct jffs2_mount_opts mount_opts;
#ifdef CONFIG_JFFS2_FS_XATTR
#define XATTRINDEX_HASHSIZE (57)
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index e4619b0..fa35ff7 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info
uint32_t version;
uint32_t data_crc;
uint32_t partial_crc;
- uint16_t csize;
+ uint32_t csize;
uint16_t overlapped;
};
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index e304795..145ba39 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -128,6 +128,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
spin_unlock(&c->erase_completion_lock);
schedule();
+ remove_wait_queue(&c->erase_wait, &wait);
} else
spin_unlock(&c->erase_completion_lock);
} else if (ret)
@@ -158,19 +159,24 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
uint32_t *len, uint32_t sumsize)
{
- int ret = -EAGAIN;
+ int ret;
minsize = PAD(minsize);
D1(printk(KERN_DEBUG "jffs2_reserve_space_gc(): Requested 0x%x bytes\n", minsize));
- spin_lock(&c->erase_completion_lock);
- while(ret == -EAGAIN) {
+ while (true) {
+ spin_lock(&c->erase_completion_lock);
ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
if (ret) {
D1(printk(KERN_DEBUG "jffs2_reserve_space_gc: looping, ret is %d\n", ret));
}
+ spin_unlock(&c->erase_completion_lock);
+
+ if (ret == -EAGAIN)
+ cond_resched();
+ else
+ break;
}
- spin_unlock(&c->erase_completion_lock);
if (!ret)
ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
@@ -355,16 +361,14 @@ static int jffs2_do_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
spin_unlock(&c->erase_completion_lock);
ret = jffs2_prealloc_raw_node_refs(c, jeb, 1);
-
+ if (ret)
+ return ret;
/* Just lock it again and continue. Nothing much can change because
we hold c->alloc_sem anyway. In fact, it's not entirely clear why
we hold c->erase_completion_lock in the majority of this function...
but that's a question for another (more caffeine-rich) day. */
spin_lock(&c->erase_completion_lock);
- if (ret)
- return ret;
-
waste = jeb->free_size;
jffs2_link_node_ref(c, jeb,
(jeb->offset + c->sector_size - waste) | REF_OBSOLETE,
diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h
index 65c6c43..ab65ee3 100644
--- a/fs/jffs2/os-linux.h
+++ b/fs/jffs2/os-linux.h
@@ -158,7 +158,7 @@ extern const struct inode_operations jffs2_dir_inode_operations;
extern const struct file_operations jffs2_file_operations;
extern const struct inode_operations jffs2_file_inode_operations;
extern const struct address_space_operations jffs2_file_address_operations;
-int jffs2_fsync(struct file *, int);
+int jffs2_fsync(struct file *, loff_t, loff_t, int);
int jffs2_do_readpage_unlock (struct inode *inode, struct page *pg);
/* ioctl.c */
@@ -173,10 +173,10 @@ int jffs2_do_setattr (struct inode *, struct iattr *);
struct inode *jffs2_iget(struct super_block *, unsigned long);
void jffs2_evict_inode (struct inode *);
void jffs2_dirty_inode(struct inode *inode, int flags);
-struct inode *jffs2_new_inode (struct inode *dir_i, int mode,
+struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode,
struct jffs2_raw_inode *ri);
int jffs2_statfs (struct dentry *, struct kstatfs *);
-int jffs2_remount_fs (struct super_block *, int *, char *);
+int jffs2_do_remount_fs(struct super_block *, int *, char *);
int jffs2_do_fill_super(struct super_block *sb, void *data, int silent);
void jffs2_gc_release_inode(struct jffs2_sb_info *c,
struct jffs2_inode_info *f);
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 2ab1a0d..ee57bac 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -1041,7 +1041,7 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf
/* FIXME: point() */
err = jffs2_flash_read(c, ref_offset(ref), len, &retlen, buf);
if (err) {
- JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
+ JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ref_offset(ref), err);
goto free_out;
}
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 8d8cd34..327cc17 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -275,9 +275,7 @@ int jffs2_scan_medium(struct jffs2_sb_info *c)
else
c->mtd->unpoint(c->mtd, 0, c->mtd->size);
#endif
- if (s)
- kfree(s);
-
+ kfree(s);
return ret;
}
@@ -505,6 +503,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
sumlen = c->sector_size - je32_to_cpu(sm->offset);
sumptr = buf + buf_size - sumlen;
+ /* sm->offset maybe wrong but MAGIC maybe right */
+ if (sumlen > c->sector_size)
+ goto full_scan;
+
/* Now, make sure the summary itself is available */
if (sumlen > buf_size) {
/* Need to kmalloc for this. */
@@ -539,6 +541,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
}
}
+full_scan:
buf_ofs = jeb->offset;
if (!buf_size) {
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index cfeb716..0f20208 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -22,26 +22,29 @@
#include <linux/security.h>
#include "nodelist.h"
-/* ---- Initial Security Label Attachment -------------- */
-int jffs2_init_security(struct inode *inode, struct inode *dir,
- const struct qstr *qstr)
+/* ---- Initial Security Label(s) Attachment callback --- */
+int jffs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
+ void *fs_info)
{
- int rc;
- size_t len;
- void *value;
- char *name;
+ const struct xattr *xattr;
+ int err = 0;
- rc = security_inode_init_security(inode, dir, qstr, &name, &value, &len);
- if (rc) {
- if (rc == -EOPNOTSUPP)
- return 0;
- return rc;
+ for (xattr = xattr_array; xattr->name != NULL; xattr++) {
+ err = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY,
+ xattr->name, xattr->value,
+ xattr->value_len, 0);
+ if (err < 0)
+ break;
}
- rc = do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, value, len, 0);
+ return err;
+}
- kfree(name);
- kfree(value);
- return rc;
+/* ---- Initial Security Label(s) Attachment ----------- */
+int jffs2_init_security(struct inode *inode, struct inode *dir,
+ const struct qstr *qstr)
+{
+ return security_inode_init_security(inode, dir, qstr,
+ &jffs2_initxattrs, NULL);
}
/* ---- XATTR Handler for "security.*" ----------------- */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 853b8e3..e7e9744 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -17,11 +17,13 @@
#include <linux/fs.h>
#include <linux/err.h>
#include <linux/mount.h>
+#include <linux/parser.h>
#include <linux/jffs2.h>
#include <linux/pagemap.h>
#include <linux/mtd/super.h>
#include <linux/ctype.h>
#include <linux/namei.h>
+#include <linux/seq_file.h>
#include <linux/exportfs.h>
#include "compr.h"
#include "nodelist.h"
@@ -75,6 +77,37 @@ static void jffs2_write_super(struct super_block *sb)
unlock_super(sb);
}
+static const char *jffs2_compr_name(unsigned int compr)
+{
+ switch (compr) {
+ case JFFS2_COMPR_MODE_NONE:
+ return "none";
+#ifdef CONFIG_JFFS2_LZO
+ case JFFS2_COMPR_MODE_FORCELZO:
+ return "lzo";
+#endif
+#ifdef CONFIG_JFFS2_ZLIB
+ case JFFS2_COMPR_MODE_FORCEZLIB:
+ return "zlib";
+#endif
+ default:
+ /* should never happen; programmer error */
+ WARN_ON(1);
+ return "";
+ }
+}
+
+static int jffs2_show_options(struct seq_file *s, struct vfsmount *mnt)
+{
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(mnt->mnt_sb);
+ struct jffs2_mount_opts *opts = &c->mount_opts;
+
+ if (opts->override_compr)
+ seq_printf(s, ",compr=%s", jffs2_compr_name(opts->compr));
+
+ return 0;
+}
+
static int jffs2_sync_fs(struct super_block *sb, int wait)
{
struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
@@ -133,6 +166,85 @@ static const struct export_operations jffs2_export_ops = {
.fh_to_parent = jffs2_fh_to_parent,
};
+/*
+ * JFFS2 mount options.
+ *
+ * Opt_override_compr: override default compressor
+ * Opt_err: just end of array marker
+ */
+enum {
+ Opt_override_compr,
+ Opt_err,
+};
+
+static const match_table_t tokens = {
+ {Opt_override_compr, "compr=%s"},
+ {Opt_err, NULL},
+};
+
+static int jffs2_parse_options(struct jffs2_sb_info *c, char *data)
+{
+ substring_t args[MAX_OPT_ARGS];
+ char *p, *name;
+
+ if (!data)
+ return 0;
+
+ while ((p = strsep(&data, ","))) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_override_compr:
+ name = match_strdup(&args[0]);
+
+ if (!name)
+ return -ENOMEM;
+ if (!strcmp(name, "none"))
+ c->mount_opts.compr = JFFS2_COMPR_MODE_NONE;
+#ifdef CONFIG_JFFS2_LZO
+ else if (!strcmp(name, "lzo"))
+ c->mount_opts.compr = JFFS2_COMPR_MODE_FORCELZO;
+#endif
+#ifdef CONFIG_JFFS2_ZLIB
+ else if (!strcmp(name, "zlib"))
+ c->mount_opts.compr =
+ JFFS2_COMPR_MODE_FORCEZLIB;
+#endif
+ else {
+ printk(KERN_ERR "JFFS2 Error: unknown compressor \"%s\"",
+ name);
+ kfree(name);
+ return -EINVAL;
+ }
+ kfree(name);
+ c->mount_opts.override_compr = true;
+ break;
+ default:
+ printk(KERN_ERR "JFFS2 Error: unrecognized mount option '%s' or missing value\n",
+ p);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+ struct jffs2_sb_info *c = JFFS2_SB_INFO(sb);
+ int err;
+
+ err = jffs2_parse_options(c, data);
+ if (err)
+ return -EINVAL;
+
+ return jffs2_do_remount_fs(sb, flags, data);
+}
+
static const struct super_operations jffs2_super_operations =
{
.alloc_inode = jffs2_alloc_inode,
@@ -143,6 +255,7 @@ static const struct super_operations jffs2_super_operations =
.remount_fs = jffs2_remount_fs,
.evict_inode = jffs2_evict_inode,
.dirty_inode = jffs2_dirty_inode,
+ .show_options = jffs2_show_options,
.sync_fs = jffs2_sync_fs,
};
@@ -166,6 +279,12 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
c->os_priv = sb;
sb->s_fs_info = c;
+ ret = jffs2_parse_options(c, data);
+ if (ret) {
+ kfree(c);
+ return -EINVAL;
+ }
+
/* Initialize JFFS2 superblock locks, the further initialization will
* be done later */
mutex_init(&c->alloc_sem);
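With the parsing wired up above, the new compr= option can be exercised from userspace through mount(2); the device and mountpoint below are placeholders:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* "mtd2" names an MTD device by way of example; any accepted
	 * jffs2 source works. "compr=lzo" is the option parsed above. */
	if (mount("mtd2", "/mnt/flash", "jffs2", 0, "compr=lzo") != 0)
		perror("mount");
	return 0;
}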
diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c
index b955626..e3035af 100644
--- a/fs/jffs2/symlink.c
+++ b/fs/jffs2/symlink.c
@@ -20,7 +20,7 @@ const struct inode_operations jffs2_symlink_inode_operations =
{
.readlink = generic_readlink,
.follow_link = jffs2_follow_link,
- .check_acl = jffs2_check_acl,
+ .get_acl = jffs2_get_acl,
.setattr = jffs2_setattr,
.setxattr = jffs2_setxattr,
.getxattr = jffs2_getxattr,
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 4515bea..464cd76 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -578,8 +578,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad)
if (!jffs2_is_writebuffered(c))
return 0;
- if (mutex_trylock(&c->alloc_sem)) {
- mutex_unlock(&c->alloc_sem);
+ if (!mutex_is_locked(&c->alloc_sem)) {
printk(KERN_CRIT "jffs2_flush_wbuf() called with alloc_sem not locked!\n");
BUG();
}
@@ -1026,18 +1025,18 @@ int jffs2_check_oob_empty(struct jffs2_sb_info *c,
int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
struct mtd_oob_ops ops;
- ops.mode = MTD_OOB_AUTO;
+ ops.mode = MTD_OPS_AUTO_OOB;
ops.ooblen = NR_OOB_SCAN_PAGES * c->oobavail;
ops.oobbuf = c->oobbuf;
ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
ops.datbuf = NULL;
ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
- if (ret || ops.oobretlen != ops.ooblen) {
+ if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) {
printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
" bytes, read %zd bytes, error %d\n",
jeb->offset, ops.ooblen, ops.oobretlen, ret);
- if (!ret)
+ if (!ret || mtd_is_bitflip(ret))
ret = -EIO;
return ret;
}
@@ -1069,18 +1068,18 @@ int jffs2_check_nand_cleanmarker(struct jffs2_sb_info *c,
struct mtd_oob_ops ops;
int ret, cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
- ops.mode = MTD_OOB_AUTO;
+ ops.mode = MTD_OPS_AUTO_OOB;
ops.ooblen = cmlen;
ops.oobbuf = c->oobbuf;
ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
ops.datbuf = NULL;
ret = c->mtd->read_oob(c->mtd, jeb->offset, &ops);
- if (ret || ops.oobretlen != ops.ooblen) {
+ if ((ret && !mtd_is_bitflip(ret)) || ops.oobretlen != ops.ooblen) {
printk(KERN_ERR "cannot read OOB for EB at %08x, requested %zd"
" bytes, read %zd bytes, error %d\n",
jeb->offset, ops.ooblen, ops.oobretlen, ret);
- if (!ret)
+ if (!ret || mtd_is_bitflip(ret))
ret = -EIO;
return ret;
}
@@ -1095,7 +1094,7 @@ int jffs2_write_nand_cleanmarker(struct jffs2_sb_info *c,
struct mtd_oob_ops ops;
int cmlen = min_t(int, c->oobavail, OOB_CM_SIZE);
- ops.mode = MTD_OOB_AUTO;
+ ops.mode = MTD_OPS_AUTO_OOB;
ops.ooblen = cmlen;
ops.oobbuf = (uint8_t *)&oob_cleanmarker;
ops.len = ops.ooboffs = ops.retlen = ops.oobretlen = 0;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 1afae26..b7d7f67 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -197,7 +197,7 @@ static int logfs_remove_inode(struct inode *inode)
{
int ret;
- inode->i_nlink--;
+ drop_nlink(inode);
ret = write_inode(inode);
LOGFS_BUG_ON(ret, inode->i_sb);
return ret;
@@ -371,11 +371,9 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
page_cache_release(page);
inode = logfs_iget(dir->i_sb, ino);
- if (IS_ERR(inode)) {
+ if (IS_ERR(inode))
printk(KERN_ERR"LogFS: Cannot read inode #%llx for dentry (%lx, %lx)n",
ino, dir->i_ino, index);
- return ERR_CAST(inode);
- }
return d_splice_alias(inode, dentry);
}
@@ -435,7 +433,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
ta = kzalloc(sizeof(*ta), GFP_KERNEL);
if (!ta) {
- inode->i_nlink--;
+ drop_nlink(inode);
iput(inode);
return -ENOMEM;
}
@@ -458,7 +456,7 @@ static int __logfs_create(struct inode *dir, struct dentry *dentry,
abort_transaction(inode, ta);
li->li_flags |= LOGFS_IF_STILLBORN;
/* FIXME: truncate symlink */
- inode->i_nlink--;
+ drop_nlink(inode);
iput(inode);
goto out;
}
@@ -565,7 +563,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
ihold(inode);
- inode->i_nlink++;
+ inc_nlink(inode);
mark_inode_dirty_sync(inode);
return __logfs_create(dir, dentry, inode, NULL, 0);
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index c2ad702..b548c87 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -219,11 +219,20 @@ long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
}
-int logfs_fsync(struct file *file, int datasync)
+int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
struct super_block *sb = file->f_mapping->host->i_sb;
+ struct inode *inode = file->f_mapping->host;
+ int ret;
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ if (ret)
+ return ret;
+ mutex_lock(&inode->i_mutex);
logfs_write_anchor(sb);
+ mutex_unlock(&inode->i_mutex);
+
return 0;
}
diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c
index edfea7a..7e441ad 100644
--- a/fs/logfs/inode.c
+++ b/fs/logfs/inode.c
@@ -93,7 +93,7 @@ static struct inode *__logfs_iget(struct super_block *sb, ino_t ino)
/* inode->i_nlink == 0 can be true when called from
* block validator */
/* set i_nlink to 0 to prevent caching */
- inode->i_nlink = 0;
+ clear_nlink(inode);
logfs_inode(inode)->li_flags |= LOGFS_IF_ZOMBIE;
iget_failed(inode);
if (!err)
@@ -199,7 +199,6 @@ static void logfs_init_inode(struct super_block *sb, struct inode *inode)
inode->i_blocks = 0;
inode->i_ctime = CURRENT_TIME;
inode->i_mtime = CURRENT_TIME;
- inode->i_nlink = 1;
li->li_refcount = 1;
INIT_LIST_HEAD(&li->li_freeing_list);
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index 57afd4a..398ecff 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -506,7 +506,7 @@ extern const struct file_operations logfs_reg_fops;
extern const struct address_space_operations logfs_reg_aops;
int logfs_readpage(struct file *file, struct page *page);
long logfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-int logfs_fsync(struct file *file, int datasync);
+int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
/* gc.c */
u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec);
@@ -618,7 +618,6 @@ static inline int logfs_buf_recover(struct logfs_area *area, u64 ofs,
struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index);
void emergency_read_end(struct page *page);
void logfs_crash_dump(struct super_block *sb);
-void *memchr_inv(const void *s, int c, size_t n);
int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
int logfs_check_ds(struct logfs_disk_super *ds);
int logfs_write_sb(struct super_block *sb);
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index d8d0938..2ac4217 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -126,7 +126,7 @@ static void logfs_disk_to_inode(struct logfs_disk_inode *di, struct inode*inode)
inode->i_atime = be64_to_timespec(di->di_atime);
inode->i_ctime = be64_to_timespec(di->di_ctime);
inode->i_mtime = be64_to_timespec(di->di_mtime);
- inode->i_nlink = be32_to_cpu(di->di_refcount);
+ set_nlink(inode, be32_to_cpu(di->di_refcount));
inode->i_generation = be32_to_cpu(di->di_generation);
switch (inode->i_mode & S_IFMT) {
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index ce03a18..e795c234 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -13,6 +13,7 @@
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
+#include <linux/module.h>
#include <linux/mtd/mtd.h>
#include <linux/statfs.h>
#include <linux/buffer_head.h>
@@ -91,28 +92,6 @@ void logfs_crash_dump(struct super_block *sb)
}
/*
- * TODO: move to lib/string.c
- */
-/**
- * memchr_inv - Find a character in an area of memory.
- * @s: The memory area
- * @c: The byte to search for
- * @n: The size of the area.
- *
- * returns the address of the first character other than @c, or %NULL
- * if the whole buffer contains just @c.
- */
-void *memchr_inv(const void *s, int c, size_t n)
-{
- const unsigned char *p = s;
- while (n-- != 0)
- if ((unsigned char)c != *p++)
- return (void *)(p - 1);
-
- return NULL;
-}
-
-/*
* FIXME: There should be a reserve for root, similar to ext2.
*/
int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
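memchr_inv() is deleted here because an identical helper now lives in lib/string.c, so the contract callers rely on is unchanged. An illustrative kernel-side use:

	char buf[16] = { 0 };

	/* returns NULL when every byte equals the given character */
	if (memchr_inv(buf, 0, sizeof(buf)) == NULL)
		pr_debug("buffer is all zeroes\n");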
diff --git a/fs/minix/bitmap.c b/fs/minix/bitmap.c
index 3f32bcb..ef175cb 100644
--- a/fs/minix/bitmap.c
+++ b/fs/minix/bitmap.c
@@ -16,38 +16,26 @@
#include <linux/bitops.h>
#include <linux/sched.h>
-static const int nibblemap[] = { 4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0 };
-
static DEFINE_SPINLOCK(bitmap_lock);
-static unsigned long count_free(struct buffer_head *map[], unsigned numblocks, __u32 numbits)
+/*
+ * bitmap consists of blocks filled with 16bit words
+ * bit set == busy, bit clear == free
+ * endianness is a mess, but for counting zero bits it really doesn't matter...
+ */
+static __u32 count_free(struct buffer_head *map[], unsigned blocksize, __u32 numbits)
{
- unsigned i, j, sum = 0;
- struct buffer_head *bh;
-
- for (i=0; i<numblocks-1; i++) {
- if (!(bh=map[i]))
- return(0);
- for (j=0; j<bh->b_size; j++)
- sum += nibblemap[bh->b_data[j] & 0xf]
- + nibblemap[(bh->b_data[j]>>4) & 0xf];
- }
+ __u32 sum = 0;
+ unsigned blocks = DIV_ROUND_UP(numbits, blocksize * 8);
- if (numblocks==0 || !(bh=map[numblocks-1]))
- return(0);
- i = ((numbits - (numblocks-1) * bh->b_size * 8) / 16) * 2;
- for (j=0; j<i; j++) {
- sum += nibblemap[bh->b_data[j] & 0xf]
- + nibblemap[(bh->b_data[j]>>4) & 0xf];
+ while (blocks--) {
+ unsigned words = blocksize / 2;
+ __u16 *p = (__u16 *)(*map++)->b_data;
+ while (words--)
+ sum += 16 - hweight16(*p++);
}
- i = numbits%16;
- if (i!=0) {
- i = *(__u16 *)(&bh->b_data[j]) | ~((1<<i) - 1);
- sum += nibblemap[i & 0xf] + nibblemap[(i>>4) & 0xf];
- sum += nibblemap[(i>>8) & 0xf] + nibblemap[(i>>12) & 0xf];
- }
- return(sum);
+ return sum;
}
void minix_free_block(struct inode *inode, unsigned long block)
@@ -105,10 +93,12 @@ int minix_new_block(struct inode * inode)
return 0;
}
-unsigned long minix_count_free_blocks(struct minix_sb_info *sbi)
+unsigned long minix_count_free_blocks(struct super_block *sb)
{
- return (count_free(sbi->s_zmap, sbi->s_zmap_blocks,
- sbi->s_nzones - sbi->s_firstdatazone + 1)
+ struct minix_sb_info *sbi = minix_sb(sb);
+ u32 bits = sbi->s_nzones - (sbi->s_firstdatazone + 1);
+
+ return (count_free(sbi->s_zmap, sb->s_blocksize, bits)
<< sbi->s_log_zone_size);
}
@@ -273,7 +263,10 @@ struct inode *minix_new_inode(const struct inode *dir, int mode, int *error)
return inode;
}
-unsigned long minix_count_free_inodes(struct minix_sb_info *sbi)
+unsigned long minix_count_free_inodes(struct super_block *sb)
{
- return count_free(sbi->s_imap, sbi->s_imap_blocks, sbi->s_ninodes + 1);
+ struct minix_sb_info *sbi = minix_sb(sb);
+ u32 bits = sbi->s_ninodes + 1;
+
+ return count_free(sbi->s_imap, sb->s_blocksize, bits);
}
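The rewritten count_free() above simply population-counts each 16-bit bitmap word and sums the clear (free) bits. A portable stand-in for the kernel's hweight16() shows the idea; these helpers are illustrative, not the in-tree ones.

static unsigned popcount16(unsigned short v)
{
	unsigned n = 0;

	while (v) {
		v &= v - 1;	/* clear the lowest set bit */
		n++;
	}
	return n;
}

static unsigned count_free_words(const unsigned short *p, unsigned words)
{
	unsigned sum = 0;

	while (words--)
		sum += 16 - popcount16(*p++);	/* clear bit == free */
	return sum;
}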
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index adcdc0a..4d46a6a 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -263,6 +263,26 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
goto out_no_root;
}
+ /* Apparently minix can create filesystems that allocate more blocks for
+ * the bitmaps than needed. We simply ignore that, but verify it didn't
+ * create one with not enough blocks and bail out if so.
+ */
+ block = minix_blocks_needed(sbi->s_ninodes, s->s_blocksize);
+ if (sbi->s_imap_blocks < block) {
+ printk("MINIX-fs: file system does not have enough "
+ "imap blocks allocated. Refusing to mount\n");
+ goto out_iput;
+ }
+
+ block = minix_blocks_needed(
+ (sbi->s_nzones - (sbi->s_firstdatazone + 1)),
+ s->s_blocksize);
+ if (sbi->s_zmap_blocks < block) {
+ printk("MINIX-fs: file system does not have enough "
+ "zmap blocks allocated. Refusing to mount.\n");
+ goto out_iput;
+ }
+
ret = -ENOMEM;
s->s_root = d_alloc_root(root_inode);
if (!s->s_root)
@@ -276,9 +296,10 @@ static int minix_fill_super(struct super_block *s, void *data, int silent)
if (!(sbi->s_mount_state & MINIX_VALID_FS))
printk("MINIX-fs: mounting unchecked file system, "
"running fsck is recommended\n");
- else if (sbi->s_mount_state & MINIX_ERROR_FS)
+ else if (sbi->s_mount_state & MINIX_ERROR_FS)
printk("MINIX-fs: mounting file system with errors, "
"running fsck is recommended\n");
+
return 0;
out_iput:
@@ -339,10 +360,10 @@ static int minix_statfs(struct dentry *dentry, struct kstatfs *buf)
buf->f_type = sb->s_magic;
buf->f_bsize = sb->s_blocksize;
buf->f_blocks = (sbi->s_nzones - sbi->s_firstdatazone) << sbi->s_log_zone_size;
- buf->f_bfree = minix_count_free_blocks(sbi);
+ buf->f_bfree = minix_count_free_blocks(sb);
buf->f_bavail = buf->f_bfree;
buf->f_files = sbi->s_ninodes;
- buf->f_ffree = minix_count_free_inodes(sbi);
+ buf->f_ffree = minix_count_free_inodes(sb);
buf->f_namelen = sbi->s_namelen;
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
@@ -446,7 +467,7 @@ static struct inode *V1_minix_iget(struct inode *inode)
inode->i_mode = raw_inode->i_mode;
inode->i_uid = (uid_t)raw_inode->i_uid;
inode->i_gid = (gid_t)raw_inode->i_gid;
- inode->i_nlink = raw_inode->i_nlinks;
+ set_nlink(inode, raw_inode->i_nlinks);
inode->i_size = raw_inode->i_size;
inode->i_mtime.tv_sec = inode->i_atime.tv_sec = inode->i_ctime.tv_sec = raw_inode->i_time;
inode->i_mtime.tv_nsec = 0;
@@ -479,7 +500,7 @@ static struct inode *V2_minix_iget(struct inode *inode)
inode->i_mode = raw_inode->i_mode;
inode->i_uid = (uid_t)raw_inode->i_uid;
inode->i_gid = (gid_t)raw_inode->i_gid;
- inode->i_nlink = raw_inode->i_nlinks;
+ set_nlink(inode, raw_inode->i_nlinks);
inode->i_size = raw_inode->i_size;
inode->i_mtime.tv_sec = raw_inode->i_mtime;
inode->i_atime.tv_sec = raw_inode->i_atime;
@@ -596,8 +617,7 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
- struct inode *dir = dentry->d_parent->d_inode;
- struct super_block *sb = dir->i_sb;
+ struct super_block *sb = dentry->d_sb;
generic_fillattr(dentry->d_inode, stat);
if (INODE_VERSION(dentry->d_inode) == MINIX_V1)
stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
diff --git a/fs/minix/minix.h b/fs/minix/minix.h
index 341e212..26bbd55 100644
--- a/fs/minix/minix.h
+++ b/fs/minix/minix.h
@@ -48,10 +48,10 @@ extern struct minix_inode * minix_V1_raw_inode(struct super_block *, ino_t, stru
extern struct minix2_inode * minix_V2_raw_inode(struct super_block *, ino_t, struct buffer_head **);
extern struct inode * minix_new_inode(const struct inode *, int, int *);
extern void minix_free_inode(struct inode * inode);
-extern unsigned long minix_count_free_inodes(struct minix_sb_info *sbi);
+extern unsigned long minix_count_free_inodes(struct super_block *sb);
extern int minix_new_block(struct inode * inode);
extern void minix_free_block(struct inode *inode, unsigned long block);
-extern unsigned long minix_count_free_blocks(struct minix_sb_info *sbi);
+extern unsigned long minix_count_free_blocks(struct super_block *sb);
extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *);
extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
@@ -88,6 +88,11 @@ static inline struct minix_inode_info *minix_i(struct inode *inode)
return list_entry(inode, struct minix_inode_info, vfs_inode);
}
+static inline unsigned minix_blocks_needed(unsigned bits, unsigned blocksize)
+{
+ return DIV_ROUND_UP(bits, blocksize * 8);
+}
+
#if defined(CONFIG_MINIX_FS_NATIVE_ENDIAN) && \
defined(CONFIG_MINIX_FS_BIG_ENDIAN_16BIT_INDEXED)
@@ -125,7 +130,7 @@ static inline int minix_find_first_zero_bit(const void *vaddr, unsigned size)
if (!size)
return 0;
- size = (size >> 4) + ((size & 15) > 0);
+ size >>= 4;
while (*p++ == 0xffff) {
if (--size == 0)
return (p - addr) << 4;
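minix_blocks_needed() above is plain ceiling division over the bits a block can hold; a worked example with illustrative numbers matching the new fill_super checks:

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	/* 65535 inode bits at a 1 KiB block size (8192 bits per block)
	 * need 8 imap blocks; fewer than that now fails the mount. */
	printf("%u\n", DIV_ROUND_UP(65535u, 1024u * 8));	/* prints 8 */
	return 0;
}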
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 9c51f62..efa38a9 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -391,7 +391,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
- dent = list_entry(next, struct dentry, d_u.d_child);
+ dent = list_entry(next, struct dentry, d_child);
if ((unsigned long)dent->d_fsdata == fpos) {
if (dent->d_inode)
dget(dent);
@@ -1033,15 +1033,6 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry)
DPRINTK("ncp_rmdir: removing %s/%s\n",
dentry->d_parent->d_name.name, dentry->d_name.name);
- /*
- * fail with EBUSY if there are still references to this
- * directory.
- */
- dentry_unhash(dentry);
- error = -EBUSY;
- if (!d_unhashed(dentry))
- goto out;
-
len = sizeof(__name);
error = ncp_io2vol(server, __name, &len, dentry->d_name.name,
dentry->d_name.len, !ncp_preserve_case(dir));
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 0ed65e0..64a3264 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -20,9 +20,9 @@
#include "ncp_fs.h"
-static int ncp_fsync(struct file *file, int datasync)
+static int ncp_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- return 0;
+ return filemap_write_and_wait_range(file->f_mapping, start, end);
}
/*
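ncp_fsync() is adapting to the ranged ->fsync prototype: callers now pass the
byte range to sync, and the VFS no longer flushes the pagecache (or takes
i_mutex) on the implementation's behalf before calling in. For a filesystem
with no private metadata to write back, the whole job reduces to one call; a
sketch with a hypothetical example_fsync:

    #include <linux/fs.h>

    /* Minimal ranged ->fsync, assuming no metadata of our own:
     * flush and wait on just the dirty pages in [start, end]. */
    static int example_fsync(struct file *file, loff_t start, loff_t end,
                             int datasync)
    {
            return filemap_write_and_wait_range(file->f_mapping, start, end);
    }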
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 202f370..cbd1a61 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -228,7 +228,7 @@ static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo)
DDPRINTK("ncp_read_inode: inode->i_mode = %u\n", inode->i_mode);
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
inode->i_uid = server->m.uid;
inode->i_gid = server->m.gid;
@@ -548,7 +548,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
error = bdi_setup_and_register(&server->bdi, "ncpfs", BDI_CAP_MAP_COPY);
if (error)
- goto out_bdi;
+ goto out_fput;
server->ncp_filp = ncp_filp;
server->ncp_sock = sock;
@@ -559,7 +559,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent)
error = -EBADF;
server->info_filp = fget(data.info_fd);
if (!server->info_filp)
- goto out_fput;
+ goto out_bdi;
error = -ENOTSOCK;
sock_inode = server->info_filp->f_path.dentry->d_inode;
if (!S_ISSOCK(sock_inode->i_mode))
@@ -746,9 +746,9 @@ out_nls:
out_fput2:
if (server->info_filp)
fput(server->info_filp);
-out_fput:
- bdi_destroy(&server->bdi);
out_bdi:
+ bdi_destroy(&server->bdi);
+out_fput:
/* 23/12/1998 Marcin Dalecki <dalecki@cs.net.pl>:
*
* The previously used put_filp(ncp_filp); was bogus, since
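The ncp_fill_super() hunks swap both the goto targets and the label bodies,
which leaves the unwind behaviour unchanged but makes each label's name match
the cleanup it performs: out_bdi now destroys the bdi and out_fput does the
fput. The point of the idiom is that labels run in reverse order of
acquisition, so a goto after a failed step releases exactly what was already
set up. A self-contained sketch (all names hypothetical):

    /* Each label undoes one acquisition, in reverse order, and is
     * named after what it releases. */
    struct ctx { int a, b; };

    static int acquire_a(struct ctx *c) { c->a = 1; return 0; }
    static void release_a(struct ctx *c) { c->a = 0; }
    static int acquire_b(struct ctx *c) { c->b = 1; return 0; }
    static void release_b(struct ctx *c) { c->b = 0; }
    static int do_work(struct ctx *c) { return (c->a && c->b) ? 0 : -1; }

    static int example_setup(struct ctx *c)
    {
            int err;

            err = acquire_a(c);
            if (err)
                    return err;
            err = acquire_b(c);
            if (err)
                    goto out_release_a;     /* only 'a' is held here */
            err = do_work(c);
            if (err)
                    goto out_release_b;
            return 0;

    out_release_b:
            release_b(c);
    out_release_a:
            release_a(c);
            return err;
    }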
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 790e92a..ea6f706 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -445,7 +445,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
result = -EIO;
}
}
- result = 0;
}
mutex_unlock(&server->root_setup_lock);
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 09881e6..64a817a 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -194,7 +194,7 @@ ncp_renew_dentries(struct dentry *parent)
spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
- dentry = list_entry(next, struct dentry, d_u.d_child);
+ dentry = list_entry(next, struct dentry, d_child);
if (dentry->d_fsdata == NULL)
ncp_age_dentry(server, dentry);
@@ -216,7 +216,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
- dentry = list_entry(next, struct dentry, d_u.d_child);
+ dentry = list_entry(next, struct dentry, d_child);
dentry->d_fsdata = NULL;
ncp_age_dentry(server, dentry);
next = next->next;
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
deleted file mode 100644
index 124e8fc..0000000
--- a/fs/nfsctl.c
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * fs/nfsctl.c
- *
- * This should eventually move to userland.
- *
- */
-#include <linux/types.h>
-#include <linux/file.h>
-#include <linux/fs.h>
-#include <linux/nfsd/syscall.h>
-#include <linux/cred.h>
-#include <linux/sched.h>
-#include <linux/linkage.h>
-#include <linux/namei.h>
-#include <linux/mount.h>
-#include <linux/syscalls.h>
-#include <asm/uaccess.h>
-
-/*
- * open a file on nfsd fs
- */
-
-static struct file *do_open(char *name, int flags)
-{
- struct vfsmount *mnt;
- struct file *file;
-
- mnt = do_kern_mount("nfsd", 0, "nfsd", NULL);
- if (IS_ERR(mnt))
- return (struct file *)mnt;
-
- file = file_open_root(mnt->mnt_root, mnt, name, flags);
-
- mntput(mnt); /* drop do_kern_mount reference */
- return file;
-}
-
-static struct {
- char *name; int wsize; int rsize;
-} map[] = {
- [NFSCTL_SVC] = {
- .name = ".svc",
- .wsize = sizeof(struct nfsctl_svc)
- },
- [NFSCTL_ADDCLIENT] = {
- .name = ".add",
- .wsize = sizeof(struct nfsctl_client)
- },
- [NFSCTL_DELCLIENT] = {
- .name = ".del",
- .wsize = sizeof(struct nfsctl_client)
- },
- [NFSCTL_EXPORT] = {
- .name = ".export",
- .wsize = sizeof(struct nfsctl_export)
- },
- [NFSCTL_UNEXPORT] = {
- .name = ".unexport",
- .wsize = sizeof(struct nfsctl_export)
- },
- [NFSCTL_GETFD] = {
- .name = ".getfd",
- .wsize = sizeof(struct nfsctl_fdparm),
- .rsize = NFS_FHSIZE
- },
- [NFSCTL_GETFS] = {
- .name = ".getfs",
- .wsize = sizeof(struct nfsctl_fsparm),
- .rsize = sizeof(struct knfsd_fh)
- },
-};
-
-SYSCALL_DEFINE3(nfsservctl, int, cmd, struct nfsctl_arg __user *, arg,
- void __user *, res)
-{
- struct file *file;
- void __user *p = &arg->u;
- int version;
- int err;
-
- if (copy_from_user(&version, &arg->ca_version, sizeof(int)))
- return -EFAULT;
-
- if (version != NFSCTL_VERSION)
- return -EINVAL;
-
- if (cmd < 0 || cmd >= ARRAY_SIZE(map) || !map[cmd].name)
- return -EINVAL;
-
- file = do_open(map[cmd].name, map[cmd].rsize ? O_RDWR : O_WRONLY);
- if (IS_ERR(file))
- return PTR_ERR(file);
- err = file->f_op->write(file, p, map[cmd].wsize, &file->f_pos);
- if (err >= 0 && map[cmd].rsize)
- err = file->f_op->read(file, res, map[cmd].rsize, &file->f_pos);
- if (err >= 0)
- err = 0;
- fput(file);
- return err;
-}
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h
index 2142b1c..53c27ea 100644
--- a/fs/ntfs/debug.h
+++ b/fs/ntfs/debug.h
@@ -30,8 +30,9 @@
extern int debug_msgs;
-extern void __ntfs_debug(const char *file, int line, const char *function,
- const char *format, ...) __attribute__ ((format (printf, 4, 5)));
+extern __printf(4, 5)
+void __ntfs_debug(const char *file, int line, const char *function,
+ const char *format, ...);
/**
* ntfs_debug - write a debug level message to syslog
* @f: a printf format string containing the message
@@ -52,12 +53,14 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl);
#endif /* !DEBUG */
-extern void __ntfs_warning(const char *function, const struct super_block *sb,
- const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
+extern __printf(3, 4)
+void __ntfs_warning(const char *function, const struct super_block *sb,
+ const char *fmt, ...);
#define ntfs_warning(sb, f, a...) __ntfs_warning(__func__, sb, f, ##a)
-extern void __ntfs_error(const char *function, const struct super_block *sb,
- const char *fmt, ...) __attribute__ ((format (printf, 3, 4)));
+extern __printf(3, 4)
+void __ntfs_error(const char *function, const struct super_block *sb,
+ const char *fmt, ...);
#define ntfs_error(sb, f, a...) __ntfs_error(__func__, sb, f, ##a)
#endif /* _LINUX_NTFS_DEBUG_H */
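__printf(a, b) is the kernel's shorthand for GCC's format attribute, so these
hunks change layout, not behaviour: the compiler still checks the format
string (argument a) against the variadic arguments (starting at argument b),
and putting the annotation before the declaration keeps long prototypes
readable. From the kernel's compiler headers:

    /* The shorthand being adopted here: */
    #define __printf(a, b)  __attribute__((format(printf, a, b)))

    /* So for __ntfs_debug(file, line, function, format, ...),
     * __printf(4, 5) says argument 4 is the format string and
     * checking of the variadic arguments starts at argument 5. */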
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index 0f48e7c..99e3610 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -1527,13 +1527,20 @@ static int ntfs_dir_open(struct inode *vi, struct file *filp)
* this problem for now. We do write the $BITMAP attribute if it is present
* which is the important one for a directory so things are not too bad.
*/
-static int ntfs_dir_fsync(struct file *filp, int datasync)
+static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *bmp_vi, *vi = filp->f_mapping->host;
int err, ret;
ntfs_attr na;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
+
+ err = filemap_write_and_wait_range(vi->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&vi->i_mutex);
+
BUG_ON(!S_ISDIR(vi->i_mode));
/* If the bitmap attribute inode is in memory sync it, too. */
na.mft_no = vi->i_ino;
@@ -1555,6 +1562,7 @@ static int ntfs_dir_fsync(struct file *filp, int datasync)
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
+ mutex_unlock(&vi->i_mutex);
return ret;
}
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index f4b1057..c587e2d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -1832,9 +1832,8 @@ static ssize_t ntfs_file_buffered_write(struct kiocb *iocb,
* fails again.
*/
if (unlikely(NInoTruncateFailed(ni))) {
- down_write(&vi->i_alloc_sem);
+ inode_dio_wait(vi);
err = ntfs_truncate(vi);
- up_write(&vi->i_alloc_sem);
if (err || NInoTruncateFailed(ni)) {
if (!err)
err = -EIO;
@@ -2153,12 +2152,19 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
* with this inode but since we have no simple way of getting to them we ignore
* this problem for now.
*/
-static int ntfs_file_fsync(struct file *filp, int datasync)
+static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
{
struct inode *vi = filp->f_mapping->host;
int err, ret = 0;
ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
+
+ err = filemap_write_and_wait_range(vi->i_mapping, start, end);
+ if (err)
+ return err;
+ mutex_lock(&vi->i_mutex);
+
BUG_ON(S_ISDIR(vi->i_mode));
if (!datasync || !NInoNonResident(NTFS_I(vi)))
ret = __ntfs_write_inode(vi, 1);
@@ -2176,6 +2182,7 @@ static int ntfs_file_fsync(struct file *filp, int datasync)
else
ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx. Error "
"%u.", datasync ? "data" : "", vi->i_ino, -ret);
+ mutex_unlock(&vi->i_mutex);
return ret;
}
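Both NTFS fsync routines gain the same preamble because the VFS stopped taking
i_mutex and writing back the pagecache around ->fsync. Each implementation now
flushes and waits on the requested range first, without any lock held, and
only then takes i_mutex for the inode and metadata writeback that needs it.
The resulting shape, sketched with a hypothetical metadata hook:

    #include <linux/fs.h>

    /* Locking order adopted here (sketch): data first and lockless,
     * then i_mutex around metadata only. example_write_metadata()
     * is hypothetical. */
    static int example_fsync(struct file *file, loff_t start, loff_t end,
                             int datasync)
    {
            struct inode *inode = file->f_mapping->host;
            int err;

            err = filemap_write_and_wait_range(file->f_mapping, start, end);
            if (err)
                    return err;

            mutex_lock(&inode->i_mutex);
            err = example_write_metadata(inode, datasync);
            mutex_unlock(&inode->i_mutex);
            return err;
    }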
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index c05d6dc..97e2dac 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -612,7 +612,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
* might be tricky due to vfs interactions. Need to think about this
* some more when implementing the unlink command.
*/
- vi->i_nlink = le16_to_cpu(m->link_count);
+ set_nlink(vi, le16_to_cpu(m->link_count));
/*
* FIXME: Reparse points can have the directory bit set even though
* they would be S_IFLNK. Need to deal with this further below when we
@@ -634,7 +634,7 @@ static int ntfs_read_locked_inode(struct inode *vi)
vi->i_mode &= ~vol->dmask;
/* Things break without this kludge! */
if (vi->i_nlink > 1)
- vi->i_nlink = 1;
+ set_nlink(vi, 1);
} else {
vi->i_mode |= S_IFREG;
/* Apply the file permissions mask set in the mount options. */
@@ -1242,7 +1242,7 @@ static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi)
vi->i_version = base_vi->i_version;
vi->i_uid = base_vi->i_uid;
vi->i_gid = base_vi->i_gid;
- vi->i_nlink = base_vi->i_nlink;
+ set_nlink(vi, base_vi->i_nlink);
vi->i_mtime = base_vi->i_mtime;
vi->i_ctime = base_vi->i_ctime;
vi->i_atime = base_vi->i_atime;
@@ -1508,7 +1508,7 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
vi->i_version = base_vi->i_version;
vi->i_uid = base_vi->i_uid;
vi->i_gid = base_vi->i_gid;
- vi->i_nlink = base_vi->i_nlink;
+ set_nlink(vi, base_vi->i_nlink);
vi->i_mtime = base_vi->i_mtime;
vi->i_ctime = base_vi->i_ctime;
vi->i_atime = base_vi->i_atime;
@@ -2357,12 +2357,7 @@ static const char *es = " Leaving inconsistent metadata. Unmount and run "
*
* Returns 0 on success or -errno on error.
*
- * Called with ->i_mutex held. In all but one case ->i_alloc_sem is held for
- * writing. The only case in the kernel where ->i_alloc_sem is not held is
- * mm/filemap.c::generic_file_buffered_write() where vmtruncate() is called
- * with the current i_size as the offset. The analogous place in NTFS is in
- * fs/ntfs/file.c::ntfs_file_buffered_write() where we call vmtruncate() again
- * without holding ->i_alloc_sem.
+ * Called with ->i_mutex held.
*/
int ntfs_truncate(struct inode *vi)
{
@@ -2887,8 +2882,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
* We also abort all changes of user, group, and mode as we do not implement
* the NTFS ACLs yet.
*
- * Called with ->i_mutex held. For the ATTR_SIZE (i.e. ->truncate) case, also
- * called with ->i_alloc_sem held for writing.
+ * Called with ->i_mutex held.
*/
int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
{
diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h
index 2dabf81..fe8e7e9 100644
--- a/fs/ntfs/inode.h
+++ b/fs/ntfs/inode.h
@@ -24,7 +24,7 @@
#ifndef _LINUX_NTFS_INODE_H
#define _LINUX_NTFS_INODE_H
-#include <asm/atomic.h>
+#include <linux/atomic.h>
#include <linux/fs.h>
#include <linux/list.h>
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c
index 3b8d397..98e5442 100644
--- a/fs/omfs/dir.c
+++ b/fs/omfs/dir.c
@@ -93,7 +93,7 @@ int omfs_make_empty(struct inode *inode, struct super_block *sb)
memset(bh->b_data, 0, sizeof(struct omfs_inode));
- if (inode->i_mode & S_IFDIR) {
+ if (S_ISDIR(inode->i_mode)) {
memset(&bh->b_data[OMFS_DIR_START], 0xff,
sbi->s_sys_blocksize - OMFS_DIR_START);
} else
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index e043c4c..f58f1c4 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -361,7 +361,7 @@ nomem:
}
enum {
- Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask
+ Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err
};
static const match_table_t tokens = {
@@ -370,6 +370,7 @@ static const match_table_t tokens = {
{Opt_umask, "umask=%o"},
{Opt_dmask, "dmask=%o"},
{Opt_fmask, "fmask=%o"},
+ {Opt_err, NULL},
};
static int parse_options(char *options, struct omfs_sb_info *sbi)
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index a2a5bff..e4e0ff7 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -242,7 +242,7 @@ found:
inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
inode->i_op = &openprom_inode_operations;
inode->i_fop = &openprom_operations;
- inode->i_nlink = 2;
+ set_nlink(inode, 2);
break;
case op_inode_prop:
if (!strcmp(dp->name, "options") && (len == 17) &&
@@ -251,7 +251,7 @@ found:
else
inode->i_mode = S_IFREG | S_IRUGO;
inode->i_fop = &openpromfs_prop_ops;
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
inode->i_size = ent_oi->u.prop->length;
break;
}
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 811960a..18c58e5 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -237,22 +237,22 @@ ssize_t part_size_show(struct device *dev,
return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
}
-ssize_t part_ro_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t part_ro_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%d\n", p->policy ? 1 : 0);
}
-ssize_t part_alignment_offset_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t part_alignment_offset_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset);
}
-ssize_t part_discard_alignment_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t part_discard_alignment_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct hd_struct *p = dev_to_part(dev);
return sprintf(buf, "%u\n", p->discard_alignment);
@@ -361,26 +361,16 @@ static const struct attribute_group *part_attr_groups[] = {
static void part_release(struct device *dev)
{
struct hd_struct *p = dev_to_part(dev);
+ blk_free_devt(dev->devt);
free_part_stats(p);
free_part_info(p);
kfree(p);
}
-static int part_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
- struct hd_struct *part = dev_to_part(dev);
-
- add_uevent_var(env, "PARTN=%u", part->partno);
- if (part->info && part->info->volname[0])
- add_uevent_var(env, "PARTNAME=%s", part->info->volname);
- return 0;
-}
-
struct device_type part_type = {
.name = "partition",
.groups = part_attr_groups,
.release = part_release,
- .uevent = part_uevent,
};
static void delete_partition_rcu_cb(struct rcu_head *head)
@@ -410,7 +400,6 @@ void delete_partition(struct gendisk *disk, int partno)
if (!part)
return;
- blk_free_devt(part_devt(part));
rcu_assign_pointer(ptbl->part[partno], NULL);
rcu_assign_pointer(ptbl->last_lookup, NULL);
kobject_put(part->holder_dir);
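Moving blk_free_devt() from delete_partition() into part_release() ties the
dev_t's lifetime to the last reference on the hd_struct rather than to the
moment the partition is unlinked: holders that can still look the partition up
keep a valid devt until the release callback runs, and the number cannot be
recycled for a new partition while a dying one still owns it. The general rule
this applies, sketched with hypothetical names:

    #include <linux/device.h>
    #include <linux/slab.h>

    /* Identity-like resources that others look up by handle belong
     * in ->release(), which runs only after the last reference is
     * dropped. example_part and example_free_devt() are hypothetical. */
    struct example_part {
            struct device dev;
            /* ... */
    };

    static void example_release(struct device *dev)
    {
            struct example_part *p =
                    container_of(dev, struct example_part, dev);

            example_free_devt(dev->devt);   /* safe: no lookups remain */
            kfree(p);
    }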
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c
index af9fdf0..bd8ae78 100644
--- a/fs/partitions/ldm.c
+++ b/fs/partitions/ldm.c
@@ -49,18 +49,20 @@
#define ldm_error(f, a...) _ldm_printk (KERN_ERR, __func__, f, ##a)
#define ldm_info(f, a...) _ldm_printk (KERN_INFO, __func__, f, ##a)
-__attribute__ ((format (printf, 3, 4)))
-static void _ldm_printk (const char *level, const char *function,
- const char *fmt, ...)
+static __printf(3, 4)
+void _ldm_printk(const char *level, const char *function, const char *fmt, ...)
{
- static char buf[128];
+ struct va_format vaf;
va_list args;
va_start (args, fmt);
- vsnprintf (buf, sizeof (buf), fmt, args);
- va_end (args);
- printk ("%s%s(): %s\n", level, function, buf);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ printk("%s%s(): %pV\n", level, function, &vaf);
+
+ va_end(args);
}
/**
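The %pV conversion in _ldm_printk() removes a 128-byte static buffer that both
truncated long messages and was shared by all callers, so concurrent CPUs
could corrupt each other's output. %pV hands printk a struct va_format and the
nested format string is expanded in place, with no intermediate buffer:

    /* The structure %pV consumes (from <linux/kernel.h>): printk
     * recursively formats vaf->fmt using the captured vaf->va. */
    struct va_format {
            const char *fmt;
            va_list *va;
    };

Note that va_end() still runs after the printk() in the new code, because the
va_list is consumed through the pointer during formatting.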
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index 977ed27..379a02d 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -24,6 +24,7 @@
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/init.h>
+#include <linux/list.h>
#include <linux/string.h>
#include <linux/mount.h>
#include <linux/ramfs.h>
@@ -32,15 +33,21 @@
#include <linux/magic.h>
#include <linux/pstore.h>
#include <linux/slab.h>
+#include <linux/spinlock.h>
#include <linux/uaccess.h>
#include "internal.h"
#define PSTORE_NAMELEN 64
+static DEFINE_SPINLOCK(allpstore_lock);
+static LIST_HEAD(allpstore);
+
struct pstore_private {
+ struct list_head list;
+ struct pstore_info *psi;
+ enum pstore_type_id type;
u64 id;
- int (*erase)(u64);
ssize_t size;
char data[];
};
@@ -73,15 +80,23 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
{
struct pstore_private *p = dentry->d_inode->i_private;
- p->erase(p->id);
+ p->psi->erase(p->type, p->id, p->psi);
return simple_unlink(dir, dentry);
}
static void pstore_evict_inode(struct inode *inode)
{
+ struct pstore_private *p = inode->i_private;
+ unsigned long flags;
+
end_writeback(inode);
- kfree(inode->i_private);
+ if (p) {
+ spin_lock_irqsave(&allpstore_lock, flags);
+ list_del(&p->list);
+ spin_unlock_irqrestore(&allpstore_lock, flags);
+ kfree(p);
+ }
}
static const struct inode_operations pstore_dir_inode_operations = {
@@ -175,15 +190,29 @@ int pstore_is_mounted(void)
* Set the mtime & ctime to the date that this record was originally stored.
*/
int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
- char *data, size_t size,
- struct timespec time, int (*erase)(u64))
+ char *data, size_t size, struct timespec time,
+ struct pstore_info *psi)
{
struct dentry *root = pstore_sb->s_root;
struct dentry *dentry;
struct inode *inode;
- int rc;
+ int rc = 0;
char name[PSTORE_NAMELEN];
- struct pstore_private *private;
+ struct pstore_private *private, *pos;
+ unsigned long flags;
+
+ spin_lock_irqsave(&allpstore_lock, flags);
+ list_for_each_entry(pos, &allpstore, list) {
+ if (pos->type == type &&
+ pos->id == id &&
+ pos->psi == psi) {
+ rc = -EEXIST;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&allpstore_lock, flags);
+ if (rc)
+ return rc;
rc = -ENOMEM;
inode = pstore_get_inode(pstore_sb, root->d_inode, S_IFREG | 0444, 0);
@@ -192,8 +221,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
private = kmalloc(sizeof *private + size, GFP_KERNEL);
if (!private)
goto fail_alloc;
+ private->type = type;
private->id = id;
- private->erase = erase;
+ private->psi = psi;
switch (type) {
case PSTORE_TYPE_DMESG:
@@ -227,6 +257,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id,
d_add(dentry, inode);
+ spin_lock_irqsave(&allpstore_lock, flags);
+ list_add(&private->list, &allpstore);
+ spin_unlock_irqrestore(&allpstore_lock, flags);
+
mutex_unlock(&root->d_inode->i_mutex);
return 0;
@@ -275,7 +309,7 @@ int pstore_fill_super(struct super_block *sb, void *data, int silent)
goto fail;
}
- pstore_get_records();
+ pstore_get_records(0);
return 0;
fail:
diff --git a/fs/pstore/internal.h b/fs/pstore/internal.h
index 8c9f23e..3bde461 100644
--- a/fs/pstore/internal.h
+++ b/fs/pstore/internal.h
@@ -1,6 +1,6 @@
extern void pstore_set_kmsg_bytes(int);
-extern void pstore_get_records(void);
+extern void pstore_get_records(int);
extern int pstore_mkfile(enum pstore_type_id, char *psname, u64 id,
char *data, size_t size,
- struct timespec time, int (*erase)(u64));
+ struct timespec time, struct pstore_info *psi);
extern int pstore_is_mounted(void);
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index f2c3ff2..45d18d1 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -25,18 +25,38 @@
#include <linux/module.h>
#include <linux/pstore.h>
#include <linux/string.h>
+#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
+#include <linux/hardirq.h>
+#include <linux/workqueue.h>
#include "internal.h"
/*
+ * We defer making "oops" entries appear in pstore - see
+ * whether the system is actually still running well enough
+ * to let someone see the entry.
+ */
+#define PSTORE_INTERVAL (60 * HZ)
+
+static int pstore_new_entry;
+
+static void pstore_timefunc(unsigned long);
+static DEFINE_TIMER(pstore_timer, pstore_timefunc, 0, 0);
+
+static void pstore_dowork(struct work_struct *);
+static DECLARE_WORK(pstore_work, pstore_dowork);
+
+/*
* pstore_lock just protects "psinfo" during
* calls to pstore_register()
*/
static DEFINE_SPINLOCK(pstore_lock);
static struct pstore_info *psinfo;
+static char *backend;
+
/* How much of the console log to snapshot */
static unsigned long kmsg_bytes = 10240;
@@ -52,6 +72,27 @@ static char *reason_str[] = {
"Oops", "Panic", "Kexec", "Restart", "Halt", "Poweroff", "Emergency"
};
+bool pstore_cannot_block_path(enum kmsg_dump_reason reason)
+{
+ /*
+ * In case of NMI path, pstore shouldn't be blocked
+ * regardless of reason.
+ */
+ if (in_nmi())
+ return true;
+
+ switch (reason) {
+ /* In panic case, other cpus are stopped by smp_send_stop(). */
+ case KMSG_DUMP_PANIC:
+ /* Emergency restart shouldn't be blocked by spin lock. */
+ case KMSG_DUMP_EMERG:
+ return true;
+ default:
+ return false;
+ }
+}
+EXPORT_SYMBOL_GPL(pstore_cannot_block_path);
+
/*
* callback from kmsg_dump. (s2,l2) has the most recently
* written bytes, older bytes are in (s1,l1). Save as much
@@ -67,18 +108,28 @@ static void pstore_dump(struct kmsg_dumper *dumper,
unsigned long size, total = 0;
char *dst, *why;
u64 id;
- int hsize, part = 1;
+ int hsize, ret;
+ unsigned int part = 1;
+ unsigned long flags = 0;
+ int is_locked = 0;
if (reason < ARRAY_SIZE(reason_str))
why = reason_str[reason];
else
why = "Unknown";
- mutex_lock(&psinfo->buf_mutex);
+ if (pstore_cannot_block_path(reason)) {
+ is_locked = spin_trylock_irqsave(&psinfo->buf_lock, flags);
+ if (!is_locked) {
+ pr_err("pstore dump routine blocked in %s path, may corrupt error record\n"
+ , in_nmi() ? "NMI" : why);
+ }
+ } else
+ spin_lock_irqsave(&psinfo->buf_lock, flags);
oopscount++;
while (total < kmsg_bytes) {
dst = psinfo->buf;
- hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part++);
+ hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part);
size = psinfo->bufsize - hsize;
dst += hsize;
@@ -94,16 +145,20 @@ static void pstore_dump(struct kmsg_dumper *dumper,
memcpy(dst, s1 + s1_start, l1_cpy);
memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
- id = psinfo->write(PSTORE_TYPE_DMESG, hsize + l1_cpy + l2_cpy);
- if (reason == KMSG_DUMP_OOPS && pstore_is_mounted())
- pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id,
- psinfo->buf, hsize + l1_cpy + l2_cpy,
- CURRENT_TIME, psinfo->erase);
+ ret = psinfo->write(PSTORE_TYPE_DMESG, &id, part,
+ hsize + l1_cpy + l2_cpy, psinfo);
+ if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
+ pstore_new_entry = 1;
l1 -= l1_cpy;
l2 -= l2_cpy;
total += l1_cpy + l2_cpy;
+ part++;
}
- mutex_unlock(&psinfo->buf_mutex);
+ if (pstore_cannot_block_path(reason)) {
+ if (is_locked)
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
+ } else
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
}
static struct kmsg_dumper pstore_dumper = {
@@ -128,7 +183,14 @@ int pstore_register(struct pstore_info *psi)
spin_unlock(&pstore_lock);
return -EBUSY;
}
+
+ if (backend && strcmp(backend, psi->name)) {
+ spin_unlock(&pstore_lock);
+ return -EINVAL;
+ }
+
psinfo = psi;
+ mutex_init(&psinfo->read_mutex);
spin_unlock(&pstore_lock);
if (owner && !try_module_get(owner)) {
@@ -137,21 +199,27 @@ int pstore_register(struct pstore_info *psi)
}
if (pstore_is_mounted())
- pstore_get_records();
+ pstore_get_records(0);
kmsg_dump_register(&pstore_dumper);
+ pstore_timer.expires = jiffies + PSTORE_INTERVAL;
+ add_timer(&pstore_timer);
+
return 0;
}
EXPORT_SYMBOL_GPL(pstore_register);
/*
- * Read all the records from the persistent store. Create and
- * file files in our filesystem.
+ * Read all the records from the persistent store. Create
+ * files in our filesystem. Don't warn about -EEXIST errors
+ * when we are re-scanning the backing store looking to add new
+ * error records.
*/
-void pstore_get_records(void)
+void pstore_get_records(int quiet)
{
struct pstore_info *psi = psinfo;
+ char *buf = NULL;
ssize_t size;
u64 id;
enum pstore_type_id type;
@@ -161,32 +229,52 @@ void pstore_get_records(void)
if (!psi)
return;
- mutex_lock(&psinfo->buf_mutex);
+ mutex_lock(&psi->read_mutex);
rc = psi->open(psi);
if (rc)
goto out;
- while ((size = psi->read(&id, &type, &time)) > 0) {
- if (pstore_mkfile(type, psi->name, id, psi->buf, (size_t)size,
- time, psi->erase))
+ while ((size = psi->read(&id, &type, &time, &buf, psi)) > 0) {
+ rc = pstore_mkfile(type, psi->name, id, buf, (size_t)size,
+ time, psi);
+ kfree(buf);
+ buf = NULL;
+ if (rc && (rc != -EEXIST || !quiet))
failed++;
}
psi->close(psi);
out:
- mutex_unlock(&psinfo->buf_mutex);
+ mutex_unlock(&psi->read_mutex);
if (failed)
printk(KERN_WARNING "pstore: failed to load %d record(s) from '%s'\n",
failed, psi->name);
}
+static void pstore_dowork(struct work_struct *work)
+{
+ pstore_get_records(1);
+}
+
+static void pstore_timefunc(unsigned long dummy)
+{
+ if (pstore_new_entry) {
+ pstore_new_entry = 0;
+ schedule_work(&pstore_work);
+ }
+
+ mod_timer(&pstore_timer, jiffies + PSTORE_INTERVAL);
+}
+
/*
* Call platform driver to write a record to the
* persistent store.
*/
int pstore_write(enum pstore_type_id type, char *buf, size_t size)
{
- u64 id;
+ u64 id;
+ int ret;
+ unsigned long flags;
if (!psinfo)
return -ENODEV;
@@ -194,14 +282,17 @@ int pstore_write(enum pstore_type_id type, char *buf, size_t size)
if (size > psinfo->bufsize)
return -EFBIG;
- mutex_lock(&psinfo->buf_mutex);
+ spin_lock_irqsave(&psinfo->buf_lock, flags);
memcpy(psinfo->buf, buf, size);
- id = psinfo->write(type, size);
- if (pstore_is_mounted())
+ ret = psinfo->write(type, &id, 0, size, psinfo);
+ if (ret == 0 && pstore_is_mounted())
pstore_mkfile(PSTORE_TYPE_DMESG, psinfo->name, id, psinfo->buf,
- size, CURRENT_TIME, psinfo->erase);
- mutex_unlock(&psinfo->buf_mutex);
+ size, CURRENT_TIME, psinfo);
+ spin_unlock_irqrestore(&psinfo->buf_lock, flags);
return 0;
}
EXPORT_SYMBOL_GPL(pstore_write);
+
+module_param(backend, charp, 0444);
+MODULE_PARM_DESC(backend, "Pstore backend to use");
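The deferred-oops machinery is a two-stage relay dictated by context rules:
kmsg_dump can fire in atomic (even NMI) context, so pstore_dump() only sets
pstore_new_entry; the timer callback runs in softirq context and still may not
sleep, so it only schedules the work item; the work item finally runs in
process context, where pstore_get_records() is free to take its mutex and
allocate memory. Passing 1 there makes the rescan quiet, so records already
filed are skipped via -EEXIST without being counted as failures. The relay,
stripped to its skeleton (names hypothetical):

    #include <linux/timer.h>
    #include <linux/workqueue.h>

    static void relay_work_fn(struct work_struct *work);
    static DECLARE_WORK(relay_work, relay_work_fn);

    static void relay_timer_fn(unsigned long data);
    static DEFINE_TIMER(relay_timer, relay_timer_fn, 0, 0);

    static void relay_work_fn(struct work_struct *work)
    {
            /* process context: mutexes and GFP_KERNEL are fine here */
    }

    static void relay_timer_fn(unsigned long data)
    {
            schedule_work(&relay_work);     /* defer the sleepable work */
            mod_timer(&relay_timer, jiffies + 60 * HZ);
    }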
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 2b06466..3bdd214 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -379,7 +379,7 @@ struct inode *qnx4_iget(struct super_block *sb, unsigned long ino)
inode->i_mode = le16_to_cpu(raw_inode->di_mode);
inode->i_uid = (uid_t)le16_to_cpu(raw_inode->di_uid);
inode->i_gid = (gid_t)le16_to_cpu(raw_inode->di_gid);
- inode->i_nlink = le16_to_cpu(raw_inode->di_nlink);
+ set_nlink(inode, le16_to_cpu(raw_inode->di_nlink));
inode->i_size = le32_to_cpu(raw_inode->di_size);
inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->di_mtime);
inode->i_mtime.tv_nsec = 0;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 5b572c8..3d5d717 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -580,9 +580,17 @@ int dquot_scan_active(struct super_block *sb,
dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
- ret = fn(dquot, priv);
- if (ret < 0)
- goto out;
+ /*
+ * ->release_dquot() can be racing with us. Our reference
+ * protects us from new calls to it so just wait for any
+ * outstanding call and recheck the DQ_ACTIVE_B after that.
+ */
+ wait_on_dquot(dquot);
+ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ ret = fn(dquot, priv);
+ if (ret < 0)
+ goto out;
+ }
spin_lock(&dq_list_lock);
/* We are safe to continue now because our dquot could not
* be moved out of the inuse list while we hold the reference */
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 10b6be3..35f4b0e 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -286,7 +286,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
/* caller already holds s_umount */
if (sb->s_flags & MS_RDONLY)
return -EROFS;
- writeback_inodes_sb(sb);
+ writeback_inodes_sb(sb, WB_REASON_SYNC);
return 0;
default:
return -EINVAL;
@@ -363,12 +363,15 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
}
sb = quotactl_block(special);
- if (IS_ERR(sb))
- return PTR_ERR(sb);
+ if (IS_ERR(sb)) {
+ ret = PTR_ERR(sb);
+ goto out;
+ }
ret = do_quotactl(sb, type, cmds, id, addr, pathp);
drop_super(sb);
+out:
if (pathp && !IS_ERR(pathp))
path_put(pathp);
return ret;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index eacb166..462ceb3 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -23,7 +23,6 @@
* caches is sufficient.
*/
-#include <linux/module.h>
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
@@ -288,14 +287,7 @@ static int __init init_ramfs_fs(void)
{
return register_filesystem(&ramfs_fs_type);
}
-
-static void __exit exit_ramfs_fs(void)
-{
- unregister_filesystem(&ramfs_fs_type);
-}
-
module_init(init_ramfs_fs)
-module_exit(exit_ramfs_fs)
int __init init_rootfs(void)
{
@@ -311,5 +303,3 @@ int __init init_rootfs(void)
return err;
}
-
-MODULE_LICENSE("GPL");
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 2305e31..8b4089f 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -337,7 +337,7 @@ static struct inode *romfs_iget(struct super_block *sb, unsigned long pos)
inode->i_metasize = (ROMFH_SIZE + nlen + 1 + ROMFH_PAD) & ROMFH_MASK;
inode->i_dataoffset = pos + inode->i_metasize;
- i->i_nlink = 1; /* Hard to decide.. */
+ set_nlink(i, 1); /* Hard to decide.. */
i->i_size = be32_to_cpu(ri.size);
i->i_mtime.tv_sec = i->i_atime.tv_sec = i->i_ctime.tv_sec = 0;
i->i_mtime.tv_nsec = i->i_atime.tv_nsec = i->i_ctime.tv_nsec = 0;
diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig
index 7797218..c70111e 100644
--- a/fs/squashfs/Kconfig
+++ b/fs/squashfs/Kconfig
@@ -1,7 +1,6 @@
config SQUASHFS
tristate "SquashFS 4.0 - Squashed file system support"
depends on BLOCK
- select ZLIB_INFLATE
help
Saying Y here includes support for SquashFS 4.0 (a Compressed
Read-Only File System). Squashfs is a highly compressed read-only
@@ -20,9 +19,9 @@ config SQUASHFS
If you want to compile this as a module ( = code which can be
inserted in and removed from the running kernel whenever you want),
- say M here and read <file:Documentation/modules.txt>. The module
- will be called squashfs. Note that the root file system (the one
- containing the directory /) cannot be compiled as a module.
+ say M here. The module will be called squashfs. Note that the root
+ file system (the one containing the directory /) cannot be compiled
+ as a module.
If unsure, say N.
@@ -36,6 +35,19 @@ config SQUASHFS_XATTR
If unsure, say N.
+config SQUASHFS_ZLIB
+ bool "Include support for ZLIB compressed file systems"
+ depends on SQUASHFS
+ select ZLIB_INFLATE
+ default y
+ help
+ ZLIB compression is the standard compression used by Squashfs
+ file systems. It offers a good trade-off between compression
+ achieved and the amount of CPU time and memory necessary to
+ compress and decompress.
+
+ If unsure, say Y.
+
config SQUASHFS_LZO
bool "Include support for LZO compressed file systems"
depends on SQUASHFS
@@ -66,6 +78,28 @@ config SQUASHFS_XZ
If unsure, say N.
+config SQUASHFS_4K_DEVBLK_SIZE
+ bool "Use 4K device block size?"
+ depends on SQUASHFS
+ help
+ By default Squashfs sets the dev block size (sb_min_blocksize)
+ to 1K or the smallest block size supported by the block device
+ (if larger). Because blocks are packed together and unaligned
+ in Squashfs, this should reduce latency.
+
+ This, however, gives poor performance on MTD NAND devices where
+ the optimal I/O size is 4K (even though the devices can support
+ smaller block sizes).
+
+ Using a 4K device block size may also improve overall I/O
+ performance for some file access patterns (e.g. sequential
+ accesses of files in filesystem order) on all media.
+
+ Setting this option will force Squashfs to use a 4K device block
+ size by default.
+
+ If unsure, say N.
+
config SQUASHFS_EMBEDDED
bool "Additional option for memory-constrained systems"
depends on SQUASHFS
diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile
index cecf2be..110b047 100644
--- a/fs/squashfs/Makefile
+++ b/fs/squashfs/Makefile
@@ -4,7 +4,8 @@
obj-$(CONFIG_SQUASHFS) += squashfs.o
squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o
-squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o
+squashfs-y += namei.o super.o symlink.o decompressor.o
squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o
squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o
squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o
+squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index 9f1b0bb..3f6271d 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -52,6 +52,12 @@ static const struct squashfs_decompressor squashfs_xz_comp_ops = {
};
#endif
+#ifndef CONFIG_SQUASHFS_ZLIB
+static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
+ NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0
+};
+#endif
+
static const struct squashfs_decompressor squashfs_unknown_comp_ops = {
NULL, NULL, NULL, 0, "unknown", 0
};
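With zlib now optional, the decompressor id table still needs a "zlib" entry so
that a zlib-compressed image is recognized; the stub supplies NULL handlers
and, in the last field, supported = 0, so mounting such an image on a kernel
built without CONFIG_SQUASHFS_ZLIB fails cleanly as unsupported compression
rather than as a link error. Assuming the field order declared in
decompressor.h (init, free, decompress, id, name, supported), the positional
initializer above reads:

    /* The stub with its field names written out: */
    static const struct squashfs_decompressor squashfs_zlib_comp_ops = {
            .init       = NULL,
            .free       = NULL,
            .decompress = NULL,
            .id         = ZLIB_COMPRESSION,
            .name       = "zlib",
            .supported  = 0,  /* mount reports unsupported compression */
    };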
diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h
index 8ba70cf..330073e 100644
--- a/fs/squashfs/decompressor.h
+++ b/fs/squashfs/decompressor.h
@@ -56,4 +56,8 @@ extern const struct squashfs_decompressor squashfs_xz_comp_ops;
extern const struct squashfs_decompressor squashfs_lzo_comp_ops;
#endif
+#ifdef CONFIG_SQUASHFS_ZLIB
+extern const struct squashfs_decompressor squashfs_zlib_comp_ops;
+#endif
+
#endif
diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c
index 04bebca..fd7b3b3 100644
--- a/fs/squashfs/inode.c
+++ b/fs/squashfs/inode.c
@@ -159,7 +159,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
frag_offset = 0;
}
- inode->i_nlink = 1;
+ set_nlink(inode, 1);
inode->i_size = le32_to_cpu(sqsh_ino->file_size);
inode->i_fop = &generic_ro_fops;
inode->i_mode |= S_IFREG;
@@ -203,7 +203,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
}
xattr_id = le32_to_cpu(sqsh_ino->xattr);
- inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_size = le64_to_cpu(sqsh_ino->file_size);
inode->i_op = &squashfs_inode_ops;
inode->i_fop = &generic_ro_fops;
@@ -232,7 +232,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
if (err < 0)
goto failed_read;
- inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_size = le16_to_cpu(sqsh_ino->file_size);
inode->i_op = &squashfs_dir_inode_ops;
inode->i_fop = &squashfs_dir_ops;
@@ -257,7 +257,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
goto failed_read;
xattr_id = le32_to_cpu(sqsh_ino->xattr);
- inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_size = le32_to_cpu(sqsh_ino->file_size);
inode->i_op = &squashfs_dir_inode_ops;
inode->i_fop = &squashfs_dir_ops;
@@ -284,7 +284,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
if (err < 0)
goto failed_read;
- inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
inode->i_size = le32_to_cpu(sqsh_ino->symlink_size);
inode->i_op = &squashfs_symlink_inode_ops;
inode->i_data.a_ops = &squashfs_symlink_aops;
@@ -325,7 +325,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_mode |= S_IFCHR;
else
inode->i_mode |= S_IFBLK;
- inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
rdev = le32_to_cpu(sqsh_ino->rdev);
init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
@@ -349,7 +349,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_mode |= S_IFBLK;
xattr_id = le32_to_cpu(sqsh_ino->xattr);
inode->i_op = &squashfs_inode_ops;
- inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
rdev = le32_to_cpu(sqsh_ino->rdev);
init_special_inode(inode, inode->i_mode, new_decode_dev(rdev));
@@ -370,7 +370,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_mode |= S_IFIFO;
else
inode->i_mode |= S_IFSOCK;
- inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
init_special_inode(inode, inode->i_mode, 0);
break;
}
@@ -389,7 +389,7 @@ int squashfs_read_inode(struct inode *inode, long long ino)
inode->i_mode |= S_IFSOCK;
xattr_id = le32_to_cpu(sqsh_ino->xattr);
inode->i_op = &squashfs_inode_ops;
- inode->i_nlink = le32_to_cpu(sqsh_ino->nlink);
+ set_nlink(inode, le32_to_cpu(sqsh_ino->nlink));
init_special_inode(inode, inode->i_mode, 0);
break;
}
diff --git a/fs/squashfs/namei.c b/fs/squashfs/namei.c
index 4bc63ac..0682b38 100644
--- a/fs/squashfs/namei.c
+++ b/fs/squashfs/namei.c
@@ -220,11 +220,6 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
blk, off, ino_num);
inode = squashfs_iget(dir->i_sb, ino, ino_num);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- goto failed;
- }
-
goto exit_lookup;
}
}
@@ -232,10 +227,7 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
exit_lookup:
kfree(dire);
- if (inode)
- return d_splice_alias(inode, dentry);
- d_add(dentry, inode);
- return ERR_PTR(0);
+ return d_splice_alias(inode, dentry);
data_error:
err = -EIO;
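The lookup simplification leans on d_splice_alias() accepting every shape of
inode this code can produce: NULL behaves like d_add(dentry, NULL) and returns
NULL (a negative dentry), an ERR_PTR inode comes back as the corresponding
ERR_PTR dentry (which is why the explicit IS_ERR() check can go), and a real
inode gets an alias attached or reused. A ->lookup written directly in that
style, with hypothetical helpers:

    #include <linux/dcache.h>
    #include <linux/fs.h>

    /* Sketch of a lookup tail relying on d_splice_alias() semantics;
     * example_find_entry() and example_iget() are hypothetical. */
    static struct dentry *example_lookup(struct inode *dir,
                                         struct dentry *dentry,
                                         unsigned int flags)
    {
            struct inode *inode = NULL;
            u64 ino;

            if (example_find_entry(dir, &dentry->d_name, &ino) == 0)
                    inode = example_iget(dir->i_sb, ino); /* may be ERR_PTR */

            return d_splice_alias(inode, dentry);   /* handles all cases */
    }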
diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h
index e3be6a7..d126651 100644
--- a/fs/squashfs/squashfs.h
+++ b/fs/squashfs/squashfs.h
@@ -97,6 +97,3 @@ extern const struct inode_operations squashfs_symlink_inode_ops;
/* xattr.c */
extern const struct xattr_handler *squashfs_xattr_handlers[];
-
-/* zlib_wrapper.c */
-extern const struct squashfs_decompressor squashfs_zlib_comp_ops;
diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h
index b4a4e53..e8e1464 100644
--- a/fs/squashfs/squashfs_fs.h
+++ b/fs/squashfs/squashfs_fs.h
@@ -36,6 +36,13 @@
#define SQUASHFS_FILE_SIZE 131072
#define SQUASHFS_FILE_LOG 17
+/* default size of block device I/O */
+#ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
+#define SQUASHFS_DEVBLK_SIZE 4096
+#else
+#define SQUASHFS_DEVBLK_SIZE 1024
+#endif
+
#define SQUASHFS_FILE_MAX_SIZE 1048576
#define SQUASHFS_FILE_MAX_LOG 20
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index b5a8636..4619247 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -95,7 +95,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
}
msblk = sb->s_fs_info;
- msblk->devblksize = sb_min_blocksize(sb, BLOCK_SIZE);
+ msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);
msblk->devblksize_log2 = ffz(~msblk->devblksize);
mutex_init(&msblk->read_data_mutex);
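SQUASHFS_DEVBLK_SIZE is a floor, not a fixed value: sb_min_blocksize() returns
the larger of the requested size and the device's logical block size, and sets
the superblock's block size accordingly. The ffz(~x) on the next line is the
usual trick for taking log2 of a power of two, since complementing x turns its
lowest set bit into the first zero bit:

    /* How the two lines above interact (sketch): */
    static int example_set_devblksize(struct super_block *sb)
    {
            /* max(SQUASHFS_DEVBLK_SIZE, bdev logical block size);
             * also sets sb->s_blocksize */
            int devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE);

            /* log2 of a power of two: for 4096 = 0x1000, ~x has its
             * first zero bit at position 12 */
            return ffz(~devblksize);
    }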
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c
index 0630eb9..25ffb3e 100644
--- a/fs/sysv/inode.c
+++ b/fs/sysv/inode.c
@@ -219,7 +219,7 @@ struct inode *sysv_iget(struct super_block *sb, unsigned int ino)
inode->i_mode = fs16_to_cpu(sbi, raw_inode->i_mode);
inode->i_uid = (uid_t)fs16_to_cpu(sbi, raw_inode->i_uid);
inode->i_gid = (gid_t)fs16_to_cpu(sbi, raw_inode->i_gid);
- inode->i_nlink = fs16_to_cpu(sbi, raw_inode->i_nlink);
+ set_nlink(inode, fs16_to_cpu(sbi, raw_inode->i_nlink));
inode->i_size = fs32_to_cpu(sbi, raw_inode->i_size);
inode->i_atime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_atime);
inode->i_mtime.tv_sec = fs32_to_cpu(sbi, raw_inode->i_mtime);
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 284a7c8..427a4e8 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,44 +16,53 @@
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#
-ccflags-y := -I$(src) -I$(src)/linux-2.6
-ccflags-$(CONFIG_XFS_DEBUG) += -g
+ccflags-y += -I$(src) # needed for trace events
-XFS_LINUX := linux-2.6
+ccflags-$(CONFIG_XFS_DEBUG) += -g
obj-$(CONFIG_XFS_FS) += xfs.o
-xfs-y += linux-2.6/xfs_trace.o
-
-xfs-$(CONFIG_XFS_QUOTA) += $(addprefix quota/, \
- xfs_dquot.o \
- xfs_dquot_item.o \
- xfs_trans_dquot.o \
- xfs_qm_syscalls.o \
- xfs_qm_bhv.o \
- xfs_qm.o)
-xfs-$(CONFIG_XFS_QUOTA) += linux-2.6/xfs_quotaops.o
-
-ifeq ($(CONFIG_XFS_QUOTA),y)
-xfs-$(CONFIG_PROC_FS) += quota/xfs_qm_stats.o
-endif
-
-xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
-xfs-$(CONFIG_XFS_POSIX_ACL) += $(XFS_LINUX)/xfs_acl.o
-xfs-$(CONFIG_PROC_FS) += $(XFS_LINUX)/xfs_stats.o
-xfs-$(CONFIG_SYSCTL) += $(XFS_LINUX)/xfs_sysctl.o
-xfs-$(CONFIG_COMPAT) += $(XFS_LINUX)/xfs_ioctl32.o
+# this one should be compiled first, as the tracing macros can easily blow up
+xfs-y += xfs_trace.o
+# highlevel code
+xfs-y += xfs_aops.o \
+ xfs_bit.o \
+ xfs_buf.o \
+ xfs_dfrag.o \
+ xfs_discard.o \
+ xfs_error.o \
+ xfs_export.o \
+ xfs_file.o \
+ xfs_filestream.o \
+ xfs_fsops.o \
+ xfs_fs_subr.o \
+ xfs_globals.o \
+ xfs_iget.o \
+ xfs_ioctl.o \
+ xfs_iomap.o \
+ xfs_iops.o \
+ xfs_itable.o \
+ xfs_message.o \
+ xfs_mru_cache.o \
+ xfs_super.o \
+ xfs_sync.o \
+ xfs_xattr.o \
+ xfs_rename.o \
+ xfs_rw.o \
+ xfs_utils.o \
+ xfs_vnodeops.o \
+ kmem.o \
+ uuid.o
+# code shared with libxfs
xfs-y += xfs_alloc.o \
xfs_alloc_btree.o \
xfs_attr.o \
xfs_attr_leaf.o \
- xfs_bit.o \
xfs_bmap.o \
xfs_bmap_btree.o \
xfs_btree.o \
- xfs_buf_item.o \
xfs_da_btree.o \
xfs_dir2.o \
xfs_dir2_block.o \
@@ -61,51 +70,37 @@ xfs-y += xfs_alloc.o \
xfs_dir2_leaf.o \
xfs_dir2_node.o \
xfs_dir2_sf.o \
- xfs_error.o \
- xfs_extfree_item.o \
- xfs_filestream.o \
- xfs_fsops.o \
xfs_ialloc.o \
xfs_ialloc_btree.o \
- xfs_iget.o \
xfs_inode.o \
- xfs_inode_item.o \
- xfs_iomap.o \
- xfs_itable.o \
- xfs_dfrag.o \
- xfs_log.o \
- xfs_log_cil.o \
xfs_log_recover.o \
xfs_mount.o \
- xfs_mru_cache.o \
- xfs_rename.o \
- xfs_trans.o \
+ xfs_trans.o
+
+# low-level transaction/log code
+xfs-y += xfs_log.o \
+ xfs_log_cil.o \
+ xfs_buf_item.o \
+ xfs_extfree_item.o \
+ xfs_inode_item.o \
xfs_trans_ail.o \
xfs_trans_buf.o \
xfs_trans_extfree.o \
xfs_trans_inode.o \
- xfs_utils.o \
- xfs_vnodeops.o \
- xfs_rw.o
-xfs-$(CONFIG_XFS_TRACE) += xfs_btree_trace.o
-
-# Objects in linux/
-xfs-y += $(addprefix $(XFS_LINUX)/, \
- kmem.o \
- xfs_aops.o \
- xfs_buf.o \
- xfs_discard.o \
- xfs_export.o \
- xfs_file.o \
- xfs_fs_subr.o \
- xfs_globals.o \
- xfs_ioctl.o \
- xfs_iops.o \
- xfs_message.o \
- xfs_super.o \
- xfs_sync.o \
- xfs_xattr.o)
-
-# Objects in support/
-xfs-y += support/uuid.o
+# optional features
+xfs-$(CONFIG_XFS_QUOTA) += xfs_dquot.o \
+ xfs_dquot_item.o \
+ xfs_trans_dquot.o \
+ xfs_qm_syscalls.o \
+ xfs_qm_bhv.o \
+ xfs_qm.o \
+ xfs_quotaops.o
+ifeq ($(CONFIG_XFS_QUOTA),y)
+xfs-$(CONFIG_PROC_FS) += xfs_qm_stats.o
+endif
+xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
+xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
+xfs-$(CONFIG_PROC_FS) += xfs_stats.o
+xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
+xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c
deleted file mode 100644
index a907de5..0000000
--- a/fs/xfs/linux-2.6/kmem.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/slab.h>
-#include <linux/swap.h>
-#include <linux/blkdev.h>
-#include <linux/backing-dev.h>
-#include "time.h"
-#include "kmem.h"
-#include "xfs_message.h"
-
-/*
- * Greedy allocation. May fail and may return vmalloced memory.
- *
- * Must be freed using kmem_free_large.
- */
-void *
-kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
-{
- void *ptr;
- size_t kmsize = maxsize;
-
- while (!(ptr = kmem_zalloc_large(kmsize))) {
- if ((kmsize >>= 1) <= minsize)
- kmsize = minsize;
- }
- if (ptr)
- *size = kmsize;
- return ptr;
-}
-
-void *
-kmem_alloc(size_t size, unsigned int __nocast flags)
-{
- int retries = 0;
- gfp_t lflags = kmem_flags_convert(flags);
- void *ptr;
-
- do {
- ptr = kmalloc(size, lflags);
- if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
- return ptr;
- if (!(++retries % 100))
- xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
- __func__, lflags);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- } while (1);
-}
-
-void *
-kmem_zalloc(size_t size, unsigned int __nocast flags)
-{
- void *ptr;
-
- ptr = kmem_alloc(size, flags);
- if (ptr)
- memset((char *)ptr, 0, (int)size);
- return ptr;
-}
-
-void
-kmem_free(const void *ptr)
-{
- if (!is_vmalloc_addr(ptr)) {
- kfree(ptr);
- } else {
- vfree(ptr);
- }
-}
-
-void *
-kmem_realloc(const void *ptr, size_t newsize, size_t oldsize,
- unsigned int __nocast flags)
-{
- void *new;
-
- new = kmem_alloc(newsize, flags);
- if (ptr) {
- if (new)
- memcpy(new, ptr,
- ((oldsize < newsize) ? oldsize : newsize));
- kmem_free(ptr);
- }
- return new;
-}
-
-void *
-kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
- int retries = 0;
- gfp_t lflags = kmem_flags_convert(flags);
- void *ptr;
-
- do {
- ptr = kmem_cache_alloc(zone, lflags);
- if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
- return ptr;
- if (!(++retries % 100))
- xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
- __func__, lflags);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- } while (1);
-}
-
-void *
-kmem_zone_zalloc(kmem_zone_t *zone, unsigned int __nocast flags)
-{
- void *ptr;
-
- ptr = kmem_zone_alloc(zone, flags);
- if (ptr)
- memset((char *)ptr, 0, kmem_cache_size(zone));
- return ptr;
-}
diff --git a/fs/xfs/linux-2.6/kmem.h b/fs/xfs/linux-2.6/kmem.h
deleted file mode 100644
index f7c8f7a..0000000
--- a/fs/xfs/linux-2.6/kmem.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_KMEM_H__
-#define __XFS_SUPPORT_KMEM_H__
-
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-
-/*
- * General memory allocation interfaces
- */
-
-#define KM_SLEEP 0x0001u
-#define KM_NOSLEEP 0x0002u
-#define KM_NOFS 0x0004u
-#define KM_MAYFAIL 0x0008u
-
-/*
- * We use a special process flag to avoid recursive callbacks into
- * the filesystem during transactions. We will also issue our own
- * warnings, so we explicitly skip any generic ones (silly of us).
- */
-static inline gfp_t
-kmem_flags_convert(unsigned int __nocast flags)
-{
- gfp_t lflags;
-
- BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL));
-
- if (flags & KM_NOSLEEP) {
- lflags = GFP_ATOMIC | __GFP_NOWARN;
- } else {
- lflags = GFP_KERNEL | __GFP_NOWARN;
- if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))
- lflags &= ~__GFP_FS;
- }
- return lflags;
-}
-
-extern void *kmem_alloc(size_t, unsigned int __nocast);
-extern void *kmem_zalloc(size_t, unsigned int __nocast);
-extern void *kmem_realloc(const void *, size_t, size_t, unsigned int __nocast);
-extern void kmem_free(const void *);
-
-static inline void *kmem_zalloc_large(size_t size)
-{
- void *ptr;
-
- ptr = vmalloc(size);
- if (ptr)
- memset(ptr, 0, size);
- return ptr;
-}
-static inline void kmem_free_large(void *ptr)
-{
- vfree(ptr);
-}
-
-extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
-
-/*
- * Zone interfaces
- */
-
-#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN
-#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT
-#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
-
-#define kmem_zone kmem_cache
-#define kmem_zone_t struct kmem_cache
-
-static inline kmem_zone_t *
-kmem_zone_init(int size, char *zone_name)
-{
- return kmem_cache_create(zone_name, size, 0, 0, NULL);
-}
-
-static inline kmem_zone_t *
-kmem_zone_init_flags(int size, char *zone_name, unsigned long flags,
- void (*construct)(void *))
-{
- return kmem_cache_create(zone_name, size, 0, flags, construct);
-}
-
-static inline void
-kmem_zone_free(kmem_zone_t *zone, void *ptr)
-{
- kmem_cache_free(zone, ptr);
-}
-
-static inline void
-kmem_zone_destroy(kmem_zone_t *zone)
-{
- if (zone)
- kmem_cache_destroy(zone);
-}
-
-extern void *kmem_zone_alloc(kmem_zone_t *, unsigned int __nocast);
-extern void *kmem_zone_zalloc(kmem_zone_t *, unsigned int __nocast);
-
-static inline int
-kmem_shake_allow(gfp_t gfp_mask)
-{
- return ((gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS));
-}
-
-#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
deleted file mode 100644
index ff6a198..0000000
--- a/fs/xfs/linux-2.6/mrlock.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_MRLOCK_H__
-#define __XFS_SUPPORT_MRLOCK_H__
-
-#include <linux/rwsem.h>
-
-typedef struct {
- struct rw_semaphore mr_lock;
-#ifdef DEBUG
- int mr_writer;
-#endif
-} mrlock_t;
-
-#ifdef DEBUG
-#define mrinit(mrp, name) \
- do { (mrp)->mr_writer = 0; init_rwsem(&(mrp)->mr_lock); } while (0)
-#else
-#define mrinit(mrp, name) \
- do { init_rwsem(&(mrp)->mr_lock); } while (0)
-#endif
-
-#define mrlock_init(mrp, t,n,s) mrinit(mrp, n)
-#define mrfree(mrp) do { } while (0)
-
-static inline void mraccess_nested(mrlock_t *mrp, int subclass)
-{
- down_read_nested(&mrp->mr_lock, subclass);
-}
-
-static inline void mrupdate_nested(mrlock_t *mrp, int subclass)
-{
- down_write_nested(&mrp->mr_lock, subclass);
-#ifdef DEBUG
- mrp->mr_writer = 1;
-#endif
-}
-
-static inline int mrtryaccess(mrlock_t *mrp)
-{
- return down_read_trylock(&mrp->mr_lock);
-}
-
-static inline int mrtryupdate(mrlock_t *mrp)
-{
- if (!down_write_trylock(&mrp->mr_lock))
- return 0;
-#ifdef DEBUG
- mrp->mr_writer = 1;
-#endif
- return 1;
-}
-
-static inline void mrunlock_excl(mrlock_t *mrp)
-{
-#ifdef DEBUG
- mrp->mr_writer = 0;
-#endif
- up_write(&mrp->mr_lock);
-}
-
-static inline void mrunlock_shared(mrlock_t *mrp)
-{
- up_read(&mrp->mr_lock);
-}
-
-static inline void mrdemote(mrlock_t *mrp)
-{
-#ifdef DEBUG
- mrp->mr_writer = 0;
-#endif
- downgrade_write(&mrp->mr_lock);
-}
-
-#endif /* __XFS_SUPPORT_MRLOCK_H__ */
diff --git a/fs/xfs/linux-2.6/time.h b/fs/xfs/linux-2.6/time.h
deleted file mode 100644
index 387e695..0000000
--- a/fs/xfs/linux-2.6/time.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_TIME_H__
-#define __XFS_SUPPORT_TIME_H__
-
-#include <linux/sched.h>
-#include <linux/time.h>
-
-typedef struct timespec timespec_t;
-
-static inline void delay(long ticks)
-{
- schedule_timeout_uninterruptible(ticks);
-}
-
-static inline void nanotime(struct timespec *tvp)
-{
- *tvp = CURRENT_TIME;
-}
-
-#endif /* __XFS_SUPPORT_TIME_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
deleted file mode 100644
index f86e034..0000000
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_acl.h"
-#include "xfs_attr.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include <linux/slab.h>
-#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
-
-
-/*
- * Locking scheme:
- * - all ACL updates are protected by inode->i_mutex, which is taken before
- * calling into this file.
- */
-
-STATIC struct posix_acl *
-xfs_acl_from_disk(struct xfs_acl *aclp)
-{
- struct posix_acl_entry *acl_e;
- struct posix_acl *acl;
- struct xfs_acl_entry *ace;
- unsigned int count, i;
-
- count = be32_to_cpu(aclp->acl_cnt);
- if (count > XFS_ACL_MAX_ENTRIES)
- return ERR_PTR(-EFSCORRUPTED);
-
- acl = posix_acl_alloc(count, GFP_KERNEL);
- if (!acl)
- return ERR_PTR(-ENOMEM);
-
- for (i = 0; i < count; i++) {
- acl_e = &acl->a_entries[i];
- ace = &aclp->acl_entry[i];
-
- /*
- * The tag is 32 bits on disk and 16 bits in core.
- *
- * Because every access to it goes through the core
- * format first this is not a problem.
- */
- acl_e->e_tag = be32_to_cpu(ace->ae_tag);
- acl_e->e_perm = be16_to_cpu(ace->ae_perm);
-
- switch (acl_e->e_tag) {
- case ACL_USER:
- case ACL_GROUP:
- acl_e->e_id = be32_to_cpu(ace->ae_id);
- break;
- case ACL_USER_OBJ:
- case ACL_GROUP_OBJ:
- case ACL_MASK:
- case ACL_OTHER:
- acl_e->e_id = ACL_UNDEFINED_ID;
- break;
- default:
- goto fail;
- }
- }
- return acl;
-
-fail:
- posix_acl_release(acl);
- return ERR_PTR(-EINVAL);
-}
-
-STATIC void
-xfs_acl_to_disk(struct xfs_acl *aclp, const struct posix_acl *acl)
-{
- const struct posix_acl_entry *acl_e;
- struct xfs_acl_entry *ace;
- int i;
-
- aclp->acl_cnt = cpu_to_be32(acl->a_count);
- for (i = 0; i < acl->a_count; i++) {
- ace = &aclp->acl_entry[i];
- acl_e = &acl->a_entries[i];
-
- ace->ae_tag = cpu_to_be32(acl_e->e_tag);
- ace->ae_id = cpu_to_be32(acl_e->e_id);
- ace->ae_perm = cpu_to_be16(acl_e->e_perm);
- }
-}
-
-struct posix_acl *
-xfs_get_acl(struct inode *inode, int type)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct posix_acl *acl;
- struct xfs_acl *xfs_acl;
- int len = sizeof(struct xfs_acl);
- unsigned char *ea_name;
- int error;
-
- acl = get_cached_acl(inode, type);
- if (acl != ACL_NOT_CACHED)
- return acl;
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- ea_name = SGI_ACL_FILE;
- break;
- case ACL_TYPE_DEFAULT:
- ea_name = SGI_ACL_DEFAULT;
- break;
- default:
- BUG();
- }
-
- /*
- * If we have a cached ACLs value just return it, not need to
- * go out to the disk.
- */
-
- xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
- if (!xfs_acl)
- return ERR_PTR(-ENOMEM);
-
- error = -xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
- &len, ATTR_ROOT);
- if (error) {
- /*
- * If the attribute doesn't exist make sure we have a negative
- * cache entry, for any other error assume it is transient and
- * leave the cache entry as ACL_NOT_CACHED.
- */
- if (error == -ENOATTR) {
- acl = NULL;
- goto out_update_cache;
- }
- goto out;
- }
-
- acl = xfs_acl_from_disk(xfs_acl);
- if (IS_ERR(acl))
- goto out;
-
- out_update_cache:
- set_cached_acl(inode, type, acl);
- out:
- kfree(xfs_acl);
- return acl;
-}
-
-STATIC int
-xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
-{
- struct xfs_inode *ip = XFS_I(inode);
- unsigned char *ea_name;
- int error;
-
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
- switch (type) {
- case ACL_TYPE_ACCESS:
- ea_name = SGI_ACL_FILE;
- break;
- case ACL_TYPE_DEFAULT:
- if (!S_ISDIR(inode->i_mode))
- return acl ? -EACCES : 0;
- ea_name = SGI_ACL_DEFAULT;
- break;
- default:
- return -EINVAL;
- }
-
- if (acl) {
- struct xfs_acl *xfs_acl;
- int len;
-
- xfs_acl = kzalloc(sizeof(struct xfs_acl), GFP_KERNEL);
- if (!xfs_acl)
- return -ENOMEM;
-
- xfs_acl_to_disk(xfs_acl, acl);
- len = sizeof(struct xfs_acl) -
- (sizeof(struct xfs_acl_entry) *
- (XFS_ACL_MAX_ENTRIES - acl->a_count));
-
- error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
- len, ATTR_ROOT);
-
- kfree(xfs_acl);
- } else {
- /*
- * A NULL ACL argument means we want to remove the ACL.
- */
- error = -xfs_attr_remove(ip, ea_name, ATTR_ROOT);
-
- /*
- * If the attribute didn't exist to start with that's fine.
- */
- if (error == -ENOATTR)
- error = 0;
- }
-
- if (!error)
- set_cached_acl(inode, type, acl);
- return error;
-}
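
Note how xfs_set_acl() shrinks the attribute before writing it: only the header plus the a_count entries actually in use are stored, not the whole fixed-size array. A standalone sketch of the same arithmetic, assuming an illustrative 25-entry maximum and a 12-byte entry (not the real on-disk layout):

#include <stdio.h>

#define MAX_ENTRIES 25

struct entry { unsigned tag, id, perm; };	/* 12 bytes on this layout */
struct acl { unsigned cnt; struct entry e[MAX_ENTRIES]; };

int main(void)
{
	unsigned a_count = 3;	/* entries actually in use */
	size_t len = sizeof(struct acl) -
		     sizeof(struct entry) * (MAX_ENTRIES - a_count);

	/* 4-byte header + 3 * 12-byte entries = 40 bytes, not 304 */
	printf("attr length = %zu\n", len);
	return 0;
}
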
-
-int
-xfs_check_acl(struct inode *inode, int mask, unsigned int flags)
-{
- struct xfs_inode *ip;
- struct posix_acl *acl;
- int error = -EAGAIN;
-
- ip = XFS_I(inode);
- trace_xfs_check_acl(ip);
-
- /*
- * If there is no attribute fork no ACL exists on this inode and
- * we can skip the whole exercise.
- */
- if (!XFS_IFORK_Q(ip))
- return -EAGAIN;
-
- if (flags & IPERM_FLAG_RCU) {
- if (!negative_cached_acl(inode, ACL_TYPE_ACCESS))
- return -ECHILD;
- return -EAGAIN;
- }
-
- acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl) {
- error = posix_acl_permission(inode, acl, mask);
- posix_acl_release(acl);
- }
-
- return error;
-}
-
-static int
-xfs_set_mode(struct inode *inode, mode_t mode)
-{
- int error = 0;
-
- if (mode != inode->i_mode) {
- struct iattr iattr;
-
- iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
- iattr.ia_mode = mode;
- iattr.ia_ctime = current_fs_time(inode->i_sb);
-
- error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
- }
-
- return error;
-}
-
-static int
-xfs_acl_exists(struct inode *inode, unsigned char *name)
-{
- int len = sizeof(struct xfs_acl);
-
- return (xfs_attr_get(XFS_I(inode), name, NULL, &len,
- ATTR_ROOT|ATTR_KERNOVAL) == 0);
-}
-
-int
-posix_acl_access_exists(struct inode *inode)
-{
- return xfs_acl_exists(inode, SGI_ACL_FILE);
-}
-
-int
-posix_acl_default_exists(struct inode *inode)
-{
- if (!S_ISDIR(inode->i_mode))
- return 0;
- return xfs_acl_exists(inode, SGI_ACL_DEFAULT);
-}
-
-/*
- * No need for i_mutex because the inode is not yet exposed to the VFS.
- */
-int
-xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl)
-{
- struct posix_acl *clone;
- mode_t mode;
- int error = 0, inherit = 0;
-
- if (S_ISDIR(inode->i_mode)) {
- error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl);
- if (error)
- return error;
- }
-
- clone = posix_acl_clone(default_acl, GFP_KERNEL);
- if (!clone)
- return -ENOMEM;
-
- mode = inode->i_mode;
- error = posix_acl_create_masq(clone, &mode);
- if (error < 0)
- goto out_release_clone;
-
- /*
- * If posix_acl_create_masq returns a positive value we need to
- * inherit a permission that can't be represented using the Unix
- * mode bits and we actually need to set an ACL.
- */
- if (error > 0)
- inherit = 1;
-
- error = xfs_set_mode(inode, mode);
- if (error)
- goto out_release_clone;
-
- if (inherit)
- error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
-
- out_release_clone:
- posix_acl_release(clone);
- return error;
-}
-
-int
-xfs_acl_chmod(struct inode *inode)
-{
- struct posix_acl *acl, *clone;
- int error;
-
- if (S_ISLNK(inode->i_mode))
- return -EOPNOTSUPP;
-
- acl = xfs_get_acl(inode, ACL_TYPE_ACCESS);
- if (IS_ERR(acl) || !acl)
- return PTR_ERR(acl);
-
- clone = posix_acl_clone(acl, GFP_KERNEL);
- posix_acl_release(acl);
- if (!clone)
- return -ENOMEM;
-
- error = posix_acl_chmod_masq(clone, inode->i_mode);
- if (!error)
- error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone);
-
- posix_acl_release(clone);
- return error;
-}
-
-static int
-xfs_xattr_acl_get(struct dentry *dentry, const char *name,
- void *value, size_t size, int type)
-{
- struct posix_acl *acl;
- int error;
-
- acl = xfs_get_acl(dentry->d_inode, type);
- if (IS_ERR(acl))
- return PTR_ERR(acl);
- if (acl == NULL)
- return -ENODATA;
-
- error = posix_acl_to_xattr(acl, value, size);
- posix_acl_release(acl);
-
- return error;
-}
-
-static int
-xfs_xattr_acl_set(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags, int type)
-{
- struct inode *inode = dentry->d_inode;
- struct posix_acl *acl = NULL;
- int error = 0;
-
- if (flags & XATTR_CREATE)
- return -EINVAL;
- if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
- return value ? -EACCES : 0;
- if ((current_fsuid() != inode->i_uid) && !capable(CAP_FOWNER))
- return -EPERM;
-
- if (!value)
- goto set_acl;
-
- acl = posix_acl_from_xattr(value, size);
- if (!acl) {
- /*
- * acl_set_file(3) may request that we set default ACLs with
- * zero length -- defend (gracefully) against that here.
- */
- goto out;
- }
- if (IS_ERR(acl)) {
- error = PTR_ERR(acl);
- goto out;
- }
-
- error = posix_acl_valid(acl);
- if (error)
- goto out_release;
-
- error = -EINVAL;
- if (acl->a_count > XFS_ACL_MAX_ENTRIES)
- goto out_release;
-
- if (type == ACL_TYPE_ACCESS) {
- mode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
-
- if (error <= 0) {
- posix_acl_release(acl);
- acl = NULL;
-
- if (error < 0)
- return error;
- }
-
- error = xfs_set_mode(inode, mode);
- if (error)
- goto out_release;
- }
-
- set_acl:
- error = xfs_set_acl(inode, type, acl);
- out_release:
- posix_acl_release(acl);
- out:
- return error;
-}
-
-const struct xattr_handler xfs_xattr_acl_access_handler = {
- .prefix = POSIX_ACL_XATTR_ACCESS,
- .flags = ACL_TYPE_ACCESS,
- .get = xfs_xattr_acl_get,
- .set = xfs_xattr_acl_set,
-};
-
-const struct xattr_handler xfs_xattr_acl_default_handler = {
- .prefix = POSIX_ACL_XATTR_DEFAULT,
- .flags = ACL_TYPE_DEFAULT,
- .get = xfs_xattr_acl_get,
- .set = xfs_xattr_acl_set,
-};
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
deleted file mode 100644
index 79ce38b..0000000
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ /dev/null
@@ -1,1506 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_rw.h"
-#include "xfs_iomap.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-#include "xfs_bmap.h"
-#include <linux/gfp.h>
-#include <linux/mpage.h>
-#include <linux/pagevec.h>
-#include <linux/writeback.h>
-
-
-/*
- * Prime number of hash buckets since address is used as the key.
- */
-#define NVSYNC 37
-#define to_ioend_wq(v) (&xfs_ioend_wq[((unsigned long)v) % NVSYNC])
-static wait_queue_head_t xfs_ioend_wq[NVSYNC];
-
-void __init
-xfs_ioend_init(void)
-{
- int i;
-
- for (i = 0; i < NVSYNC; i++)
- init_waitqueue_head(&xfs_ioend_wq[i]);
-}
-
-void
-xfs_ioend_wait(
- xfs_inode_t *ip)
-{
- wait_queue_head_t *wq = to_ioend_wq(ip);
-
- wait_event(*wq, (atomic_read(&ip->i_iocount) == 0));
-}
-
-STATIC void
-xfs_ioend_wake(
- xfs_inode_t *ip)
-{
- if (atomic_dec_and_test(&ip->i_iocount))
- wake_up(to_ioend_wq(ip));
-}
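
The table above trades a per-inode wait queue for NVSYNC shared ones, hashing the inode address to pick a bucket; a prime bucket count helps spread pointer values evenly. A minimal sketch of just the hashing step (bucket_for() is a hypothetical stand-in for to_ioend_wq()):

#include <stdio.h>

#define NBUCKETS 37	/* prime, as with NVSYNC above */

static unsigned bucket_for(const void *p)
{
	return (unsigned)((unsigned long)p % NBUCKETS);
}

int main(void)
{
	int a, b;

	/* two distinct objects usually land in different buckets */
	printf("&a -> bucket %u\n", bucket_for(&a));
	printf("&b -> bucket %u\n", bucket_for(&b));
	return 0;
}
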
-
-void
-xfs_count_page_state(
- struct page *page,
- int *delalloc,
- int *unwritten)
-{
- struct buffer_head *bh, *head;
-
- *delalloc = *unwritten = 0;
-
- bh = head = page_buffers(page);
- do {
- if (buffer_unwritten(bh))
- (*unwritten) = 1;
- else if (buffer_delay(bh))
- (*delalloc) = 1;
- } while ((bh = bh->b_this_page) != head);
-}
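
The do/while loop works because a page's buffer_heads form a circular singly linked list via b_this_page: start at the head and stop when the walk comes back around to it, visiting each buffer exactly once. The same traversal in a self-contained sketch (struct bh here is hypothetical):

#include <stdio.h>

struct bh {
	int	flags;
	struct bh *next;	/* like b_this_page: circular */
};

static void scan(struct bh *head)
{
	struct bh *bh = head;

	do {			/* visits every node exactly once */
		printf("flags %d\n", bh->flags);
	} while ((bh = bh->next) != head);
}

int main(void)
{
	struct bh a = { 1, NULL }, b = { 2, NULL }, c = { 4, NULL };

	a.next = &b; b.next = &c; c.next = &a;	/* close the ring */
	scan(&a);
	return 0;
}
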
-
-STATIC struct block_device *
-xfs_find_bdev_for_inode(
- struct inode *inode)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
-
- if (XFS_IS_REALTIME_INODE(ip))
- return mp->m_rtdev_targp->bt_bdev;
- else
- return mp->m_ddev_targp->bt_bdev;
-}
-
-/*
- * We're now finished for good with this ioend structure.
- * Update the page state via the associated buffer_heads,
- * release holds on the inode and bio, and finally free
- * up memory. Do not use the ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
- xfs_ioend_t *ioend)
-{
- struct buffer_head *bh, *next;
- struct xfs_inode *ip = XFS_I(ioend->io_inode);
-
- for (bh = ioend->io_buffer_head; bh; bh = next) {
- next = bh->b_private;
- bh->b_end_io(bh, !ioend->io_error);
- }
-
- /*
- * Volume managers supporting multiple paths can send back ENODEV
- * when the final path disappears. In this case continuing to fill
- * the page cache with dirty data which cannot be written out is
- * evil, so prevent that.
- */
- if (unlikely(ioend->io_error == -ENODEV)) {
- xfs_do_force_shutdown(ip->i_mount, SHUTDOWN_DEVICE_REQ,
- __FILE__, __LINE__);
- }
-
- xfs_ioend_wake(ip);
- mempool_free(ioend, xfs_ioend_pool);
-}
-
-/*
- * If the end of the current ioend is beyond the current EOF,
- * return the new EOF value, otherwise zero.
- */
-STATIC xfs_fsize_t
-xfs_ioend_new_eof(
- xfs_ioend_t *ioend)
-{
- xfs_inode_t *ip = XFS_I(ioend->io_inode);
- xfs_fsize_t isize;
- xfs_fsize_t bsize;
-
- bsize = ioend->io_offset + ioend->io_size;
- isize = MAX(ip->i_size, ip->i_new_size);
- isize = MIN(isize, bsize);
- return isize > ip->i_d.di_size ? isize : 0;
-}
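
To make the clamp concrete: suppose i_size is 8192, an in-flight extending write set i_new_size to 16384, on-disk di_size is 8192, and this ioend covers bytes [12288, 16384). Then bsize = 12288 + 4096 = 16384 and isize = min(max(8192, 16384), 16384) = 16384; since 16384 > 8192, the function returns 16384 as the new on-disk EOF. An ioend ending at or below di_size would return 0 and trigger no size update.
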
-
-/*
- * Update on-disk file size now that data has been written to disk. The
- * current in-memory file size is i_size. If a write is beyond eof i_new_size
- * will be the intended file size until i_size is updated. If this write does
- * not extend all the way to the valid file size then restrict this update to
- * the end of the write.
- *
- * This function does not block as blocking on the inode lock in IO completion
 * can lead to IO completion order dependency deadlocks. If it can't get the
- * inode ilock it will return EAGAIN. Callers must handle this.
- */
-STATIC int
-xfs_setfilesize(
- xfs_ioend_t *ioend)
-{
- xfs_inode_t *ip = XFS_I(ioend->io_inode);
- xfs_fsize_t isize;
-
- if (unlikely(ioend->io_error))
- return 0;
-
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
- return EAGAIN;
-
- isize = xfs_ioend_new_eof(ioend);
- if (isize) {
- ip->i_d.di_size = isize;
- xfs_mark_inode_dirty(ip);
- }
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return 0;
-}
-
-/*
- * Schedule IO completion handling on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend(
- struct xfs_ioend *ioend)
-{
- if (atomic_dec_and_test(&ioend->io_remaining)) {
- if (ioend->io_type == IO_UNWRITTEN)
- queue_work(xfsconvertd_workqueue, &ioend->io_work);
- else
- queue_work(xfsdatad_workqueue, &ioend->io_work);
- }
-}
-
-/*
- * IO write completion.
- */
-STATIC void
-xfs_end_io(
- struct work_struct *work)
-{
- xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work);
- struct xfs_inode *ip = XFS_I(ioend->io_inode);
- int error = 0;
-
- /*
- * For unwritten extents we need to issue transactions to convert a
 * range to normal written extents after the data I/O has finished.
- */
- if (ioend->io_type == IO_UNWRITTEN &&
- likely(!ioend->io_error && !XFS_FORCED_SHUTDOWN(ip->i_mount))) {
-
- error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
- ioend->io_size);
- if (error)
- ioend->io_error = error;
- }
-
- /*
- * We might have to update the on-disk file size after extending
- * writes.
- */
- error = xfs_setfilesize(ioend);
- ASSERT(!error || error == EAGAIN);
-
- /*
- * If we didn't complete processing of the ioend, requeue it to the
- * tail of the workqueue for another attempt later. Otherwise destroy
- * it.
- */
- if (error == EAGAIN) {
- atomic_inc(&ioend->io_remaining);
- xfs_finish_ioend(ioend);
- /* ensure we don't spin on blocked ioends */
- delay(1);
- } else {
- if (ioend->io_iocb)
- aio_complete(ioend->io_iocb, ioend->io_result, 0);
- xfs_destroy_ioend(ioend);
- }
-}
-
-/*
- * Call IO completion handling in caller context on the final put of an ioend.
- */
-STATIC void
-xfs_finish_ioend_sync(
- struct xfs_ioend *ioend)
-{
- if (atomic_dec_and_test(&ioend->io_remaining))
- xfs_end_io(&ioend->io_work);
-}
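
xfs_end_io() above recovers its ioend from the embedded work_struct with container_of(). A self-contained sketch of that pattern, with a hand-rolled container_of and hypothetical types:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct work { int pending; };

struct ioend {
	int	error;
	struct work io_work;	/* embedded, like xfs_ioend.io_work */
};

static void worker(struct work *w)
{
	/* recover the enclosing ioend from the member pointer */
	struct ioend *io = container_of(w, struct ioend, io_work);

	printf("error = %d\n", io->error);
}

int main(void)
{
	struct ioend io = { .error = -5 };

	worker(&io.io_work);	/* prints error = -5 */
	return 0;
}
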
-
-/*
- * Allocate and initialise an IO completion structure.
- * We need to track unwritten extent write completion here initially.
- * We'll need to extend this for updating the ondisk inode size later
- * (vs. incore size).
- */
-STATIC xfs_ioend_t *
-xfs_alloc_ioend(
- struct inode *inode,
- unsigned int type)
-{
- xfs_ioend_t *ioend;
-
- ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);
-
- /*
-	 * Set the count to 1 initially, so that an I/O completion
-	 * callback that fires before we have issued all the I/O cannot
-	 * call the completion routine too early.
- */
- atomic_set(&ioend->io_remaining, 1);
- ioend->io_error = 0;
- ioend->io_list = NULL;
- ioend->io_type = type;
- ioend->io_inode = inode;
- ioend->io_buffer_head = NULL;
- ioend->io_buffer_tail = NULL;
- atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
- ioend->io_offset = 0;
- ioend->io_size = 0;
- ioend->io_iocb = NULL;
- ioend->io_result = 0;
-
- INIT_WORK(&ioend->io_work, xfs_end_io);
- return ioend;
-}
-
-STATIC int
-xfs_map_blocks(
- struct inode *inode,
- loff_t offset,
- struct xfs_bmbt_irec *imap,
- int type,
- int nonblocking)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- ssize_t count = 1 << inode->i_blkbits;
- xfs_fileoff_t offset_fsb, end_fsb;
- int error = 0;
- int bmapi_flags = XFS_BMAPI_ENTIRE;
- int nimaps = 1;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- if (type == IO_UNWRITTEN)
- bmapi_flags |= XFS_BMAPI_IGSTATE;
-
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
- if (nonblocking)
- return -XFS_ERROR(EAGAIN);
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- }
-
- ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
- (ip->i_df.if_flags & XFS_IFEXTENTS));
- ASSERT(offset <= mp->m_maxioffset);
-
- if (offset + count > mp->m_maxioffset)
- count = mp->m_maxioffset - offset;
- end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
- bmapi_flags, NULL, 0, imap, &nimaps, NULL);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
- if (error)
- return -XFS_ERROR(error);
-
- if (type == IO_DELALLOC &&
- (!nimaps || isnullstartblock(imap->br_startblock))) {
- error = xfs_iomap_write_allocate(ip, offset, count, imap);
- if (!error)
- trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
- return -XFS_ERROR(error);
- }
-
-#ifdef DEBUG
- if (type == IO_UNWRITTEN) {
- ASSERT(nimaps);
- ASSERT(imap->br_startblock != HOLESTARTBLOCK);
- ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
- }
-#endif
- if (nimaps)
- trace_xfs_map_blocks_found(ip, offset, count, type, imap);
- return 0;
-}
-
-STATIC int
-xfs_imap_valid(
- struct inode *inode,
- struct xfs_bmbt_irec *imap,
- xfs_off_t offset)
-{
- offset >>= inode->i_blkbits;
-
- return offset >= imap->br_startoff &&
- offset < imap->br_startoff + imap->br_blockcount;
-}
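
As a worked example of the test above: with 4096-byte blocks (i_blkbits = 12), an extent with br_startoff = 10 and br_blockcount = 4 covers file blocks 10..13, i.e. bytes [40960, 57344). Byte offset 45056 shifts down to block 11, which satisfies 10 <= 11 < 14, so the mapping is valid; offset 57344 shifts to block 14, which fails the upper bound, so a new mapping must be looked up.
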
-
-/*
- * BIO completion handler for buffered IO.
- */
-STATIC void
-xfs_end_bio(
- struct bio *bio,
- int error)
-{
- xfs_ioend_t *ioend = bio->bi_private;
-
- ASSERT(atomic_read(&bio->bi_cnt) >= 1);
- ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;
-
- /* Toss bio and pass work off to an xfsdatad thread */
- bio->bi_private = NULL;
- bio->bi_end_io = NULL;
- bio_put(bio);
-
- xfs_finish_ioend(ioend);
-}
-
-STATIC void
-xfs_submit_ioend_bio(
- struct writeback_control *wbc,
- xfs_ioend_t *ioend,
- struct bio *bio)
-{
- atomic_inc(&ioend->io_remaining);
- bio->bi_private = ioend;
- bio->bi_end_io = xfs_end_bio;
-
- /*
- * If the I/O is beyond EOF we mark the inode dirty immediately
- * but don't update the inode size until I/O completion.
- */
- if (xfs_ioend_new_eof(ioend))
- xfs_mark_inode_dirty(XFS_I(ioend->io_inode));
-
- submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
-}
-
-STATIC struct bio *
-xfs_alloc_ioend_bio(
- struct buffer_head *bh)
-{
- int nvecs = bio_get_nr_vecs(bh->b_bdev);
- struct bio *bio = bio_alloc(GFP_NOIO, nvecs);
-
- ASSERT(bio->bi_private == NULL);
- bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
- bio->bi_bdev = bh->b_bdev;
- return bio;
-}
-
-STATIC void
-xfs_start_buffer_writeback(
- struct buffer_head *bh)
-{
- ASSERT(buffer_mapped(bh));
- ASSERT(buffer_locked(bh));
- ASSERT(!buffer_delay(bh));
- ASSERT(!buffer_unwritten(bh));
-
- mark_buffer_async_write(bh);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
-}
-
-STATIC void
-xfs_start_page_writeback(
- struct page *page,
- int clear_dirty,
- int buffers)
-{
- ASSERT(PageLocked(page));
- ASSERT(!PageWriteback(page));
- if (clear_dirty)
- clear_page_dirty_for_io(page);
- set_page_writeback(page);
- unlock_page(page);
- /* If no buffers on the page are to be written, finish it here */
- if (!buffers)
- end_page_writeback(page);
-}
-
-static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
-{
- return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
-}
-
-/*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
 * writeback on all the buffers before we submit them for I/O. If we mark the
 * buffers as we go, we can end up with a page that only has some buffers
 * marked async write, and I/O completion on the page can occur before we
 * mark the remaining buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
- */
-STATIC void
-xfs_submit_ioend(
- struct writeback_control *wbc,
- xfs_ioend_t *ioend)
-{
- xfs_ioend_t *head = ioend;
- xfs_ioend_t *next;
- struct buffer_head *bh;
- struct bio *bio;
- sector_t lastblock = 0;
-
- /* Pass 1 - start writeback */
- do {
- next = ioend->io_list;
- for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
- xfs_start_buffer_writeback(bh);
- } while ((ioend = next) != NULL);
-
- /* Pass 2 - submit I/O */
- ioend = head;
- do {
- next = ioend->io_list;
- bio = NULL;
-
- for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
- if (!bio) {
- retry:
- bio = xfs_alloc_ioend_bio(bh);
- } else if (bh->b_blocknr != lastblock + 1) {
- xfs_submit_ioend_bio(wbc, ioend, bio);
- goto retry;
- }
-
- if (bio_add_buffer(bio, bh) != bh->b_size) {
- xfs_submit_ioend_bio(wbc, ioend, bio);
- goto retry;
- }
-
- lastblock = bh->b_blocknr;
- }
- if (bio)
- xfs_submit_ioend_bio(wbc, ioend, bio);
- xfs_finish_ioend(ioend);
- } while ((ioend = next) != NULL);
-}
-
-/*
 * Cancel submission of all buffer_heads so far in this ioend.
 * Toss the ioend too. Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
- xfs_ioend_t *ioend)
-{
- xfs_ioend_t *next;
- struct buffer_head *bh, *next_bh;
-
- do {
- next = ioend->io_list;
- bh = ioend->io_buffer_head;
- do {
- next_bh = bh->b_private;
- clear_buffer_async_write(bh);
- unlock_buffer(bh);
- } while ((bh = next_bh) != NULL);
-
- xfs_ioend_wake(XFS_I(ioend->io_inode));
- mempool_free(ioend, xfs_ioend_pool);
- } while ((ioend = next) != NULL);
-}
-
-/*
 * Test to see if we've been building up a completion structure for
 * earlier buffers -- if so, we try to append to this ioend if we
 * can, otherwise we finish off any current ioend and start another.
 * A newly started ioend is chained onto the previous one via io_list
 * and handed back through *result.
- */
-STATIC void
-xfs_add_to_ioend(
- struct inode *inode,
- struct buffer_head *bh,
- xfs_off_t offset,
- unsigned int type,
- xfs_ioend_t **result,
- int need_ioend)
-{
- xfs_ioend_t *ioend = *result;
-
- if (!ioend || need_ioend || type != ioend->io_type) {
- xfs_ioend_t *previous = *result;
-
- ioend = xfs_alloc_ioend(inode, type);
- ioend->io_offset = offset;
- ioend->io_buffer_head = bh;
- ioend->io_buffer_tail = bh;
- if (previous)
- previous->io_list = ioend;
- *result = ioend;
- } else {
- ioend->io_buffer_tail->b_private = bh;
- ioend->io_buffer_tail = bh;
- }
-
- bh->b_private = NULL;
- ioend->io_size += bh->b_size;
-}
-
-STATIC void
-xfs_map_buffer(
- struct inode *inode,
- struct buffer_head *bh,
- struct xfs_bmbt_irec *imap,
- xfs_off_t offset)
-{
- sector_t bn;
- struct xfs_mount *m = XFS_I(inode)->i_mount;
- xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
- xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
-
- ASSERT(imap->br_startblock != HOLESTARTBLOCK);
- ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
- bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
- ((offset - iomap_offset) >> inode->i_blkbits);
-
- ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
-
- bh->b_blocknr = bn;
- set_buffer_mapped(bh);
-}
-
-STATIC void
-xfs_map_at_offset(
- struct inode *inode,
- struct buffer_head *bh,
- struct xfs_bmbt_irec *imap,
- xfs_off_t offset)
-{
- ASSERT(imap->br_startblock != HOLESTARTBLOCK);
- ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
-
- xfs_map_buffer(inode, bh, imap, offset);
- set_buffer_mapped(bh);
- clear_buffer_delay(bh);
- clear_buffer_unwritten(bh);
-}
-
-/*
- * Test if a given page is suitable for writing as part of an unwritten
- * or delayed allocate extent.
- */
-STATIC int
-xfs_is_delayed_page(
- struct page *page,
- unsigned int type)
-{
- if (PageWriteback(page))
- return 0;
-
- if (page->mapping && page_has_buffers(page)) {
- struct buffer_head *bh, *head;
- int acceptable = 0;
-
- bh = head = page_buffers(page);
- do {
- if (buffer_unwritten(bh))
- acceptable = (type == IO_UNWRITTEN);
- else if (buffer_delay(bh))
- acceptable = (type == IO_DELALLOC);
- else if (buffer_dirty(bh) && buffer_mapped(bh))
- acceptable = (type == IO_OVERWRITE);
- else
- break;
- } while ((bh = bh->b_this_page) != head);
-
- if (acceptable)
- return 1;
- }
-
- return 0;
-}
-
-/*
 * Allocate & map buffers for a page given the extent map, then write it out.
 * Except for the original page of a writepage, this is called on
 * delalloc/unwritten pages only; for the original page it is possible
 * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
- struct inode *inode,
- struct page *page,
- loff_t tindex,
- struct xfs_bmbt_irec *imap,
- xfs_ioend_t **ioendp,
- struct writeback_control *wbc)
-{
- struct buffer_head *bh, *head;
- xfs_off_t end_offset;
- unsigned long p_offset;
- unsigned int type;
- int len, page_dirty;
- int count = 0, done = 0, uptodate = 1;
- xfs_off_t offset = page_offset(page);
-
- if (page->index != tindex)
- goto fail;
- if (!trylock_page(page))
- goto fail;
- if (PageWriteback(page))
- goto fail_unlock_page;
- if (page->mapping != inode->i_mapping)
- goto fail_unlock_page;
- if (!xfs_is_delayed_page(page, (*ioendp)->io_type))
- goto fail_unlock_page;
-
- /*
- * page_dirty is initially a count of buffers on the page before
- * EOF and is decremented as we move each into a cleanable state.
- *
- * Derivation:
- *
- * End offset is the highest offset that this page should represent.
- * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
- * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
- * hence give us the correct page_dirty count. On any other page,
- * it will be zero and in that case we need page_dirty to be the
- * count of buffers on the page.
- */
- end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
- i_size_read(inode));
-
- len = 1 << inode->i_blkbits;
- p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
- PAGE_CACHE_SIZE);
- p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
- page_dirty = p_offset / len;
-
- bh = head = page_buffers(page);
- do {
- if (offset >= end_offset)
- break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
- if (!(PageUptodate(page) || buffer_uptodate(bh))) {
- done = 1;
- continue;
- }
-
- if (buffer_unwritten(bh) || buffer_delay(bh) ||
- buffer_mapped(bh)) {
- if (buffer_unwritten(bh))
- type = IO_UNWRITTEN;
- else if (buffer_delay(bh))
- type = IO_DELALLOC;
- else
- type = IO_OVERWRITE;
-
- if (!xfs_imap_valid(inode, imap, offset)) {
- done = 1;
- continue;
- }
-
- lock_buffer(bh);
- if (type != IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, imap, offset);
- xfs_add_to_ioend(inode, bh, offset, type,
- ioendp, done);
-
- page_dirty--;
- count++;
- } else {
- done = 1;
- }
- } while (offset += len, (bh = bh->b_this_page) != head);
-
- if (uptodate && bh == head)
- SetPageUptodate(page);
-
- if (count) {
- if (--wbc->nr_to_write <= 0 &&
- wbc->sync_mode == WB_SYNC_NONE)
- done = 1;
- }
- xfs_start_page_writeback(page, !page_dirty, count);
-
- return done;
- fail_unlock_page:
- unlock_page(page);
- fail:
- return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
- struct inode *inode,
- pgoff_t tindex,
- struct xfs_bmbt_irec *imap,
- xfs_ioend_t **ioendp,
- struct writeback_control *wbc,
- pgoff_t tlast)
-{
- struct pagevec pvec;
- int done = 0, i;
-
- pagevec_init(&pvec, 0);
- while (!done && tindex <= tlast) {
- unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
- if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- imap, ioendp, wbc);
- if (done)
- break;
- }
-
- pagevec_release(&pvec);
- cond_resched();
- }
-}
-
-STATIC void
-xfs_vm_invalidatepage(
- struct page *page,
- unsigned long offset)
-{
- trace_xfs_invalidatepage(page->mapping->host, page, offset);
- block_invalidatepage(page, offset);
-}
-
-/*
- * If the page has delalloc buffers on it, we need to punch them out before we
- * invalidate the page. If we don't, we leave a stale delalloc mapping on the
- * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
- * is done on that same region - the delalloc extent is returned when none is
- * supposed to be there.
- *
- * We prevent this by truncating away the delalloc regions on the page before
- * invalidating it. Because they are delalloc, we can do this without needing a
- * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
- * truncation without a transaction as there is no space left for block
 * reservation (typically why we see an ENOSPC in writeback).
- *
- * This is not a performance critical path, so for now just do the punching a
- * buffer head at a time.
- */
-STATIC void
-xfs_aops_discard_page(
- struct page *page)
-{
- struct inode *inode = page->mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- struct buffer_head *bh, *head;
- loff_t offset = page_offset(page);
-
- if (!xfs_is_delayed_page(page, IO_DELALLOC))
- goto out_invalidate;
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- goto out_invalidate;
-
- xfs_alert(ip->i_mount,
- "page discard on page %p, inode 0x%llx, offset %llu.",
- page, ip->i_ino, offset);
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- bh = head = page_buffers(page);
- do {
- int error;
- xfs_fileoff_t start_fsb;
-
- if (!buffer_delay(bh))
- goto next_buffer;
-
- start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
- error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
- if (error) {
- /* something screwed, just bail */
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_alert(ip->i_mount,
- "page discard unable to remove delalloc mapping.");
- }
- break;
- }
-next_buffer:
- offset += 1 << inode->i_blkbits;
-
- } while ((bh = bh->b_this_page) != head);
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-out_invalidate:
- xfs_vm_invalidatepage(page, 0);
- return;
-}
-
-/*
- * Write out a dirty page.
- *
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
- * regular allocated space.
- * For any other dirty buffer heads on the page we should flush them.
- *
- * If we detect that a transaction would be required to flush the page, we
- * have to check the process flags first, if we are already in a transaction
- * or disk I/O during allocations is off, we need to fail the writepage and
- * redirty the page.
- */
-STATIC int
-xfs_vm_writepage(
- struct page *page,
- struct writeback_control *wbc)
-{
- struct inode *inode = page->mapping->host;
- int delalloc, unwritten;
- struct buffer_head *bh, *head;
- struct xfs_bmbt_irec imap;
- xfs_ioend_t *ioend = NULL, *iohead = NULL;
- loff_t offset;
- unsigned int type;
- __uint64_t end_offset;
- pgoff_t end_index, last_index;
- ssize_t len;
- int err, imap_valid = 0, uptodate = 1;
- int count = 0;
- int nonblocking = 0;
-
- trace_xfs_writepage(inode, page, 0);
-
- ASSERT(page_has_buffers(page));
-
- /*
- * Refuse to write the page out if we are called from reclaim context.
- *
-	 * This avoids stack overflows when called from already-deep stacks in
- * random callers for direct reclaim or memcg reclaim. We explicitly
- * allow reclaim from kswapd as the stack usage there is relatively low.
- *
- * This should really be done by the core VM, but until that happens
- * filesystems like XFS, btrfs and ext4 have to take care of this
- * by themselves.
- */
- if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
- goto redirty;
-
- /*
- * We need a transaction if there are delalloc or unwritten buffers
- * on the page.
- *
- * If we need a transaction and the process flags say we are already
- * in a transaction, or no IO is allowed then mark the page dirty
- * again and leave the page as is.
- */
- xfs_count_page_state(page, &delalloc, &unwritten);
- if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
- goto redirty;
-
- /* Is this page beyond the end of the file? */
- offset = i_size_read(inode);
- end_index = offset >> PAGE_CACHE_SHIFT;
- last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
- if (page->index >= end_index) {
- if ((page->index >= end_index + 1) ||
- !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
- unlock_page(page);
- return 0;
- }
- }
-
- end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
- offset);
- len = 1 << inode->i_blkbits;
-
- bh = head = page_buffers(page);
- offset = page_offset(page);
- type = IO_OVERWRITE;
-
- if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
- nonblocking = 1;
-
- do {
- int new_ioend = 0;
-
- if (offset >= end_offset)
- break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
-
- /*
- * set_page_dirty dirties all buffers in a page, independent
- * of their state. The dirty state however is entirely
- * meaningless for holes (!mapped && uptodate), so skip
- * buffers covering holes here.
- */
- if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
- imap_valid = 0;
- continue;
- }
-
- if (buffer_unwritten(bh)) {
- if (type != IO_UNWRITTEN) {
- type = IO_UNWRITTEN;
- imap_valid = 0;
- }
- } else if (buffer_delay(bh)) {
- if (type != IO_DELALLOC) {
- type = IO_DELALLOC;
- imap_valid = 0;
- }
- } else if (buffer_uptodate(bh)) {
- if (type != IO_OVERWRITE) {
- type = IO_OVERWRITE;
- imap_valid = 0;
- }
- } else {
- if (PageUptodate(page)) {
- ASSERT(buffer_mapped(bh));
- imap_valid = 0;
- }
- continue;
- }
-
- if (imap_valid)
- imap_valid = xfs_imap_valid(inode, &imap, offset);
- if (!imap_valid) {
- /*
- * If we didn't have a valid mapping then we need to
- * put the new mapping into a separate ioend structure.
- * This ensures non-contiguous extents always have
- * separate ioends, which is particularly important
- * for unwritten extent conversion at I/O completion
- * time.
- */
- new_ioend = 1;
- err = xfs_map_blocks(inode, offset, &imap, type,
- nonblocking);
- if (err)
- goto error;
- imap_valid = xfs_imap_valid(inode, &imap, offset);
- }
- if (imap_valid) {
- lock_buffer(bh);
- if (type != IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, &imap, offset);
- xfs_add_to_ioend(inode, bh, offset, type, &ioend,
- new_ioend);
- count++;
- }
-
- if (!iohead)
- iohead = ioend;
-
- } while (offset += len, ((bh = bh->b_this_page) != head));
-
- if (uptodate && bh == head)
- SetPageUptodate(page);
-
- xfs_start_page_writeback(page, 1, count);
-
- if (ioend && imap_valid) {
- xfs_off_t end_index;
-
- end_index = imap.br_startoff + imap.br_blockcount;
-
- /* to bytes */
- end_index <<= inode->i_blkbits;
-
- /* to pages */
- end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
- /* check against file size */
- if (end_index > last_index)
- end_index = last_index;
-
- xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
- wbc, end_index);
- }
-
- if (iohead)
- xfs_submit_ioend(wbc, iohead);
-
- return 0;
-
-error:
- if (iohead)
- xfs_cancel_ioend(iohead);
-
- if (err == -EAGAIN)
- goto redirty;
-
- xfs_aops_discard_page(page);
- ClearPageUptodate(page);
- unlock_page(page);
- return err;
-
-redirty:
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-}
-
-STATIC int
-xfs_vm_writepages(
- struct address_space *mapping,
- struct writeback_control *wbc)
-{
- xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- return generic_writepages(mapping, wbc);
-}
-
-/*
- * Called to move a page into cleanable state - and from there
- * to be released. The page should already be clean. We always
- * have buffer heads in this call.
- *
- * Returns 1 if the page is ok to release, 0 otherwise.
- */
-STATIC int
-xfs_vm_releasepage(
- struct page *page,
- gfp_t gfp_mask)
-{
- int delalloc, unwritten;
-
- trace_xfs_releasepage(page->mapping->host, page, 0);
-
- xfs_count_page_state(page, &delalloc, &unwritten);
-
- if (WARN_ON(delalloc))
- return 0;
- if (WARN_ON(unwritten))
- return 0;
-
- return try_to_free_buffers(page);
-}
-
-STATIC int
-__xfs_get_blocks(
- struct inode *inode,
- sector_t iblock,
- struct buffer_head *bh_result,
- int create,
- int direct)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb, end_fsb;
- int error = 0;
- int lockmode = 0;
- struct xfs_bmbt_irec imap;
- int nimaps = 1;
- xfs_off_t offset;
- ssize_t size;
- int new = 0;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- offset = (xfs_off_t)iblock << inode->i_blkbits;
- ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
- size = bh_result->b_size;
-
- if (!create && direct && offset >= i_size_read(inode))
- return 0;
-
- if (create) {
- lockmode = XFS_ILOCK_EXCL;
- xfs_ilock(ip, lockmode);
- } else {
- lockmode = xfs_ilock_map_shared(ip);
- }
-
- ASSERT(offset <= mp->m_maxioffset);
- if (offset + size > mp->m_maxioffset)
- size = mp->m_maxioffset - offset;
- end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
-
- error = xfs_bmapi(NULL, ip, offset_fsb, end_fsb - offset_fsb,
- XFS_BMAPI_ENTIRE, NULL, 0, &imap, &nimaps, NULL);
- if (error)
- goto out_unlock;
-
- if (create &&
- (!nimaps ||
- (imap.br_startblock == HOLESTARTBLOCK ||
- imap.br_startblock == DELAYSTARTBLOCK))) {
- if (direct) {
- error = xfs_iomap_write_direct(ip, offset, size,
- &imap, nimaps);
- } else {
- error = xfs_iomap_write_delay(ip, offset, size, &imap);
- }
- if (error)
- goto out_unlock;
-
- trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
- } else if (nimaps) {
- trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
- } else {
- trace_xfs_get_blocks_notfound(ip, offset, size);
- goto out_unlock;
- }
- xfs_iunlock(ip, lockmode);
-
- if (imap.br_startblock != HOLESTARTBLOCK &&
- imap.br_startblock != DELAYSTARTBLOCK) {
- /*
- * For unwritten extents do not report a disk address on
- * the read case (treat as if we're reading into a hole).
- */
- if (create || !ISUNWRITTEN(&imap))
- xfs_map_buffer(inode, bh_result, &imap, offset);
- if (create && ISUNWRITTEN(&imap)) {
- if (direct)
- bh_result->b_private = inode;
- set_buffer_unwritten(bh_result);
- }
- }
-
- /*
-	 * If this is a realtime file, data may be on a different device
-	 * to the one currently pointed at by the buffer_head's b_bdev.
- */
- bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
-
- /*
- * If we previously allocated a block out beyond eof and we are now
- * coming back to use it then we will need to flag it as new even if it
- * has a disk address.
- *
- * With sub-block writes into unwritten extents we also need to mark
- * the buffer as new so that the unwritten parts of the buffer gets
- * correctly zeroed.
- */
- if (create &&
- ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
- (offset >= i_size_read(inode)) ||
- (new || ISUNWRITTEN(&imap))))
- set_buffer_new(bh_result);
-
- if (imap.br_startblock == DELAYSTARTBLOCK) {
- BUG_ON(direct);
- if (create) {
- set_buffer_uptodate(bh_result);
- set_buffer_mapped(bh_result);
- set_buffer_delay(bh_result);
- }
- }
-
- /*
-	 * If this is O_DIRECT or the mpage code calling, tell them how large
- * the mapping is, so that we can avoid repeated get_blocks calls.
- */
- if (direct || size > (1 << inode->i_blkbits)) {
- xfs_off_t mapping_size;
-
- mapping_size = imap.br_startoff + imap.br_blockcount - iblock;
- mapping_size <<= inode->i_blkbits;
-
- ASSERT(mapping_size > 0);
- if (mapping_size > size)
- mapping_size = size;
- if (mapping_size > LONG_MAX)
- mapping_size = LONG_MAX;
-
- bh_result->b_size = mapping_size;
- }
-
- return 0;
-
-out_unlock:
- xfs_iunlock(ip, lockmode);
- return -error;
-}
-
-int
-xfs_get_blocks(
- struct inode *inode,
- sector_t iblock,
- struct buffer_head *bh_result,
- int create)
-{
- return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
-}
-
-STATIC int
-xfs_get_blocks_direct(
- struct inode *inode,
- sector_t iblock,
- struct buffer_head *bh_result,
- int create)
-{
- return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
-}
-
-/*
- * Complete a direct I/O write request.
- *
- * If the private argument is non-NULL __xfs_get_blocks signals us that we
- * need to issue a transaction to convert the range from unwritten to written
- * extents. In case this is regular synchronous I/O we just call xfs_end_io
- * to do this and we are done. But in case this was a successful AIO
- * request this handler is called from interrupt context, from which we
- * can't start transactions. In that case offload the I/O completion to
- * the workqueues we also use for buffered I/O completion.
- */
-STATIC void
-xfs_end_io_direct_write(
- struct kiocb *iocb,
- loff_t offset,
- ssize_t size,
- void *private,
- int ret,
- bool is_async)
-{
- struct xfs_ioend *ioend = iocb->private;
-
- /*
- * blockdev_direct_IO can return an error even after the I/O
- * completion handler was called. Thus we need to protect
- * against double-freeing.
- */
- iocb->private = NULL;
-
- ioend->io_offset = offset;
- ioend->io_size = size;
- if (private && size > 0)
- ioend->io_type = IO_UNWRITTEN;
-
- if (is_async) {
- /*
- * If we are converting an unwritten extent we need to delay
-		 * the AIO completion until after the unwritten extent
- * conversion has completed, otherwise do it ASAP.
- */
- if (ioend->io_type == IO_UNWRITTEN) {
- ioend->io_iocb = iocb;
- ioend->io_result = ret;
- } else {
- aio_complete(iocb, ret, 0);
- }
- xfs_finish_ioend(ioend);
- } else {
- xfs_finish_ioend_sync(ioend);
- }
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
- int rw,
- struct kiocb *iocb,
- const struct iovec *iov,
- loff_t offset,
- unsigned long nr_segs)
-{
- struct inode *inode = iocb->ki_filp->f_mapping->host;
- struct block_device *bdev = xfs_find_bdev_for_inode(inode);
- ssize_t ret;
-
- if (rw & WRITE) {
- iocb->private = xfs_alloc_ioend(inode, IO_DIRECT);
-
- ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
- offset, nr_segs,
- xfs_get_blocks_direct,
- xfs_end_io_direct_write, NULL, 0);
- if (ret != -EIOCBQUEUED && iocb->private)
- xfs_destroy_ioend(iocb->private);
- } else {
- ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
- offset, nr_segs,
- xfs_get_blocks_direct,
- NULL, NULL, 0);
- }
-
- return ret;
-}
-
-STATIC void
-xfs_vm_write_failed(
- struct address_space *mapping,
- loff_t to)
-{
- struct inode *inode = mapping->host;
-
- if (to > inode->i_size) {
- /*
- * punch out the delalloc blocks we have already allocated. We
- * don't call xfs_setattr() to do this as we may be in the
- * middle of a multi-iovec write and so the vfs inode->i_size
- * will not match the xfs ip->i_size and so it will zero too
-		 * much. Hence we just truncate the page cache to zero what is
- * necessary and punch the delalloc blocks directly.
- */
- struct xfs_inode *ip = XFS_I(inode);
- xfs_fileoff_t start_fsb;
- xfs_fileoff_t end_fsb;
- int error;
-
- truncate_pagecache(inode, to, inode->i_size);
-
- /*
- * Check if there are any blocks that are outside of i_size
- * that need to be trimmed back.
- */
- start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
- end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
- if (end_fsb <= start_fsb)
- return;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
- end_fsb - start_fsb);
- if (error) {
- /* something screwed, just bail */
- if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_alert(ip->i_mount,
- "xfs_vm_write_failed: unable to clean up ino %lld",
- ip->i_ino);
- }
- }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- }
-}
-
-STATIC int
-xfs_vm_write_begin(
- struct file *file,
- struct address_space *mapping,
- loff_t pos,
- unsigned len,
- unsigned flags,
- struct page **pagep,
- void **fsdata)
-{
- int ret;
-
- ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
- pagep, xfs_get_blocks);
- if (unlikely(ret))
- xfs_vm_write_failed(mapping, pos + len);
- return ret;
-}
-
-STATIC int
-xfs_vm_write_end(
- struct file *file,
- struct address_space *mapping,
- loff_t pos,
- unsigned len,
- unsigned copied,
- struct page *page,
- void *fsdata)
-{
- int ret;
-
- ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
- if (unlikely(ret < len))
- xfs_vm_write_failed(mapping, pos + len);
- return ret;
-}
-
-STATIC sector_t
-xfs_vm_bmap(
- struct address_space *mapping,
- sector_t block)
-{
- struct inode *inode = (struct inode *)mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
-
- trace_xfs_vm_bmap(XFS_I(inode));
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return generic_block_bmap(mapping, block, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpage(
- struct file *unused,
- struct page *page)
-{
- return mpage_readpage(page, xfs_get_blocks);
-}
-
-STATIC int
-xfs_vm_readpages(
- struct file *unused,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned nr_pages)
-{
- return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
-}
-
-const struct address_space_operations xfs_address_space_operations = {
- .readpage = xfs_vm_readpage,
- .readpages = xfs_vm_readpages,
- .writepage = xfs_vm_writepage,
- .writepages = xfs_vm_writepages,
- .releasepage = xfs_vm_releasepage,
- .invalidatepage = xfs_vm_invalidatepage,
- .write_begin = xfs_vm_write_begin,
- .write_end = xfs_vm_write_end,
- .bmap = xfs_vm_bmap,
- .direct_IO = xfs_vm_direct_IO,
- .migratepage = buffer_migrate_page,
- .is_partially_uptodate = block_is_partially_uptodate,
- .error_remove_page = generic_error_remove_page,
-};
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
deleted file mode 100644
index 71f721e..0000000
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_AOPS_H__
-#define __XFS_AOPS_H__
-
-extern struct workqueue_struct *xfsdatad_workqueue;
-extern struct workqueue_struct *xfsconvertd_workqueue;
-extern mempool_t *xfs_ioend_pool;
-
-/*
- * Types of I/O for bmap clustering and I/O completion tracking.
- */
-enum {
- IO_DIRECT = 0, /* special case for direct I/O ioends */
- IO_DELALLOC, /* mapping covers delalloc region */
- IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
- IO_OVERWRITE, /* mapping covers already allocated extent */
-};
-
-#define XFS_IO_TYPES \
- { 0, "" }, \
- { IO_DELALLOC, "delalloc" }, \
- { IO_UNWRITTEN, "unwritten" }, \
- { IO_OVERWRITE, "overwrite" }
-
-/*
- * xfs_ioend struct manages large extent writes for XFS.
- * It can manage several multi-page bio's at once.
- */
-typedef struct xfs_ioend {
- struct xfs_ioend *io_list; /* next ioend in chain */
- unsigned int io_type; /* delalloc / unwritten */
- int io_error; /* I/O error code */
- atomic_t io_remaining; /* hold count */
- struct inode *io_inode; /* file being written to */
- struct buffer_head *io_buffer_head;/* buffer linked list head */
- struct buffer_head *io_buffer_tail;/* buffer linked list tail */
- size_t io_size; /* size of the extent */
- xfs_off_t io_offset; /* offset in the file */
- struct work_struct io_work; /* xfsdatad work queue */
- struct kiocb *io_iocb;
- int io_result;
-} xfs_ioend_t;
-
-extern const struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
-
-extern void xfs_ioend_init(void);
-extern void xfs_ioend_wait(struct xfs_inode *);
-
-extern void xfs_count_page_state(struct page *, int *, int *);
-
-#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
deleted file mode 100644
index 5e68099..0000000
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ /dev/null
@@ -1,1899 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include <linux/stddef.h>
-#include <linux/errno.h>
-#include <linux/gfp.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/vmalloc.h>
-#include <linux/bio.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/workqueue.h>
-#include <linux/percpu.h>
-#include <linux/blkdev.h>
-#include <linux/hash.h>
-#include <linux/kthread.h>
-#include <linux/migrate.h>
-#include <linux/backing-dev.h>
-#include <linux/freezer.h>
-
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_trace.h"
-
-static kmem_zone_t *xfs_buf_zone;
-STATIC int xfsbufd(void *);
-STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-
-static struct workqueue_struct *xfslogd_workqueue;
-struct workqueue_struct *xfsdatad_workqueue;
-struct workqueue_struct *xfsconvertd_workqueue;
-
-#ifdef XFS_BUF_LOCK_TRACKING
-# define XB_SET_OWNER(bp) ((bp)->b_last_holder = current->pid)
-# define XB_CLEAR_OWNER(bp) ((bp)->b_last_holder = -1)
-# define XB_GET_OWNER(bp) ((bp)->b_last_holder)
-#else
-# define XB_SET_OWNER(bp) do { } while (0)
-# define XB_CLEAR_OWNER(bp) do { } while (0)
-# define XB_GET_OWNER(bp) do { } while (0)
-#endif
-
-#define xb_to_gfp(flags) \
- ((((flags) & XBF_READ_AHEAD) ? __GFP_NORETRY : \
- ((flags) & XBF_DONT_BLOCK) ? GFP_NOFS : GFP_KERNEL) | __GFP_NOWARN)
-
-#define xb_to_km(flags) \
- (((flags) & XBF_DONT_BLOCK) ? KM_NOFS : KM_SLEEP)
-
-#define xfs_buf_allocate(flags) \
- kmem_zone_alloc(xfs_buf_zone, xb_to_km(flags))
-#define xfs_buf_deallocate(bp) \
- kmem_zone_free(xfs_buf_zone, (bp));
-
-static inline int
-xfs_buf_is_vmapped(
- struct xfs_buf *bp)
-{
- /*
- * Return true if the buffer is vmapped.
- *
- * The XBF_MAPPED flag is set if the buffer should be mapped, but the
- * code is clever enough to know it doesn't have to map a single page,
- * so the check has to be both for XBF_MAPPED and bp->b_page_count > 1.
- */
- return (bp->b_flags & XBF_MAPPED) && bp->b_page_count > 1;
-}
-
-static inline int
-xfs_buf_vmap_len(
- struct xfs_buf *bp)
-{
- return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
-}
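
For example, with PAGE_SIZE = 4096, b_page_count = 3 and the buffer's data starting b_offset = 512 bytes into the first page, the mapped length is 3 * 4096 - 512 = 11776 bytes.
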
-
-/*
- * xfs_buf_lru_add - add a buffer to the LRU.
- *
- * The LRU takes a new reference to the buffer so that it will only be freed
- * once the shrinker takes the buffer off the LRU.
- */
-STATIC void
-xfs_buf_lru_add(
- struct xfs_buf *bp)
-{
- struct xfs_buftarg *btp = bp->b_target;
-
- spin_lock(&btp->bt_lru_lock);
- if (list_empty(&bp->b_lru)) {
- atomic_inc(&bp->b_hold);
- list_add_tail(&bp->b_lru, &btp->bt_lru);
- btp->bt_lru_nr++;
- }
- spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * xfs_buf_lru_del - remove a buffer from the LRU
- *
 * The unlocked check is safe here because it only occurs when there are no
 * b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It is
 * there to optimise the case of the shrinker removing the buffer from the
 * LRU and calling xfs_buf_free(), i.e. it avoids an unnecessary round trip
 * on the bt_lru_lock.
- */
-STATIC void
-xfs_buf_lru_del(
- struct xfs_buf *bp)
-{
- struct xfs_buftarg *btp = bp->b_target;
-
- if (list_empty(&bp->b_lru))
- return;
-
- spin_lock(&btp->bt_lru_lock);
- if (!list_empty(&bp->b_lru)) {
- list_del_init(&bp->b_lru);
- btp->bt_lru_nr--;
- }
- spin_unlock(&btp->bt_lru_lock);
-}
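
The same unlocked-check/locked-recheck idiom in a standalone pthreads sketch (all names hypothetical). The unlocked read is only a hint to skip the lock in the common case; the authoritative test is repeated under the mutex because the state may change in between:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool on_list;
static int  nr_items;

static void item_del(void)
{
	if (!on_list)			/* cheap unlocked fast path */
		return;

	pthread_mutex_lock(&lock);
	if (on_list) {			/* recheck: may have raced */
		on_list = false;
		nr_items--;
	}
	pthread_mutex_unlock(&lock);
}

int main(void)
{
	on_list = true;
	nr_items = 1;
	item_del();		/* removes the item exactly once */
	item_del();		/* fast path: already off the list */
	return nr_items;	/* 0 */
}
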
-
-/*
- * When we mark a buffer stale, we remove the buffer from the LRU and clear the
- * b_lru_ref count so that the buffer is freed immediately when the buffer
- * reference count falls to zero. If the buffer is already on the LRU, we need
- * to remove the reference that LRU holds on the buffer.
- *
- * This prevents build-up of stale buffers on the LRU.
- */
-void
-xfs_buf_stale(
- struct xfs_buf *bp)
-{
- bp->b_flags |= XBF_STALE;
- atomic_set(&(bp)->b_lru_ref, 0);
- if (!list_empty(&bp->b_lru)) {
- struct xfs_buftarg *btp = bp->b_target;
-
- spin_lock(&btp->bt_lru_lock);
- if (!list_empty(&bp->b_lru)) {
- list_del_init(&bp->b_lru);
- btp->bt_lru_nr--;
- atomic_dec(&bp->b_hold);
- }
- spin_unlock(&btp->bt_lru_lock);
- }
- ASSERT(atomic_read(&bp->b_hold) >= 1);
-}
-
-STATIC void
-_xfs_buf_initialize(
- xfs_buf_t *bp,
- xfs_buftarg_t *target,
- xfs_off_t range_base,
- size_t range_length,
- xfs_buf_flags_t flags)
-{
- /*
- * We don't want certain flags to appear in b_flags.
- */
- flags &= ~(XBF_LOCK|XBF_MAPPED|XBF_DONT_BLOCK|XBF_READ_AHEAD);
-
- memset(bp, 0, sizeof(xfs_buf_t));
- atomic_set(&bp->b_hold, 1);
- atomic_set(&bp->b_lru_ref, 1);
- init_completion(&bp->b_iowait);
- INIT_LIST_HEAD(&bp->b_lru);
- INIT_LIST_HEAD(&bp->b_list);
- RB_CLEAR_NODE(&bp->b_rbnode);
- sema_init(&bp->b_sema, 0); /* held, no waiters */
- XB_SET_OWNER(bp);
- bp->b_target = target;
- bp->b_file_offset = range_base;
- /*
- * Set buffer_length and count_desired to the same value initially.
- * I/O routines should use count_desired, which will be the same in
- * most cases but may be reset (e.g. XFS recovery).
- */
- bp->b_buffer_length = bp->b_count_desired = range_length;
- bp->b_flags = flags;
- bp->b_bn = XFS_BUF_DADDR_NULL;
- atomic_set(&bp->b_pin_count, 0);
- init_waitqueue_head(&bp->b_waiters);
-
- XFS_STATS_INC(xb_create);
-
- trace_xfs_buf_init(bp, _RET_IP_);
-}
-
-/*
- * Allocate a page array capable of holding a specified number
- * of pages, and point the page buf at it.
- */
-STATIC int
-_xfs_buf_get_pages(
- xfs_buf_t *bp,
- int page_count,
- xfs_buf_flags_t flags)
-{
- /* Make sure that we have a page list */
- if (bp->b_pages == NULL) {
- bp->b_offset = xfs_buf_poff(bp->b_file_offset);
- bp->b_page_count = page_count;
- if (page_count <= XB_PAGES) {
- bp->b_pages = bp->b_page_array;
- } else {
- bp->b_pages = kmem_alloc(sizeof(struct page *) *
- page_count, xb_to_km(flags));
- if (bp->b_pages == NULL)
- return -ENOMEM;
- }
- memset(bp->b_pages, 0, sizeof(struct page *) * page_count);
- }
- return 0;
-}
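
The page-list setup above uses a common small-object optimisation: a fixed inline array (b_page_array, XB_PAGES entries) covers the typical small buffer, and the heap is touched only beyond that. A minimal sketch under the same assumptions (INLINE_N and struct buf are hypothetical):

#include <stdlib.h>

#define INLINE_N 4

struct buf {
	void	**pages;
	void	*inline_pages[INLINE_N];
	int	page_count;
};

static int buf_get_pages(struct buf *bp, int n)
{
	if (n <= INLINE_N) {
		bp->pages = bp->inline_pages;	/* no allocation at all */
	} else {
		bp->pages = calloc(n, sizeof(void *));
		if (!bp->pages)
			return -1;
	}
	bp->page_count = n;
	return 0;
}

static void buf_put_pages(struct buf *bp)
{
	/* mirror the free path: only free what was heap-allocated */
	if (bp->pages != bp->inline_pages)
		free(bp->pages);
	bp->pages = NULL;
}

int main(void)
{
	struct buf bp = { 0 };

	buf_get_pages(&bp, 2);	/* served from the inline array */
	buf_put_pages(&bp);
	buf_get_pages(&bp, 16);	/* falls back to the heap */
	buf_put_pages(&bp);
	return 0;
}
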
-
-/*
- * Frees b_pages if it was allocated.
- */
-STATIC void
-_xfs_buf_free_pages(
- xfs_buf_t *bp)
-{
- if (bp->b_pages != bp->b_page_array) {
- kmem_free(bp->b_pages);
- bp->b_pages = NULL;
- }
-}
-
-/*
- * Releases the specified buffer.
- *
- * The modification state of any associated pages is left unchanged.
 * The buffer must not be on any hash - use xfs_buf_rele instead for
 * hashed and refcounted buffers.
- */
-void
-xfs_buf_free(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_free(bp, _RET_IP_);
-
- ASSERT(list_empty(&bp->b_lru));
-
- if (bp->b_flags & _XBF_PAGES) {
- uint i;
-
- if (xfs_buf_is_vmapped(bp))
- vm_unmap_ram(bp->b_addr - bp->b_offset,
- bp->b_page_count);
-
- for (i = 0; i < bp->b_page_count; i++) {
- struct page *page = bp->b_pages[i];
-
- __free_page(page);
- }
- } else if (bp->b_flags & _XBF_KMEM)
- kmem_free(bp->b_addr);
- _xfs_buf_free_pages(bp);
- xfs_buf_deallocate(bp);
-}
-
-/*
 * Allocates all the pages for the buffer in question and builds its page list.
- */
-STATIC int
-xfs_buf_allocate_memory(
- xfs_buf_t *bp,
- uint flags)
-{
- size_t size = bp->b_count_desired;
- size_t nbytes, offset;
- gfp_t gfp_mask = xb_to_gfp(flags);
- unsigned short page_count, i;
- xfs_off_t end;
- int error;
-
- /*
- * for buffers that are contained within a single page, just allocate
- * the memory from the heap - there's no need for the complexity of
- * page arrays to keep allocation down to order 0.
- */
- if (bp->b_buffer_length < PAGE_SIZE) {
- bp->b_addr = kmem_alloc(bp->b_buffer_length, xb_to_km(flags));
- if (!bp->b_addr) {
- /* low memory - use alloc_page loop instead */
- goto use_alloc_page;
- }
-
- if (((unsigned long)(bp->b_addr + bp->b_buffer_length - 1) &
- PAGE_MASK) !=
- ((unsigned long)bp->b_addr & PAGE_MASK)) {
- /* b_addr spans two pages - use alloc_page instead */
- kmem_free(bp->b_addr);
- bp->b_addr = NULL;
- goto use_alloc_page;
- }
- bp->b_offset = offset_in_page(bp->b_addr);
- bp->b_pages = bp->b_page_array;
- bp->b_pages[0] = virt_to_page(bp->b_addr);
- bp->b_page_count = 1;
- bp->b_flags |= XBF_MAPPED | _XBF_KMEM;
- return 0;
- }
-
-use_alloc_page:
- end = bp->b_file_offset + bp->b_buffer_length;
- page_count = xfs_buf_btoc(end) - xfs_buf_btoct(bp->b_file_offset);
- error = _xfs_buf_get_pages(bp, page_count, flags);
- if (unlikely(error))
- return error;
-
- offset = bp->b_offset;
- bp->b_flags |= _XBF_PAGES;
-
- for (i = 0; i < bp->b_page_count; i++) {
- struct page *page;
- uint retries = 0;
-retry:
- page = alloc_page(gfp_mask);
- if (unlikely(page == NULL)) {
- if (flags & XBF_READ_AHEAD) {
- bp->b_page_count = i;
- error = ENOMEM;
- goto out_free_pages;
- }
-
- /*
- * This could deadlock.
- *
- * But until all the XFS lowlevel code is revamped to
- * handle buffer allocation failures we can't do much.
- */
- if (!(++retries % 100))
- xfs_err(NULL,
- "possible memory allocation deadlock in %s (mode:0x%x)",
- __func__, gfp_mask);
-
- XFS_STATS_INC(xb_page_retries);
- congestion_wait(BLK_RW_ASYNC, HZ/50);
- goto retry;
- }
-
- XFS_STATS_INC(xb_page_found);
-
- nbytes = min_t(size_t, size, PAGE_SIZE - offset);
- size -= nbytes;
- bp->b_pages[i] = page;
- offset = 0;
- }
- return 0;
-
-out_free_pages:
- for (i = 0; i < bp->b_page_count; i++)
- __free_page(bp->b_pages[i]);
- return error;
-}
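-
-/*
- * A minimal sketch (not from the original file) of the page-boundary test
- * used by the sub-page fast path above: a heap allocation is only usable
- * if its first and last bytes fall within the same page. spans_page() is
- * a hypothetical helper name.
- */
-STATIC int
-spans_page(
-	void		*addr,
-	size_t		len)
-{
-	/* compare the page containing the first byte with the last byte */
-	return (((unsigned long)addr + len - 1) & PAGE_MASK) !=
-	       ((unsigned long)addr & PAGE_MASK);
-}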
-
-/*
- * Map buffer into kernel address-space if necessary.
- */
-STATIC int
-_xfs_buf_map_pages(
- xfs_buf_t *bp,
- uint flags)
-{
- ASSERT(bp->b_flags & _XBF_PAGES);
- if (bp->b_page_count == 1) {
- /* A single page buffer is always mappable */
- bp->b_addr = page_address(bp->b_pages[0]) + bp->b_offset;
- bp->b_flags |= XBF_MAPPED;
- } else if (flags & XBF_MAPPED) {
- int retried = 0;
-
- do {
- bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,
- -1, PAGE_KERNEL);
- if (bp->b_addr)
- break;
- vm_unmap_aliases();
- } while (retried++ <= 1);
-
- if (!bp->b_addr)
- return -ENOMEM;
- bp->b_addr += bp->b_offset;
- bp->b_flags |= XBF_MAPPED;
- }
-
- return 0;
-}
-
-/*
- * Finding and Reading Buffers
- */
-
-/*
- * Looks up, and creates if absent, a lockable buffer for
- * a given range of an inode. The buffer is returned
- * locked. If other overlapping buffers exist, they are
- * released before the new buffer is created and locked,
- * which may imply that this call will block until those buffers
- * are unlocked. No I/O is implied by this call.
- */
-xfs_buf_t *
-_xfs_buf_find(
- xfs_buftarg_t *btp, /* block device target */
- xfs_off_t ioff, /* starting offset of range */
- size_t isize, /* length of range */
- xfs_buf_flags_t flags,
- xfs_buf_t *new_bp)
-{
- xfs_off_t range_base;
- size_t range_length;
- struct xfs_perag *pag;
- struct rb_node **rbp;
- struct rb_node *parent;
- xfs_buf_t *bp;
-
- range_base = (ioff << BBSHIFT);
- range_length = (isize << BBSHIFT);
-
- /* Check for IOs smaller than the sector size / not sector aligned */
- ASSERT(!(range_length < (1 << btp->bt_sshift)));
- ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
-
- /* get tree root */
- pag = xfs_perag_get(btp->bt_mount,
- xfs_daddr_to_agno(btp->bt_mount, ioff));
-
- /* walk tree */
- spin_lock(&pag->pag_buf_lock);
- rbp = &pag->pag_buf_tree.rb_node;
- parent = NULL;
- bp = NULL;
- while (*rbp) {
- parent = *rbp;
- bp = rb_entry(parent, struct xfs_buf, b_rbnode);
-
- if (range_base < bp->b_file_offset)
- rbp = &(*rbp)->rb_left;
- else if (range_base > bp->b_file_offset)
- rbp = &(*rbp)->rb_right;
- else {
- /*
- * found a block offset match. If the range doesn't
- * match, the only way this is allowed is if the buffer
- * in the cache is stale and the transaction that made
- * it stale has not yet committed. i.e. we are
- * reallocating a busy extent. Skip this buffer and
- * continue searching to the right for an exact match.
- */
- if (bp->b_buffer_length != range_length) {
- ASSERT(bp->b_flags & XBF_STALE);
- rbp = &(*rbp)->rb_right;
- continue;
- }
- atomic_inc(&bp->b_hold);
- goto found;
- }
- }
-
- /* No match found */
- if (new_bp) {
- _xfs_buf_initialize(new_bp, btp, range_base,
- range_length, flags);
- rb_link_node(&new_bp->b_rbnode, parent, rbp);
- rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
- /* the buffer keeps the perag reference until it is freed */
- new_bp->b_pag = pag;
- spin_unlock(&pag->pag_buf_lock);
- } else {
- XFS_STATS_INC(xb_miss_locked);
- spin_unlock(&pag->pag_buf_lock);
- xfs_perag_put(pag);
- }
- return new_bp;
-
-found:
- spin_unlock(&pag->pag_buf_lock);
- xfs_perag_put(pag);
-
- if (xfs_buf_cond_lock(bp)) {
- /* failed, so wait for the lock if requested. */
- if (!(flags & XBF_TRYLOCK)) {
- xfs_buf_lock(bp);
- XFS_STATS_INC(xb_get_locked_waited);
- } else {
- xfs_buf_rele(bp);
- XFS_STATS_INC(xb_busy_locked);
- return NULL;
- }
- }
-
- /*
- * if the buffer is stale, clear all the external state associated with
- * it. We need to keep flags such as how we allocated the buffer memory
- * intact here.
- */
- if (bp->b_flags & XBF_STALE) {
- ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
- bp->b_flags &= XBF_MAPPED | _XBF_KMEM | _XBF_PAGES;
- }
-
- trace_xfs_buf_find(bp, flags, _RET_IP_);
- XFS_STATS_INC(xb_get_locked);
- return bp;
-}
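-
-/*
- * A sketch (not from the original file) of the rbtree lookup-or-insert
- * pattern _xfs_buf_find() uses above: walk down from the root while
- * remembering the parent and link point, then splice the new node in on
- * a miss. struct my_node and its key field are illustrative names only.
- */
-struct my_node {
-	struct rb_node	rbnode;
-	u64		key;
-};
-
-STATIC struct my_node *
-my_tree_find_or_insert(
-	struct rb_root	*root,
-	struct my_node	*new)
-{
-	struct rb_node	**rbp = &root->rb_node;
-	struct rb_node	*parent = NULL;
-
-	while (*rbp) {
-		struct my_node	*this;
-
-		parent = *rbp;
-		this = rb_entry(parent, struct my_node, rbnode);
-		if (new->key < this->key)
-			rbp = &(*rbp)->rb_left;
-		else if (new->key > this->key)
-			rbp = &(*rbp)->rb_right;
-		else
-			return this;	/* hit: caller takes a hold */
-	}
-	rb_link_node(&new->rbnode, parent, rbp);
-	rb_insert_color(&new->rbnode, root);
-	return new;			/* miss: new node is now in the tree */
-}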
-
-/*
- * Assembles a buffer covering the specified range.
- * Storage in memory for all portions of the buffer will be allocated,
- * although backing storage may not be.
- */
-xfs_buf_t *
-xfs_buf_get(
- xfs_buftarg_t *target,/* target for buffer */
- xfs_off_t ioff, /* starting offset of range */
- size_t isize, /* length of range */
- xfs_buf_flags_t flags)
-{
- xfs_buf_t *bp, *new_bp;
- int error = 0;
-
- new_bp = xfs_buf_allocate(flags);
- if (unlikely(!new_bp))
- return NULL;
-
- bp = _xfs_buf_find(target, ioff, isize, flags, new_bp);
- if (bp == new_bp) {
- error = xfs_buf_allocate_memory(bp, flags);
- if (error)
- goto no_buffer;
- } else {
- xfs_buf_deallocate(new_bp);
- if (unlikely(bp == NULL))
- return NULL;
- }
-
- if (!(bp->b_flags & XBF_MAPPED)) {
- error = _xfs_buf_map_pages(bp, flags);
- if (unlikely(error)) {
- xfs_warn(target->bt_mount,
- "%s: failed to map pages\n", __func__);
- goto no_buffer;
- }
- }
-
- XFS_STATS_INC(xb_get);
-
- /*
- * Always fill in the block number now, the mapped cases can do
- * their own overlay of this later.
- */
- bp->b_bn = ioff;
- bp->b_count_desired = bp->b_buffer_length;
-
- trace_xfs_buf_get(bp, flags, _RET_IP_);
- return bp;
-
- no_buffer:
- if (flags & (XBF_LOCK | XBF_TRYLOCK))
- xfs_buf_unlock(bp);
- xfs_buf_rele(bp);
- return NULL;
-}
-
-STATIC int
-_xfs_buf_read(
- xfs_buf_t *bp,
- xfs_buf_flags_t flags)
-{
- int status;
-
- ASSERT(!(flags & (XBF_DELWRI|XBF_WRITE)));
- ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
-
- bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_DELWRI | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
- bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | \
- XBF_READ_AHEAD | _XBF_RUN_QUEUES);
-
- status = xfs_buf_iorequest(bp);
- if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
- return status;
- return xfs_buf_iowait(bp);
-}
-
-xfs_buf_t *
-xfs_buf_read(
- xfs_buftarg_t *target,
- xfs_off_t ioff,
- size_t isize,
- xfs_buf_flags_t flags)
-{
- xfs_buf_t *bp;
-
- flags |= XBF_READ;
-
- bp = xfs_buf_get(target, ioff, isize, flags);
- if (bp) {
- trace_xfs_buf_read(bp, flags, _RET_IP_);
-
- if (!XFS_BUF_ISDONE(bp)) {
- XFS_STATS_INC(xb_get_read);
- _xfs_buf_read(bp, flags);
- } else if (flags & XBF_ASYNC) {
- /*
- * Read ahead call which is already satisfied,
- * drop the buffer
- */
- goto no_buffer;
- } else {
- /* We do not want read in the flags */
- bp->b_flags &= ~XBF_READ;
- }
- }
-
- return bp;
-
- no_buffer:
- if (flags & (XBF_LOCK | XBF_TRYLOCK))
- xfs_buf_unlock(bp);
- xfs_buf_rele(bp);
- return NULL;
-}
-
-/*
- * If we are not low on memory then do the readahead in a deadlock-safe
- * manner.
- */
-void
-xfs_buf_readahead(
- xfs_buftarg_t *target,
- xfs_off_t ioff,
- size_t isize)
-{
- if (bdi_read_congested(target->bt_bdi))
- return;
-
- xfs_buf_read(target, ioff, isize,
- XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
-}
-
-/*
- * Read an uncached buffer from disk. Allocates and returns a locked
- * buffer containing the disk contents or nothing.
- */
-struct xfs_buf *
-xfs_buf_read_uncached(
- struct xfs_mount *mp,
- struct xfs_buftarg *target,
- xfs_daddr_t daddr,
- size_t length,
- int flags)
-{
- xfs_buf_t *bp;
- int error;
-
- bp = xfs_buf_get_uncached(target, length, flags);
- if (!bp)
- return NULL;
-
- /* set up the buffer for a read IO */
- xfs_buf_lock(bp);
- XFS_BUF_SET_ADDR(bp, daddr);
- XFS_BUF_READ(bp);
- XFS_BUF_BUSY(bp);
-
- xfsbdstrat(mp, bp);
- error = xfs_buf_iowait(bp);
- if (error || bp->b_error) {
- xfs_buf_relse(bp);
- return NULL;
- }
- return bp;
-}
-
-xfs_buf_t *
-xfs_buf_get_empty(
- size_t len,
- xfs_buftarg_t *target)
-{
- xfs_buf_t *bp;
-
- bp = xfs_buf_allocate(0);
- if (bp)
- _xfs_buf_initialize(bp, target, 0, len, 0);
- return bp;
-}
-
-/*
- * Return a buffer allocated as an empty buffer and associated with external
- * memory via xfs_buf_associate_memory() back to its empty state.
- */
-void
-xfs_buf_set_empty(
- struct xfs_buf *bp,
- size_t len)
-{
- if (bp->b_pages)
- _xfs_buf_free_pages(bp);
-
- bp->b_pages = NULL;
- bp->b_page_count = 0;
- bp->b_addr = NULL;
- bp->b_file_offset = 0;
- bp->b_buffer_length = bp->b_count_desired = len;
- bp->b_bn = XFS_BUF_DADDR_NULL;
- bp->b_flags &= ~XBF_MAPPED;
-}
-
-static inline struct page *
-mem_to_page(
- void *addr)
-{
- if ((!is_vmalloc_addr(addr))) {
- return virt_to_page(addr);
- } else {
- return vmalloc_to_page(addr);
- }
-}
-
-int
-xfs_buf_associate_memory(
- xfs_buf_t *bp,
- void *mem,
- size_t len)
-{
- int rval;
- int i = 0;
- unsigned long pageaddr;
- unsigned long offset;
- size_t buflen;
- int page_count;
-
- pageaddr = (unsigned long)mem & PAGE_MASK;
- offset = (unsigned long)mem - pageaddr;
- buflen = PAGE_ALIGN(len + offset);
- page_count = buflen >> PAGE_SHIFT;
-
- /* Free any previous set of page pointers */
- if (bp->b_pages)
- _xfs_buf_free_pages(bp);
-
- bp->b_pages = NULL;
- bp->b_addr = mem;
-
- rval = _xfs_buf_get_pages(bp, page_count, XBF_DONT_BLOCK);
- if (rval)
- return rval;
-
- bp->b_offset = offset;
-
- for (i = 0; i < bp->b_page_count; i++) {
- bp->b_pages[i] = mem_to_page((void *)pageaddr);
- pageaddr += PAGE_SIZE;
- }
-
- bp->b_count_desired = len;
- bp->b_buffer_length = buflen;
- bp->b_flags |= XBF_MAPPED;
-
- return 0;
-}
-
-xfs_buf_t *
-xfs_buf_get_uncached(
- struct xfs_buftarg *target,
- size_t len,
- int flags)
-{
- unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
- int error, i;
- xfs_buf_t *bp;
-
- bp = xfs_buf_allocate(0);
- if (unlikely(bp == NULL))
- goto fail;
- _xfs_buf_initialize(bp, target, 0, len, 0);
-
- error = _xfs_buf_get_pages(bp, page_count, 0);
- if (error)
- goto fail_free_buf;
-
- for (i = 0; i < page_count; i++) {
- bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
- if (!bp->b_pages[i])
- goto fail_free_mem;
- }
- bp->b_flags |= _XBF_PAGES;
-
- error = _xfs_buf_map_pages(bp, XBF_MAPPED);
- if (unlikely(error)) {
- xfs_warn(target->bt_mount,
- "%s: failed to map pages\n", __func__);
- goto fail_free_mem;
- }
-
- xfs_buf_unlock(bp);
-
- trace_xfs_buf_get_uncached(bp, _RET_IP_);
- return bp;
-
- fail_free_mem:
- while (--i >= 0)
- __free_page(bp->b_pages[i]);
- _xfs_buf_free_pages(bp);
- fail_free_buf:
- xfs_buf_deallocate(bp);
- fail:
- return NULL;
-}
-
-/*
- * Increment the reference count on a buffer, to hold the buffer concurrently
- * with another thread which may release (free) the buffer asynchronously.
- * The caller must already hold the buffer to call this function.
- */
-void
-xfs_buf_hold(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_hold(bp, _RET_IP_);
- atomic_inc(&bp->b_hold);
-}
-
-/*
- * Releases a hold on the specified buffer. If the
- * hold count is 1, calls xfs_buf_free.
- */
-void
-xfs_buf_rele(
- xfs_buf_t *bp)
-{
- struct xfs_perag *pag = bp->b_pag;
-
- trace_xfs_buf_rele(bp, _RET_IP_);
-
- if (!pag) {
- ASSERT(list_empty(&bp->b_lru));
- ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
- if (atomic_dec_and_test(&bp->b_hold))
- xfs_buf_free(bp);
- return;
- }
-
- ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
-
- ASSERT(atomic_read(&bp->b_hold) > 0);
- if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
- if (!(bp->b_flags & XBF_STALE) &&
- atomic_read(&bp->b_lru_ref)) {
- xfs_buf_lru_add(bp);
- spin_unlock(&pag->pag_buf_lock);
- } else {
- xfs_buf_lru_del(bp);
- ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
- rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
- spin_unlock(&pag->pag_buf_lock);
- xfs_perag_put(pag);
- xfs_buf_free(bp);
- }
- }
-}
-
-
-/*
- * Lock a buffer object, if it is not already locked.
- *
- * If we come across a stale, pinned, locked buffer, we know that we are
- * being asked to lock a buffer that has been reallocated. Because it is
- * pinned, we know that the log has not been pushed to disk and hence it
- * will still be locked. Rather than continuing to have trylock attempts
- * fail until someone else pushes the log, push it ourselves before
- * returning. This means that the xfsaild will not get stuck trying
- * to push on stale inode buffers.
- */
-int
-xfs_buf_cond_lock(
- xfs_buf_t *bp)
-{
- int locked;
-
- locked = down_trylock(&bp->b_sema) == 0;
- if (locked)
- XB_SET_OWNER(bp);
- else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
- xfs_log_force(bp->b_target->bt_mount, 0);
-
- trace_xfs_buf_cond_lock(bp, _RET_IP_);
- return locked ? 0 : -EBUSY;
-}
-
-int
-xfs_buf_lock_value(
- xfs_buf_t *bp)
-{
- return bp->b_sema.count;
-}
-
-/*
- * Lock a buffer object.
- *
- * If we come across a stale, pinned, locked buffer, we know that we
- * are being asked to lock a buffer that has been reallocated. Because
- * it is pinned, we know that the log has not been pushed to disk and
- * hence it will still be locked. Rather than sleeping until someone
- * else pushes the log, push it ourselves before trying to get the lock.
- */
-void
-xfs_buf_lock(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_lock(bp, _RET_IP_);
-
- if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
- xfs_log_force(bp->b_target->bt_mount, 0);
- down(&bp->b_sema);
- XB_SET_OWNER(bp);
-
- trace_xfs_buf_lock_done(bp, _RET_IP_);
-}
-
-/*
- * Releases the lock on the buffer object.
- * If the buffer is marked delwri but is not queued, do so before we
- * unlock the buffer as we need to set flags correctly. We also need to
- * take a reference for the delwri queue because the unlocker is going to
- * drop theirs and they don't know we just queued it.
- */
-void
-xfs_buf_unlock(
- xfs_buf_t *bp)
-{
- if ((bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)) == XBF_DELWRI) {
- atomic_inc(&bp->b_hold);
- bp->b_flags |= XBF_ASYNC;
- xfs_buf_delwri_queue(bp, 0);
- }
-
- XB_CLEAR_OWNER(bp);
- up(&bp->b_sema);
-
- trace_xfs_buf_unlock(bp, _RET_IP_);
-}
-
-STATIC void
-xfs_buf_wait_unpin(
- xfs_buf_t *bp)
-{
- DECLARE_WAITQUEUE (wait, current);
-
- if (atomic_read(&bp->b_pin_count) == 0)
- return;
-
- add_wait_queue(&bp->b_waiters, &wait);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&bp->b_pin_count) == 0)
- break;
- io_schedule();
- }
- remove_wait_queue(&bp->b_waiters, &wait);
- set_current_state(TASK_RUNNING);
-}
-
-/*
- * Buffer Utility Routines
- */
-
-STATIC void
-xfs_buf_iodone_work(
- struct work_struct *work)
-{
- xfs_buf_t *bp =
- container_of(work, xfs_buf_t, b_iodone_work);
-
- if (bp->b_iodone)
- (*(bp->b_iodone))(bp);
- else if (bp->b_flags & XBF_ASYNC)
- xfs_buf_relse(bp);
-}
-
-void
-xfs_buf_ioend(
- xfs_buf_t *bp,
- int schedule)
-{
- trace_xfs_buf_iodone(bp, _RET_IP_);
-
- bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD);
- if (bp->b_error == 0)
- bp->b_flags |= XBF_DONE;
-
- if ((bp->b_iodone) || (bp->b_flags & XBF_ASYNC)) {
- if (schedule) {
- INIT_WORK(&bp->b_iodone_work, xfs_buf_iodone_work);
- queue_work(xfslogd_workqueue, &bp->b_iodone_work);
- } else {
- xfs_buf_iodone_work(&bp->b_iodone_work);
- }
- } else {
- complete(&bp->b_iowait);
- }
-}
-
-void
-xfs_buf_ioerror(
- xfs_buf_t *bp,
- int error)
-{
- ASSERT(error >= 0 && error <= 0xffff);
- bp->b_error = (unsigned short)error;
- trace_xfs_buf_ioerror(bp, error, _RET_IP_);
-}
-
-int
-xfs_bwrite(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- int error;
-
- bp->b_flags |= XBF_WRITE;
- bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
-
- xfs_buf_delwri_dequeue(bp);
- xfs_bdstrat_cb(bp);
-
- error = xfs_buf_iowait(bp);
- if (error)
- xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
- xfs_buf_relse(bp);
- return error;
-}
-
-void
-xfs_bdwrite(
- void *mp,
- struct xfs_buf *bp)
-{
- trace_xfs_buf_bdwrite(bp, _RET_IP_);
-
- bp->b_flags &= ~XBF_READ;
- bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
-
- xfs_buf_delwri_queue(bp, 1);
-}
-
-/*
- * Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
- * so that the proper iodone callbacks get called.
- */
-STATIC int
-xfs_bioerror(
- xfs_buf_t *bp)
-{
-#ifdef XFSERRORDEBUG
- ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
-#endif
-
- /*
- * No need to wait until the buffer is unpinned, we aren't flushing it.
- */
- XFS_BUF_ERROR(bp, EIO);
-
- /*
- * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
- */
- XFS_BUF_UNREAD(bp);
- XFS_BUF_UNDELAYWRITE(bp);
- XFS_BUF_UNDONE(bp);
- XFS_BUF_STALE(bp);
-
- xfs_buf_ioend(bp, 0);
-
- return EIO;
-}
-
-/*
- * Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the xfs_buf_ioend call.
- * This is meant for userdata errors; metadata bufs come with
- * iodone functions attached, so that we can track down errors.
- */
-STATIC int
-xfs_bioerror_relse(
- struct xfs_buf *bp)
-{
- int64_t fl = XFS_BUF_BFLAGS(bp);
- /*
- * No need to wait until the buffer is unpinned.
- * We aren't flushing it.
- *
- * chunkhold expects B_DONE to be set, whether
- * we actually finish the I/O or not. We don't want to
- * change that interface.
- */
- XFS_BUF_UNREAD(bp);
- XFS_BUF_UNDELAYWRITE(bp);
- XFS_BUF_DONE(bp);
- XFS_BUF_STALE(bp);
- XFS_BUF_CLR_IODONE_FUNC(bp);
- if (!(fl & XBF_ASYNC)) {
- /*
- * Mark b_error and B_ERROR _both_.
-	 * Lots of chunkcache code assumes that.
- * There's no reason to mark error for
- * ASYNC buffers.
- */
- XFS_BUF_ERROR(bp, EIO);
- XFS_BUF_FINISH_IOWAIT(bp);
- } else {
- xfs_buf_relse(bp);
- }
-
- return EIO;
-}
-
-
-/*
- * All xfs metadata buffers except log state machine buffers
- * get this attached as their b_bdstrat callback function.
- * This is so that we can catch a buffer
- * after prematurely unpinning it to forcibly shut down the filesystem.
- */
-int
-xfs_bdstrat_cb(
- struct xfs_buf *bp)
-{
- if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
- trace_xfs_bdstrat_shut(bp, _RET_IP_);
- /*
- * Metadata write that didn't get logged but
- * written delayed anyway. These aren't associated
- * with a transaction, and can be ignored.
- */
- if (!bp->b_iodone && !XFS_BUF_ISREAD(bp))
- return xfs_bioerror_relse(bp);
- else
- return xfs_bioerror(bp);
- }
-
- xfs_buf_iorequest(bp);
- return 0;
-}
-
-/*
- * Wrapper around bdstrat so that we can stop data from going to disk in case
- * we are shutting down the filesystem. Typically user data goes through this
- * path; one of the exceptions is the superblock.
- */
-void
-xfsbdstrat(
- struct xfs_mount *mp,
- struct xfs_buf *bp)
-{
- if (XFS_FORCED_SHUTDOWN(mp)) {
- trace_xfs_bdstrat_shut(bp, _RET_IP_);
- xfs_bioerror_relse(bp);
- return;
- }
-
- xfs_buf_iorequest(bp);
-}
-
-STATIC void
-_xfs_buf_ioend(
- xfs_buf_t *bp,
- int schedule)
-{
-	if (atomic_dec_and_test(&bp->b_io_remaining))
- xfs_buf_ioend(bp, schedule);
-}
-
-STATIC void
-xfs_buf_bio_end_io(
- struct bio *bio,
- int error)
-{
- xfs_buf_t *bp = (xfs_buf_t *)bio->bi_private;
-
- xfs_buf_ioerror(bp, -error);
-
- if (!error && xfs_buf_is_vmapped(bp) && (bp->b_flags & XBF_READ))
- invalidate_kernel_vmap_range(bp->b_addr, xfs_buf_vmap_len(bp));
-
- _xfs_buf_ioend(bp, 1);
- bio_put(bio);
-}
-
-STATIC void
-_xfs_buf_ioapply(
- xfs_buf_t *bp)
-{
- int rw, map_i, total_nr_pages, nr_pages;
- struct bio *bio;
- int offset = bp->b_offset;
- int size = bp->b_count_desired;
- sector_t sector = bp->b_bn;
-
- total_nr_pages = bp->b_page_count;
- map_i = 0;
-
- if (bp->b_flags & XBF_ORDERED) {
- ASSERT(!(bp->b_flags & XBF_READ));
- rw = WRITE_FLUSH_FUA;
- } else if (bp->b_flags & XBF_LOG_BUFFER) {
- ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
- } else if (bp->b_flags & _XBF_RUN_QUEUES) {
- ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
- bp->b_flags &= ~_XBF_RUN_QUEUES;
- rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
- } else {
- rw = (bp->b_flags & XBF_WRITE) ? WRITE :
- (bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
- }
-
-
-next_chunk:
- atomic_inc(&bp->b_io_remaining);
- nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT);
- if (nr_pages > total_nr_pages)
- nr_pages = total_nr_pages;
-
- bio = bio_alloc(GFP_NOIO, nr_pages);
- bio->bi_bdev = bp->b_target->bt_bdev;
- bio->bi_sector = sector;
- bio->bi_end_io = xfs_buf_bio_end_io;
- bio->bi_private = bp;
-
-
- for (; size && nr_pages; nr_pages--, map_i++) {
- int rbytes, nbytes = PAGE_SIZE - offset;
-
- if (nbytes > size)
- nbytes = size;
-
- rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
- if (rbytes < nbytes)
- break;
-
- offset = 0;
- sector += nbytes >> BBSHIFT;
- size -= nbytes;
- total_nr_pages--;
- }
-
- if (likely(bio->bi_size)) {
- if (xfs_buf_is_vmapped(bp)) {
- flush_kernel_vmap_range(bp->b_addr,
- xfs_buf_vmap_len(bp));
- }
- submit_bio(rw, bio);
- if (size)
- goto next_chunk;
- } else {
- xfs_buf_ioerror(bp, EIO);
- bio_put(bio);
- }
-}
-
-int
-xfs_buf_iorequest(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_iorequest(bp, _RET_IP_);
-
- if (bp->b_flags & XBF_DELWRI) {
- xfs_buf_delwri_queue(bp, 1);
- return 0;
- }
-
- if (bp->b_flags & XBF_WRITE) {
- xfs_buf_wait_unpin(bp);
- }
-
- xfs_buf_hold(bp);
-
-	/* Set the count to 1 initially, so that an I/O completion
-	 * callout which happens before we have started all the I/O
-	 * cannot call xfs_buf_ioend too early.
- */
- atomic_set(&bp->b_io_remaining, 1);
- _xfs_buf_ioapply(bp);
- _xfs_buf_ioend(bp, 0);
-
- xfs_buf_rele(bp);
- return 0;
-}
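-
-/*
- * A sketch (not from the original file) of the I/O completion counting
- * idiom used by xfs_buf_iorequest() above: bias the in-flight counter to
- * 1 before submitting so that no completion can fire the "all done" path
- * early, then drop the bias once submission is finished. The names here
- * (remaining, queue_piece, all_pieces_done) are illustrative only.
- */
-STATIC void
-submit_all_pieces(
-	atomic_t	*remaining,
-	int		npieces)
-{
-	int		i;
-
-	atomic_set(remaining, 1);		/* submission bias */
-	for (i = 0; i < npieces; i++) {
-		atomic_inc(remaining);
-		queue_piece(i);	/* its completion does atomic_dec_and_test() */
-	}
-
-	/* drop the bias; if every piece already completed, finish here */
-	if (atomic_dec_and_test(remaining))
-		all_pieces_done();
-}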
-
-/*
- * Waits for I/O to complete on the buffer supplied.
- * It returns immediately if no I/O is pending.
- * It returns the I/O error code, if any, or 0 if there was no error.
- */
-int
-xfs_buf_iowait(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_iowait(bp, _RET_IP_);
-
- wait_for_completion(&bp->b_iowait);
-
- trace_xfs_buf_iowait_done(bp, _RET_IP_);
- return bp->b_error;
-}
-
-xfs_caddr_t
-xfs_buf_offset(
- xfs_buf_t *bp,
- size_t offset)
-{
- struct page *page;
-
- if (bp->b_flags & XBF_MAPPED)
- return XFS_BUF_PTR(bp) + offset;
-
- offset += bp->b_offset;
- page = bp->b_pages[offset >> PAGE_SHIFT];
- return (xfs_caddr_t)page_address(page) + (offset & (PAGE_SIZE-1));
-}
-
-/*
- * Move data into or out of a buffer.
- */
-void
-xfs_buf_iomove(
- xfs_buf_t *bp, /* buffer to process */
- size_t boff, /* starting buffer offset */
- size_t bsize, /* length to copy */
- void *data, /* data address */
- xfs_buf_rw_t mode) /* read/write/zero flag */
-{
- size_t bend, cpoff, csize;
- struct page *page;
-
- bend = boff + bsize;
- while (boff < bend) {
- page = bp->b_pages[xfs_buf_btoct(boff + bp->b_offset)];
- cpoff = xfs_buf_poff(boff + bp->b_offset);
- csize = min_t(size_t,
- PAGE_SIZE-cpoff, bp->b_count_desired-boff);
-
- ASSERT(((csize + cpoff) <= PAGE_SIZE));
-
- switch (mode) {
- case XBRW_ZERO:
- memset(page_address(page) + cpoff, 0, csize);
- break;
- case XBRW_READ:
- memcpy(data, page_address(page) + cpoff, csize);
- break;
- case XBRW_WRITE:
- memcpy(page_address(page) + cpoff, data, csize);
- }
-
- boff += csize;
- data += csize;
- }
-}
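-
-/*
- * An illustrative use of the mode switch above (not from the original
- * file): a caller that wants to zero the tail of a buffer beyond some
- * valid length could write
- *	xfs_buf_iomove(bp, valid_len, XFS_BUF_COUNT(bp) - valid_len,
- *		       NULL, XBRW_ZERO);
- * where valid_len is a caller-defined byte count; XBRW_ZERO ignores the
- * data pointer, so NULL is fine there.
- */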
-
-/*
- * Handling of buffer targets (buftargs).
- */
-
-/*
- * Wait for any bufs with callbacks that have been submitted but have not yet
- * returned. These buffers will have an elevated hold count, so wait on those
- * while freeing all the buffers only held by the LRU.
- */
-void
-xfs_wait_buftarg(
- struct xfs_buftarg *btp)
-{
- struct xfs_buf *bp;
-
-restart:
- spin_lock(&btp->bt_lru_lock);
- while (!list_empty(&btp->bt_lru)) {
- bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
- if (atomic_read(&bp->b_hold) > 1) {
- spin_unlock(&btp->bt_lru_lock);
- delay(100);
- goto restart;
- }
- /*
-		 * clear the LRU reference count so the buffer doesn't get
- * ignored in xfs_buf_rele().
- */
- atomic_set(&bp->b_lru_ref, 0);
- spin_unlock(&btp->bt_lru_lock);
- xfs_buf_rele(bp);
- spin_lock(&btp->bt_lru_lock);
- }
- spin_unlock(&btp->bt_lru_lock);
-}
-
-int
-xfs_buftarg_shrink(
- struct shrinker *shrink,
- struct shrink_control *sc)
-{
- struct xfs_buftarg *btp = container_of(shrink,
- struct xfs_buftarg, bt_shrinker);
- struct xfs_buf *bp;
- int nr_to_scan = sc->nr_to_scan;
- LIST_HEAD(dispose);
-
- if (!nr_to_scan)
- return btp->bt_lru_nr;
-
- spin_lock(&btp->bt_lru_lock);
- while (!list_empty(&btp->bt_lru)) {
- if (nr_to_scan-- <= 0)
- break;
-
- bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-
- /*
- * Decrement the b_lru_ref count unless the value is already
- * zero. If the value is already zero, we need to reclaim the
- * buffer, otherwise it gets another trip through the LRU.
- */
- if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
- list_move_tail(&bp->b_lru, &btp->bt_lru);
- continue;
- }
-
- /*
- * remove the buffer from the LRU now to avoid needing another
- * lock round trip inside xfs_buf_rele().
- */
- list_move(&bp->b_lru, &dispose);
- btp->bt_lru_nr--;
- }
- spin_unlock(&btp->bt_lru_lock);
-
- while (!list_empty(&dispose)) {
- bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
- list_del_init(&bp->b_lru);
- xfs_buf_rele(bp);
- }
-
- return btp->bt_lru_nr;
-}
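-
-/*
- * A sketch (not from the original file) of the "collect under lock,
- * dispose outside" pattern used by xfs_buftarg_shrink() above: victims
- * are unlinked onto a private list while the LRU lock is held, and the
- * expensive release work happens only after the lock is dropped, saving
- * a lock round trip per buffer. struct victim and victim_free() are
- * illustrative names only.
- */
-struct victim {
-	struct list_head	list;
-};
-
-STATIC void
-dispose_victims(
-	spinlock_t		*lock,
-	struct list_head	*lru,
-	int			nr)
-{
-	struct victim		*v, *n;
-	LIST_HEAD(dispose);
-
-	spin_lock(lock);
-	list_for_each_entry_safe(v, n, lru, list) {
-		if (nr-- <= 0)
-			break;
-		list_move(&v->list, &dispose);	/* unlink under the lock */
-	}
-	spin_unlock(lock);
-
-	while (!list_empty(&dispose)) {
-		v = list_first_entry(&dispose, struct victim, list);
-		list_del_init(&v->list);
-		victim_free(v);		/* may sleep or take other locks */
-	}
-}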
-
-void
-xfs_free_buftarg(
- struct xfs_mount *mp,
- struct xfs_buftarg *btp)
-{
- unregister_shrinker(&btp->bt_shrinker);
-
- xfs_flush_buftarg(btp, 1);
- if (mp->m_flags & XFS_MOUNT_BARRIER)
- xfs_blkdev_issue_flush(btp);
-
- kthread_stop(btp->bt_task);
- kmem_free(btp);
-}
-
-STATIC int
-xfs_setsize_buftarg_flags(
- xfs_buftarg_t *btp,
- unsigned int blocksize,
- unsigned int sectorsize,
- int verbose)
-{
- btp->bt_bsize = blocksize;
- btp->bt_sshift = ffs(sectorsize) - 1;
- btp->bt_smask = sectorsize - 1;
-
- if (set_blocksize(btp->bt_bdev, sectorsize)) {
- xfs_warn(btp->bt_mount,
- "Cannot set_blocksize to %u on device %s\n",
- sectorsize, XFS_BUFTARG_NAME(btp));
- return EINVAL;
- }
-
- return 0;
-}
-
-/*
- * When allocating the initial buffer target we have not yet
- * read in the superblock, so we don't know what size sectors
- * are being used at this early stage. Play safe.
- */
-STATIC int
-xfs_setsize_buftarg_early(
- xfs_buftarg_t *btp,
- struct block_device *bdev)
-{
- return xfs_setsize_buftarg_flags(btp,
- PAGE_SIZE, bdev_logical_block_size(bdev), 0);
-}
-
-int
-xfs_setsize_buftarg(
- xfs_buftarg_t *btp,
- unsigned int blocksize,
- unsigned int sectorsize)
-{
- return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1);
-}
-
-STATIC int
-xfs_alloc_delwrite_queue(
- xfs_buftarg_t *btp,
- const char *fsname)
-{
- INIT_LIST_HEAD(&btp->bt_delwrite_queue);
- spin_lock_init(&btp->bt_delwrite_lock);
- btp->bt_flags = 0;
- btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
- if (IS_ERR(btp->bt_task))
- return PTR_ERR(btp->bt_task);
- return 0;
-}
-
-xfs_buftarg_t *
-xfs_alloc_buftarg(
- struct xfs_mount *mp,
- struct block_device *bdev,
- int external,
- const char *fsname)
-{
- xfs_buftarg_t *btp;
-
- btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
-
- btp->bt_mount = mp;
- btp->bt_dev = bdev->bd_dev;
- btp->bt_bdev = bdev;
- btp->bt_bdi = blk_get_backing_dev_info(bdev);
- if (!btp->bt_bdi)
- goto error;
-
- INIT_LIST_HEAD(&btp->bt_lru);
- spin_lock_init(&btp->bt_lru_lock);
- if (xfs_setsize_buftarg_early(btp, bdev))
- goto error;
- if (xfs_alloc_delwrite_queue(btp, fsname))
- goto error;
- btp->bt_shrinker.shrink = xfs_buftarg_shrink;
- btp->bt_shrinker.seeks = DEFAULT_SEEKS;
- register_shrinker(&btp->bt_shrinker);
- return btp;
-
-error:
- kmem_free(btp);
- return NULL;
-}
-
-
-/*
- * Delayed write buffer handling
- */
-STATIC void
-xfs_buf_delwri_queue(
- xfs_buf_t *bp,
- int unlock)
-{
- struct list_head *dwq = &bp->b_target->bt_delwrite_queue;
- spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
-
- trace_xfs_buf_delwri_queue(bp, _RET_IP_);
-
- ASSERT((bp->b_flags&(XBF_DELWRI|XBF_ASYNC)) == (XBF_DELWRI|XBF_ASYNC));
-
- spin_lock(dwlk);
- /* If already in the queue, dequeue and place at tail */
- if (!list_empty(&bp->b_list)) {
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
- if (unlock)
- atomic_dec(&bp->b_hold);
- list_del(&bp->b_list);
- }
-
- if (list_empty(dwq)) {
- /* start xfsbufd as it is about to have something to do */
- wake_up_process(bp->b_target->bt_task);
- }
-
- bp->b_flags |= _XBF_DELWRI_Q;
- list_add_tail(&bp->b_list, dwq);
- bp->b_queuetime = jiffies;
- spin_unlock(dwlk);
-
- if (unlock)
- xfs_buf_unlock(bp);
-}
-
-void
-xfs_buf_delwri_dequeue(
- xfs_buf_t *bp)
-{
- spinlock_t *dwlk = &bp->b_target->bt_delwrite_lock;
- int dequeued = 0;
-
- spin_lock(dwlk);
- if ((bp->b_flags & XBF_DELWRI) && !list_empty(&bp->b_list)) {
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
- list_del_init(&bp->b_list);
- dequeued = 1;
- }
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q);
- spin_unlock(dwlk);
-
- if (dequeued)
- xfs_buf_rele(bp);
-
- trace_xfs_buf_delwri_dequeue(bp, _RET_IP_);
-}
-
-/*
- * If a delwri buffer needs to be pushed before it has aged out, then promote
- * it to the head of the delwri queue so that it will be flushed on the next
- * xfsbufd run. We do this by resetting the queuetime of the buffer to be older
- * than the age currently needed to flush the buffer. Hence the next time
- * the xfsbufd sees it, it is guaranteed to be considered old enough to flush.
- */
-void
-xfs_buf_delwri_promote(
- struct xfs_buf *bp)
-{
- struct xfs_buftarg *btp = bp->b_target;
- long age = xfs_buf_age_centisecs * msecs_to_jiffies(10) + 1;
-
- ASSERT(bp->b_flags & XBF_DELWRI);
- ASSERT(bp->b_flags & _XBF_DELWRI_Q);
-
- /*
- * Check the buffer age before locking the delayed write queue as we
- * don't need to promote buffers that are already past the flush age.
- */
- if (bp->b_queuetime < jiffies - age)
- return;
- bp->b_queuetime = jiffies - age;
- spin_lock(&btp->bt_delwrite_lock);
- list_move(&bp->b_list, &btp->bt_delwrite_queue);
- spin_unlock(&btp->bt_delwrite_lock);
-}
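-
-/*
- * A worked example of the back-dating above, under assumed values (not
- * from the original file): with xfs_buf_age_centisecs = 1500 and HZ =
- * 100, age is 1500 * msecs_to_jiffies(10) + 1 = 1501 jiffies. Setting
- * b_queuetime = jiffies - age makes the xfsbufd check
- *	time_before(jiffies, bp->b_queuetime + age)
- * collapse to time_before(jiffies, jiffies), which is false, so the
- * buffer is written out on the very next scan.
- */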
-
-STATIC void
-xfs_buf_runall_queues(
- struct workqueue_struct *queue)
-{
- flush_workqueue(queue);
-}
-
-/*
- * Move as many buffers as specified to the supplied list,
- * indicating if we skipped any buffers to prevent deadlocks.
- */
-STATIC int
-xfs_buf_delwri_split(
- xfs_buftarg_t *target,
- struct list_head *list,
- unsigned long age)
-{
- xfs_buf_t *bp, *n;
- struct list_head *dwq = &target->bt_delwrite_queue;
- spinlock_t *dwlk = &target->bt_delwrite_lock;
- int skipped = 0;
- int force;
-
- force = test_and_clear_bit(XBT_FORCE_FLUSH, &target->bt_flags);
- INIT_LIST_HEAD(list);
- spin_lock(dwlk);
- list_for_each_entry_safe(bp, n, dwq, b_list) {
- ASSERT(bp->b_flags & XBF_DELWRI);
-
- if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
- if (!force &&
- time_before(jiffies, bp->b_queuetime + age)) {
- xfs_buf_unlock(bp);
- break;
- }
-
- bp->b_flags &= ~(XBF_DELWRI|_XBF_DELWRI_Q|
- _XBF_RUN_QUEUES);
- bp->b_flags |= XBF_WRITE;
- list_move_tail(&bp->b_list, list);
- trace_xfs_buf_delwri_split(bp, _RET_IP_);
- } else
- skipped++;
- }
- spin_unlock(dwlk);
-
- return skipped;
-
-}
-
-/*
- * The compare function is more complex than it needs to be because
- * the return value is only 32 bits and we are doing comparisons
- * on 64-bit values.
- */
-static int
-xfs_buf_cmp(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_buf *ap = container_of(a, struct xfs_buf, b_list);
- struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
- xfs_daddr_t diff;
-
- diff = ap->b_bn - bp->b_bn;
- if (diff < 0)
- return -1;
- if (diff > 0)
- return 1;
- return 0;
-}
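-
-/*
- * Concretely (an illustrative note, not from the original file): a naive
- *	return ap->b_bn - bp->b_bn;
- * would truncate the 64-bit xfs_daddr_t difference to the 32-bit int
- * return value, so a difference of exactly 1ULL << 32 would come back
- * as 0 and list_sort() would treat two distinct block numbers as equal.
- * Testing the full-width diff against zero avoids that.
- */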
-
-void
-xfs_buf_delwri_sort(
- xfs_buftarg_t *target,
- struct list_head *list)
-{
- list_sort(NULL, list, xfs_buf_cmp);
-}
-
-STATIC int
-xfsbufd(
- void *data)
-{
- xfs_buftarg_t *target = (xfs_buftarg_t *)data;
-
- current->flags |= PF_MEMALLOC;
-
- set_freezable();
-
- do {
- long age = xfs_buf_age_centisecs * msecs_to_jiffies(10);
- long tout = xfs_buf_timer_centisecs * msecs_to_jiffies(10);
- struct list_head tmp;
- struct blk_plug plug;
-
- if (unlikely(freezing(current))) {
- set_bit(XBT_FORCE_SLEEP, &target->bt_flags);
- refrigerator();
- } else {
- clear_bit(XBT_FORCE_SLEEP, &target->bt_flags);
- }
-
- /* sleep for a long time if there is nothing to do. */
- if (list_empty(&target->bt_delwrite_queue))
- tout = MAX_SCHEDULE_TIMEOUT;
- schedule_timeout_interruptible(tout);
-
- xfs_buf_delwri_split(target, &tmp, age);
- list_sort(NULL, &tmp, xfs_buf_cmp);
-
- blk_start_plug(&plug);
- while (!list_empty(&tmp)) {
- struct xfs_buf *bp;
- bp = list_first_entry(&tmp, struct xfs_buf, b_list);
- list_del_init(&bp->b_list);
- xfs_bdstrat_cb(bp);
- }
- blk_finish_plug(&plug);
- } while (!kthread_should_stop());
-
- return 0;
-}
-
-/*
- * Go through all incore buffers, and release buffers if they belong to
- * the given device. This is used in filesystem error handling to
- * preserve the consistency of its metadata.
- */
-int
-xfs_flush_buftarg(
- xfs_buftarg_t *target,
- int wait)
-{
- xfs_buf_t *bp;
- int pincount = 0;
- LIST_HEAD(tmp_list);
- LIST_HEAD(wait_list);
- struct blk_plug plug;
-
- xfs_buf_runall_queues(xfsconvertd_workqueue);
- xfs_buf_runall_queues(xfsdatad_workqueue);
- xfs_buf_runall_queues(xfslogd_workqueue);
-
- set_bit(XBT_FORCE_FLUSH, &target->bt_flags);
- pincount = xfs_buf_delwri_split(target, &tmp_list, 0);
-
- /*
- * Dropped the delayed write list lock, now walk the temporary list.
- * All I/O is issued async and then if we need to wait for completion
- * we do that after issuing all the IO.
- */
- list_sort(NULL, &tmp_list, xfs_buf_cmp);
-
- blk_start_plug(&plug);
- while (!list_empty(&tmp_list)) {
- bp = list_first_entry(&tmp_list, struct xfs_buf, b_list);
- ASSERT(target == bp->b_target);
- list_del_init(&bp->b_list);
- if (wait) {
- bp->b_flags &= ~XBF_ASYNC;
- list_add(&bp->b_list, &wait_list);
- }
- xfs_bdstrat_cb(bp);
- }
- blk_finish_plug(&plug);
-
- if (wait) {
- /* Wait for IO to complete. */
- while (!list_empty(&wait_list)) {
- bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
-
- list_del_init(&bp->b_list);
- xfs_buf_iowait(bp);
- xfs_buf_relse(bp);
- }
- }
-
- return pincount;
-}
-
-int __init
-xfs_buf_init(void)
-{
- xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
- KM_ZONE_HWALIGN, NULL);
- if (!xfs_buf_zone)
- goto out;
-
- xfslogd_workqueue = alloc_workqueue("xfslogd",
- WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
- if (!xfslogd_workqueue)
- goto out_free_buf_zone;
-
- xfsdatad_workqueue = alloc_workqueue("xfsdatad", WQ_MEM_RECLAIM, 1);
- if (!xfsdatad_workqueue)
- goto out_destroy_xfslogd_workqueue;
-
- xfsconvertd_workqueue = alloc_workqueue("xfsconvertd",
- WQ_MEM_RECLAIM, 1);
- if (!xfsconvertd_workqueue)
- goto out_destroy_xfsdatad_workqueue;
-
- return 0;
-
- out_destroy_xfsdatad_workqueue:
- destroy_workqueue(xfsdatad_workqueue);
- out_destroy_xfslogd_workqueue:
- destroy_workqueue(xfslogd_workqueue);
- out_free_buf_zone:
- kmem_zone_destroy(xfs_buf_zone);
- out:
- return -ENOMEM;
-}
-
-void
-xfs_buf_terminate(void)
-{
- destroy_workqueue(xfsconvertd_workqueue);
- destroy_workqueue(xfsdatad_workqueue);
- destroy_workqueue(xfslogd_workqueue);
- kmem_zone_destroy(xfs_buf_zone);
-}
-
-#ifdef CONFIG_KDB_MODULES
-struct list_head *
-xfs_get_buftarg_list(void)
-{
- return &xfs_buftarg_list;
-}
-#endif
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
deleted file mode 100644
index 36d6ee4..0000000
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ /dev/null
@@ -1,351 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_BUF_H__
-#define __XFS_BUF_H__
-
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <asm/system.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/buffer_head.h>
-#include <linux/uio.h>
-
-/*
- * Base types
- */
-
-#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
-
-#define xfs_buf_ctob(pp) ((pp) * PAGE_CACHE_SIZE)
-#define xfs_buf_btoc(dd) (((dd) + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_btoct(dd) ((dd) >> PAGE_CACHE_SHIFT)
-#define xfs_buf_poff(aa) ((aa) & ~PAGE_CACHE_MASK)
-
-typedef enum {
- XBRW_READ = 1, /* transfer into target memory */
- XBRW_WRITE = 2, /* transfer from target memory */
- XBRW_ZERO = 3, /* Zero target memory */
-} xfs_buf_rw_t;
-
-#define XBF_READ (1 << 0) /* buffer intended for reading from device */
-#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
-#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */
-#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
-#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
-#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
-#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
-#define XBF_ORDERED (1 << 11)/* use ordered writes */
-#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
-#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
-
-/* flags used only as arguments to access routines */
-#define XBF_LOCK (1 << 14)/* lock requested */
-#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */
-#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
-
-/* flags used only internally */
-#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
-#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
-#define _XBF_KMEM (1 << 20)/* backed by heap memory */
-#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
-
-typedef unsigned int xfs_buf_flags_t;
-
-#define XFS_BUF_FLAGS \
- { XBF_READ, "READ" }, \
- { XBF_WRITE, "WRITE" }, \
- { XBF_MAPPED, "MAPPED" }, \
- { XBF_ASYNC, "ASYNC" }, \
- { XBF_DONE, "DONE" }, \
- { XBF_DELWRI, "DELWRI" }, \
- { XBF_STALE, "STALE" }, \
- { XBF_ORDERED, "ORDERED" }, \
- { XBF_READ_AHEAD, "READ_AHEAD" }, \
- { XBF_LOCK, "LOCK" }, /* should never be set */\
- { XBF_TRYLOCK, "TRYLOCK" }, /* ditto */\
- { XBF_DONT_BLOCK, "DONT_BLOCK" }, /* ditto */\
- { _XBF_PAGES, "PAGES" }, \
- { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
- { _XBF_KMEM, "KMEM" }, \
- { _XBF_DELWRI_Q, "DELWRI_Q" }
-
-typedef enum {
- XBT_FORCE_SLEEP = 0,
- XBT_FORCE_FLUSH = 1,
-} xfs_buftarg_flags_t;
-
-typedef struct xfs_bufhash {
- struct list_head bh_list;
- spinlock_t bh_lock;
-} xfs_bufhash_t;
-
-typedef struct xfs_buftarg {
- dev_t bt_dev;
- struct block_device *bt_bdev;
- struct backing_dev_info *bt_bdi;
- struct xfs_mount *bt_mount;
- unsigned int bt_bsize;
- unsigned int bt_sshift;
- size_t bt_smask;
-
- /* per device delwri queue */
- struct task_struct *bt_task;
- struct list_head bt_delwrite_queue;
- spinlock_t bt_delwrite_lock;
- unsigned long bt_flags;
-
- /* LRU control structures */
- struct shrinker bt_shrinker;
- struct list_head bt_lru;
- spinlock_t bt_lru_lock;
- unsigned int bt_lru_nr;
-} xfs_buftarg_t;
-
-struct xfs_buf;
-typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
-
-#define XB_PAGES 2
-
-typedef struct xfs_buf {
- /*
-	 * The first cacheline holds all the fields needed for an uncontended
-	 * cache hit to be fully processed. The semaphore straddles the
-	 * cacheline boundary, but the counter and lock sit on the first
-	 * cacheline, which is the only bit that is touched if we hit the
-	 * semaphore fast-path on locking.
- */
- struct rb_node b_rbnode; /* rbtree node */
- xfs_off_t b_file_offset; /* offset in file */
- size_t b_buffer_length;/* size of buffer in bytes */
- atomic_t b_hold; /* reference count */
- atomic_t b_lru_ref; /* lru reclaim ref count */
- xfs_buf_flags_t b_flags; /* status flags */
- struct semaphore b_sema; /* semaphore for lockables */
-
- struct list_head b_lru; /* lru list */
- wait_queue_head_t b_waiters; /* unpin waiters */
- struct list_head b_list;
- struct xfs_perag *b_pag; /* contains rbtree root */
- xfs_buftarg_t *b_target; /* buffer target (device) */
- xfs_daddr_t b_bn; /* block number for I/O */
- size_t b_count_desired;/* desired transfer size */
- void *b_addr; /* virtual address of buffer */
- struct work_struct b_iodone_work;
- xfs_buf_iodone_t b_iodone; /* I/O completion function */
- struct completion b_iowait; /* queue for I/O waiters */
- void *b_fspriv;
- void *b_fspriv2;
- struct page **b_pages; /* array of page pointers */
- struct page *b_page_array[XB_PAGES]; /* inline pages */
- unsigned long b_queuetime; /* time buffer was queued */
- atomic_t b_pin_count; /* pin count */
- atomic_t b_io_remaining; /* #outstanding I/O requests */
- unsigned int b_page_count; /* size of page array */
- unsigned int b_offset; /* page offset in first page */
- unsigned short b_error; /* error code on I/O */
-#ifdef XFS_BUF_LOCK_TRACKING
- int b_last_holder;
-#endif
-} xfs_buf_t;
-
-
-/* Finding and Reading Buffers */
-extern xfs_buf_t *_xfs_buf_find(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t, xfs_buf_t *);
-#define xfs_incore(buftarg,blkno,len,lockit) \
-	_xfs_buf_find(buftarg, blkno, len, lockit, NULL)
-
-extern xfs_buf_t *xfs_buf_get(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t);
-extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t);
-
-extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len);
-extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
-extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
-extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
-struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
- struct xfs_buftarg *target,
- xfs_daddr_t daddr, size_t length, int flags);
-
-/* Releasing Buffers */
-extern void xfs_buf_free(xfs_buf_t *);
-extern void xfs_buf_rele(xfs_buf_t *);
-
-/* Locking and Unlocking Buffers */
-extern int xfs_buf_cond_lock(xfs_buf_t *);
-extern int xfs_buf_lock_value(xfs_buf_t *);
-extern void xfs_buf_lock(xfs_buf_t *);
-extern void xfs_buf_unlock(xfs_buf_t *);
-
-/* Buffer Read and Write Routines */
-extern int xfs_bwrite(struct xfs_mount *mp, struct xfs_buf *bp);
-extern void xfs_bdwrite(void *mp, xfs_buf_t *bp);
-
-extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *);
-extern int xfs_bdstrat_cb(struct xfs_buf *);
-
-extern void xfs_buf_ioend(xfs_buf_t *, int);
-extern void xfs_buf_ioerror(xfs_buf_t *, int);
-extern int xfs_buf_iorequest(xfs_buf_t *);
-extern int xfs_buf_iowait(xfs_buf_t *);
-extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
- xfs_buf_rw_t);
-#define xfs_buf_zero(bp, off, len) \
- xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-static inline int xfs_buf_geterror(xfs_buf_t *bp)
-{
- return bp ? bp->b_error : ENOMEM;
-}
-
-/* Buffer Utility Routines */
-extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-
-/* Delayed Write Buffer Routines */
-extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
-extern void xfs_buf_delwri_promote(xfs_buf_t *);
-
-/* Buffer Daemon Setup Routines */
-extern int xfs_buf_init(void);
-extern void xfs_buf_terminate(void);
-
-#define xfs_buf_target_name(target) \
- ({ char __b[BDEVNAME_SIZE]; bdevname((target)->bt_bdev, __b); __b; })
-
-
-#define XFS_BUF_BFLAGS(bp) ((bp)->b_flags)
-#define XFS_BUF_ZEROFLAGS(bp) ((bp)->b_flags &= \
- ~(XBF_READ|XBF_WRITE|XBF_ASYNC|XBF_DELWRI|XBF_ORDERED))
-
-void xfs_buf_stale(struct xfs_buf *bp);
-#define XFS_BUF_STALE(bp)	xfs_buf_stale(bp)
-#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
-#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
-#define XFS_BUF_SUPER_STALE(bp) do { \
- XFS_BUF_STALE(bp); \
- xfs_buf_delwri_dequeue(bp); \
- XFS_BUF_DONE(bp); \
- } while (0)
-
-#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
-#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
-#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
-
-#define XFS_BUF_ERROR(bp,no) xfs_buf_ioerror(bp,no)
-#define XFS_BUF_GETERROR(bp) xfs_buf_geterror(bp)
-#define XFS_BUF_ISERROR(bp) (xfs_buf_geterror(bp) ? 1 : 0)
-
-#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_BUSY(bp) do { } while (0)
-#define XFS_BUF_UNBUSY(bp) do { } while (0)
-#define XFS_BUF_ISBUSY(bp) (1)
-
-#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_ORDERED(bp) ((bp)->b_flags |= XBF_ORDERED)
-#define XFS_BUF_UNORDERED(bp) ((bp)->b_flags &= ~XBF_ORDERED)
-#define XFS_BUF_ISORDERED(bp) ((bp)->b_flags & XBF_ORDERED)
-
-#define XFS_BUF_HOLD(bp) xfs_buf_hold(bp)
-#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
-
-#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
-#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
-#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
-
-#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
-#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
-#define XFS_BUF_FSPRIVATE2(bp, type) ((type)(bp)->b_fspriv2)
-#define XFS_BUF_SET_FSPRIVATE2(bp, val) ((bp)->b_fspriv2 = (void*)(val))
-#define XFS_BUF_SET_START(bp) do { } while (0)
-
-#define XFS_BUF_PTR(bp) (xfs_caddr_t)((bp)->b_addr)
-#define XFS_BUF_SET_PTR(bp, val, cnt) xfs_buf_associate_memory(bp, val, cnt)
-#define XFS_BUF_ADDR(bp) ((bp)->b_bn)
-#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))
-#define XFS_BUF_OFFSET(bp) ((bp)->b_file_offset)
-#define XFS_BUF_SET_OFFSET(bp, off) ((bp)->b_file_offset = (off))
-#define XFS_BUF_COUNT(bp) ((bp)->b_count_desired)
-#define XFS_BUF_SET_COUNT(bp, cnt) ((bp)->b_count_desired = (cnt))
-#define XFS_BUF_SIZE(bp) ((bp)->b_buffer_length)
-#define XFS_BUF_SET_SIZE(bp, cnt) ((bp)->b_buffer_length = (cnt))
-
-static inline void
-xfs_buf_set_ref(
- struct xfs_buf *bp,
- int lru_ref)
-{
- atomic_set(&bp->b_lru_ref, lru_ref);
-}
-#define XFS_BUF_SET_VTYPE_REF(bp, type, ref) xfs_buf_set_ref(bp, ref)
-#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
-
-#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
-
-#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
-#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
-#define XFS_BUF_VSEMA(bp) xfs_buf_unlock(bp)
-#define XFS_BUF_PSEMA(bp,x) xfs_buf_lock(bp)
-#define XFS_BUF_FINISH_IOWAIT(bp)	complete(&bp->b_iowait)
-
-#define XFS_BUF_SET_TARGET(bp, target) ((bp)->b_target = (target))
-#define XFS_BUF_TARGET(bp) ((bp)->b_target)
-#define XFS_BUFTARG_NAME(target) xfs_buf_target_name(target)
-
-static inline void xfs_buf_relse(xfs_buf_t *bp)
-{
- xfs_buf_unlock(bp);
- xfs_buf_rele(bp);
-}
-
-/*
- * Handling of buftargs.
- */
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
- struct block_device *, int, const char *);
-extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
-extern void xfs_wait_buftarg(xfs_buftarg_t *);
-extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
-extern int xfs_flush_buftarg(xfs_buftarg_t *, int);
-
-#ifdef CONFIG_KDB_MODULES
-extern struct list_head *xfs_get_buftarg_list(void);
-#endif
-
-#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
-#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
-
-#define XFS_bflush(buftarg) xfs_flush_buftarg(buftarg, 1)
-
-#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_discard.c b/fs/xfs/linux-2.6/xfs_discard.c
deleted file mode 100644
index 572494f..0000000
--- a/fs/xfs/linux-2.6/xfs_discard.c
+++ /dev/null
@@ -1,222 +0,0 @@
-/*
- * Copyright (C) 2010 Red Hat, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_inode.h"
-#include "xfs_alloc.h"
-#include "xfs_error.h"
-#include "xfs_discard.h"
-#include "xfs_trace.h"
-
-STATIC int
-xfs_trim_extents(
- struct xfs_mount *mp,
- xfs_agnumber_t agno,
- xfs_fsblock_t start,
- xfs_fsblock_t len,
- xfs_fsblock_t minlen,
- __uint64_t *blocks_trimmed)
-{
- struct block_device *bdev = mp->m_ddev_targp->bt_bdev;
- struct xfs_btree_cur *cur;
- struct xfs_buf *agbp;
- struct xfs_perag *pag;
- int error;
- int i;
-
- pag = xfs_perag_get(mp, agno);
-
- error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
- if (error || !agbp)
- goto out_put_perag;
-
- cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
-
- /*
- * Force out the log. This means any transactions that might have freed
- * space before we took the AGF buffer lock are now on disk, and the
- * volatile disk cache is flushed.
- */
- xfs_log_force(mp, XFS_LOG_SYNC);
-
- /*
- * Look up the longest btree in the AGF and start with it.
- */
- error = xfs_alloc_lookup_le(cur, 0,
- be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest), &i);
- if (error)
- goto out_del_cursor;
-
- /*
- * Loop until we are done with all extents that are large
- * enough to be worth discarding.
- */
- while (i) {
- xfs_agblock_t fbno;
- xfs_extlen_t flen;
-
- error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
- if (error)
- goto out_del_cursor;
- XFS_WANT_CORRUPTED_GOTO(i == 1, out_del_cursor);
- ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
-
- /*
- * Too small? Give up.
- */
- if (flen < minlen) {
- trace_xfs_discard_toosmall(mp, agno, fbno, flen);
- goto out_del_cursor;
- }
-
- /*
- * If the extent is entirely outside of the range we are
-		 * supposed to discard, skip it. Do not bother to trim
- * down partially overlapping ranges for now.
- */
- if (XFS_AGB_TO_FSB(mp, agno, fbno) + flen < start ||
- XFS_AGB_TO_FSB(mp, agno, fbno) >= start + len) {
- trace_xfs_discard_exclude(mp, agno, fbno, flen);
- goto next_extent;
- }
-
- /*
- * If any blocks in the range are still busy, skip the
- * discard and try again the next time.
- */
- if (xfs_alloc_busy_search(mp, agno, fbno, flen)) {
- trace_xfs_discard_busy(mp, agno, fbno, flen);
- goto next_extent;
- }
-
- trace_xfs_discard_extent(mp, agno, fbno, flen);
- error = -blkdev_issue_discard(bdev,
- XFS_AGB_TO_DADDR(mp, agno, fbno),
- XFS_FSB_TO_BB(mp, flen),
- GFP_NOFS, 0);
- if (error)
- goto out_del_cursor;
- *blocks_trimmed += flen;
-
-next_extent:
- error = xfs_btree_decrement(cur, 0, &i);
- if (error)
- goto out_del_cursor;
- }
-
-out_del_cursor:
- xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
- xfs_buf_relse(agbp);
-out_put_perag:
- xfs_perag_put(pag);
- return error;
-}
-
-int
-xfs_ioc_trim(
- struct xfs_mount *mp,
- struct fstrim_range __user *urange)
-{
- struct request_queue *q = mp->m_ddev_targp->bt_bdev->bd_disk->queue;
- unsigned int granularity = q->limits.discard_granularity;
- struct fstrim_range range;
- xfs_fsblock_t start, len, minlen;
- xfs_agnumber_t start_agno, end_agno, agno;
- __uint64_t blocks_trimmed = 0;
- int error, last_error = 0;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (!blk_queue_discard(q))
- return -XFS_ERROR(EOPNOTSUPP);
- if (copy_from_user(&range, urange, sizeof(range)))
- return -XFS_ERROR(EFAULT);
-
- /*
- * Truncating down the len isn't actually quite correct, but using
- * XFS_B_TO_FSB would mean we trivially get overflows for values
- * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
- * used by the fstrim application. In the end it really doesn't
- * matter as trimming blocks is an advisory interface.
- */
- start = XFS_B_TO_FSBT(mp, range.start);
- len = XFS_B_TO_FSBT(mp, range.len);
- minlen = XFS_B_TO_FSB(mp, max_t(u64, granularity, range.minlen));
-
- start_agno = XFS_FSB_TO_AGNO(mp, start);
- if (start_agno >= mp->m_sb.sb_agcount)
- return -XFS_ERROR(EINVAL);
-
- end_agno = XFS_FSB_TO_AGNO(mp, start + len);
- if (end_agno >= mp->m_sb.sb_agcount)
- end_agno = mp->m_sb.sb_agcount - 1;
-
- for (agno = start_agno; agno <= end_agno; agno++) {
- error = -xfs_trim_extents(mp, agno, start, len, minlen,
- &blocks_trimmed);
- if (error)
- last_error = error;
- }
-
- if (last_error)
- return last_error;
-
- range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
- if (copy_to_user(urange, &range, sizeof(range)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
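-
-/*
- * A worked example of the rounding above, under assumed values (not from
- * the original file): with 4096-byte filesystem blocks, range.start =
- * 6000 and range.len = 10000 give start = XFS_B_TO_FSBT(6000) = 1 and
- * len = XFS_B_TO_FSBT(10000) = 2, both rounded down; minlen instead uses
- * XFS_B_TO_FSB, which rounds up, so a requested minimum of 5000 bytes
- * becomes 2 blocks rather than an undersized 1. Trimming slightly less
- * than asked is harmless because the interface is advisory.
- */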
-
-int
-xfs_discard_extents(
- struct xfs_mount *mp,
- struct list_head *list)
-{
- struct xfs_busy_extent *busyp;
- int error = 0;
-
- list_for_each_entry(busyp, list, list) {
- trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
- busyp->length);
-
- error = -blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
- XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
- XFS_FSB_TO_BB(mp, busyp->length),
- GFP_NOFS, 0);
- if (error && error != EOPNOTSUPP) {
- xfs_info(mp,
-			"discard failed for extent [0x%llx,%u], error %d",
- (unsigned long long)busyp->bno,
- busyp->length,
- error);
- return error;
- }
- }
-
- return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_discard.h b/fs/xfs/linux-2.6/xfs_discard.h
deleted file mode 100644
index 344879a..0000000
--- a/fs/xfs/linux-2.6/xfs_discard.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef XFS_DISCARD_H
-#define XFS_DISCARD_H 1
-
-struct fstrim_range;
-struct list_head;
-
-extern int xfs_ioc_trim(struct xfs_mount *, struct fstrim_range __user *);
-extern int xfs_discard_extents(struct xfs_mount *, struct list_head *);
-
-#endif /* XFS_DISCARD_H */
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
deleted file mode 100644
index 844b22b..0000000
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_mount.h"
-#include "xfs_export.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_trace.h"
-
-/*
- * Note that we only accept fileids which are long enough rather than allow
- * the parent generation number to default to zero. XFS considers zero a
- * valid generation number, not an invalid/wildcard value.
- */
-static int xfs_fileid_length(int fileid_type)
-{
- switch (fileid_type) {
- case FILEID_INO32_GEN:
- return 2;
- case FILEID_INO32_GEN_PARENT:
- return 4;
- case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
- return 3;
- case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
- return 6;
- }
- return 255; /* invalid */
-}
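-
-/*
- * The lengths above are in 32-bit words (an illustrative note, not from
- * the original file): FILEID_INO32_GEN packs a 32-bit inode and
- * generation (2 words); the XFS_FILEID_TYPE_64FLAG variants use struct
- * xfs_fid64 with a 64-bit inode plus 32-bit generation (3 words), and
- * adding the parent inode and generation doubles that to 6.
- */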
-
-STATIC int
-xfs_fs_encode_fh(
- struct dentry *dentry,
- __u32 *fh,
- int *max_len,
- int connectable)
-{
- struct fid *fid = (struct fid *)fh;
- struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fh;
- struct inode *inode = dentry->d_inode;
- int fileid_type;
- int len;
-
- /* Directories don't need their parent encoded, they have ".." */
- if (S_ISDIR(inode->i_mode) || !connectable)
- fileid_type = FILEID_INO32_GEN;
- else
- fileid_type = FILEID_INO32_GEN_PARENT;
-
- /*
- * If the filesystem may contain 64bit inode numbers, we need
- * to use larger file handles that can represent them.
- *
- * While we only allocate inodes that do not fit into 32 bits any
- * large enough filesystem may contain them, thus the slightly
- * confusing looking conditional below.
- */
- if (!(XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_SMALL_INUMS) ||
- (XFS_M(inode->i_sb)->m_flags & XFS_MOUNT_32BITINODES))
- fileid_type |= XFS_FILEID_TYPE_64FLAG;
-
- /*
- * Only encode if there is enough space given. In practice
- * this means we can't export a filesystem with 64bit inodes
- * over NFSv2 with the subtree_check export option; the other
- * seven combinations work. The real answer is "don't use v2".
- */
- len = xfs_fileid_length(fileid_type);
- if (*max_len < len) {
- *max_len = len;
- return 255;
- }
- *max_len = len;
-
- switch (fileid_type) {
- case FILEID_INO32_GEN_PARENT:
- spin_lock(&dentry->d_lock);
- fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
- fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
- spin_unlock(&dentry->d_lock);
- /*FALLTHRU*/
- case FILEID_INO32_GEN:
- fid->i32.ino = XFS_I(inode)->i_ino;
- fid->i32.gen = inode->i_generation;
- break;
- case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
- spin_lock(&dentry->d_lock);
- fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
- fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
- spin_unlock(&dentry->d_lock);
- /*FALLTHRU*/
- case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
- fid64->ino = XFS_I(inode)->i_ino;
- fid64->gen = inode->i_generation;
- break;
- }
-
- return fileid_type;
-}
-
-STATIC struct inode *
-xfs_nfs_get_inode(
- struct super_block *sb,
- u64 ino,
- u32 generation)
-{
- xfs_mount_t *mp = XFS_M(sb);
- xfs_inode_t *ip;
- int error;
-
- /*
- * NFS can sometimes send requests for ino 0. Fail them gracefully.
- */
- if (ino == 0)
- return ERR_PTR(-ESTALE);
-
- /*
- * XFS_IGET_UNTRUSTED means that an invalid inode number is just
- * fine and not an indication of a corrupted filesystem, as clients can
- * send invalid file handles and we have to handle them gracefully.
- */
- error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
- if (error) {
- /*
- * EINVAL means the inode cluster doesn't exist anymore.
- * This implies the filehandle is stale, so we should
- * translate it here.
- * We don't use ESTALE directly down the chain to not
- * confuse applications using bulkstat that expect EINVAL.
- */
- if (error == EINVAL)
- error = ESTALE;
- return ERR_PTR(-error);
- }
-
- if (ip->i_d.di_gen != generation) {
- IRELE(ip);
- return ERR_PTR(-ENOENT);
- }
-
- return VFS_I(ip);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fileid_type)
-{
- struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid;
- struct inode *inode = NULL;
-
- if (fh_len < xfs_fileid_length(fileid_type))
- return NULL;
-
- switch (fileid_type) {
- case FILEID_INO32_GEN_PARENT:
- case FILEID_INO32_GEN:
- inode = xfs_nfs_get_inode(sb, fid->i32.ino, fid->i32.gen);
- break;
- case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
- case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
- inode = xfs_nfs_get_inode(sb, fid64->ino, fid64->gen);
- break;
- }
-
- return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fileid_type)
-{
- struct xfs_fid64 *fid64 = (struct xfs_fid64 *)fid;
- struct inode *inode = NULL;
-
- if (fh_len < xfs_fileid_length(fileid_type))
- return NULL;
-
- switch (fileid_type) {
- case FILEID_INO32_GEN_PARENT:
- inode = xfs_nfs_get_inode(sb, fid->i32.parent_ino,
- fid->i32.parent_gen);
- break;
- case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
- inode = xfs_nfs_get_inode(sb, fid64->parent_ino,
- fid64->parent_gen);
- break;
- }
-
- return d_obtain_alias(inode);
-}
-
-STATIC struct dentry *
-xfs_fs_get_parent(
- struct dentry *child)
-{
- int error;
- struct xfs_inode *cip;
-
- error = xfs_lookup(XFS_I(child->d_inode), &xfs_name_dotdot, &cip, NULL);
- if (unlikely(error))
- return ERR_PTR(-error);
-
- return d_obtain_alias(VFS_I(cip));
-}
-
-STATIC int
-xfs_fs_nfs_commit_metadata(
- struct inode *inode)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- int error = 0;
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- if (xfs_ipincount(ip)) {
- error = _xfs_log_force_lsn(mp, ip->i_itemp->ili_last_lsn,
- XFS_LOG_SYNC, NULL);
- }
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
- return error;
-}
-
-const struct export_operations xfs_export_operations = {
- .encode_fh = xfs_fs_encode_fh,
- .fh_to_dentry = xfs_fs_fh_to_dentry,
- .fh_to_parent = xfs_fs_fh_to_parent,
- .get_parent = xfs_fs_get_parent,
- .commit_metadata = xfs_fs_nfs_commit_metadata,
-};
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
deleted file mode 100644
index 3272b6a..0000000
--- a/fs/xfs/linux-2.6/xfs_export.h
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_EXPORT_H__
-#define __XFS_EXPORT_H__
-
-/*
- * Common defines for code related to exporting XFS filesystems over NFS.
- *
- * The NFS fileid goes out on the wire as an array of
- * 32bit unsigned ints in host order. There are 5 possible
- * formats.
- *
- * (1) fileid_type=0x00
- * (no fileid data; handled by the generic code)
- *
- * (2) fileid_type=0x01
- * inode-num
- * generation
- *
- * (3) fileid_type=0x02
- * inode-num
- * generation
- * parent-inode-num
- * parent-generation
- *
- * (4) fileid_type=0x81
- * inode-num-lo32
- * inode-num-hi32
- * generation
- *
- * (5) fileid_type=0x82
- * inode-num-lo32
- * inode-num-hi32
- * generation
- * parent-inode-num-lo32
- * parent-inode-num-hi32
- * parent-generation
- *
- * Note, the NFS filehandle also includes an fsid portion which
- * may have an inode number in it. That number is hardcoded to
- * 32bits and there is no way for XFS to intercept it. In
- * practice this means when exporting an XFS filesystem with 64bit
- * inodes you should either export the mountpoint (rather than
- * a subdirectory) or use the "fsid" export option.
- */
-
-struct xfs_fid64 {
- u64 ino;
- u32 gen;
- u64 parent_ino;
- u32 parent_gen;
-} __attribute__((packed));
-
-/* This flag goes on the wire. Don't play with it. */
-#define XFS_FILEID_TYPE_64FLAG 0x80 /* NFS fileid has 64bit inodes */
-
-#endif /* __XFS_EXPORT_H__ */
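
To make the word counts in the comment above concrete, here is a small userspace sketch (mirroring the packed layout rather than including the kernel header itself) confirming that formats (4) and (5) occupy 3 and 6 32-bit words:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct xfs_fid64_example {	/* mirrors struct xfs_fid64 above */
	uint64_t ino;
	uint32_t gen;
	uint64_t parent_ino;
	uint32_t parent_gen;
} __attribute__((packed));

int main(void)
{
	/* format (4): ino-lo32, ino-hi32, generation = 3 words */
	printf("format 4: %zu words\n",
	       offsetof(struct xfs_fid64_example, parent_ino) / 4);
	/* format (5): all four fields = 6 words */
	printf("format 5: %zu words\n",
	       sizeof(struct xfs_fid64_example) / 4);
	return 0;
}
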
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
deleted file mode 100644
index b679198..0000000
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ /dev/null
@@ -1,1114 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_trans.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_inode_item.h"
-#include "xfs_bmap.h"
-#include "xfs_error.h"
-#include "xfs_vnodeops.h"
-#include "xfs_da_btree.h"
-#include "xfs_ioctl.h"
-#include "xfs_trace.h"
-
-#include <linux/dcache.h>
-#include <linux/falloc.h>
-
-static const struct vm_operations_struct xfs_file_vm_ops;
-
-/*
- * Locking primitives for read and write IO paths to ensure we consistently use
- * and order the inode->i_mutex, ip->i_lock and ip->i_iolock.
- */
-static inline void
-xfs_rw_ilock(
- struct xfs_inode *ip,
- int type)
-{
- if (type & XFS_IOLOCK_EXCL)
- mutex_lock(&VFS_I(ip)->i_mutex);
- xfs_ilock(ip, type);
-}
-
-static inline void
-xfs_rw_iunlock(
- struct xfs_inode *ip,
- int type)
-{
- xfs_iunlock(ip, type);
- if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-static inline void
-xfs_rw_ilock_demote(
- struct xfs_inode *ip,
- int type)
-{
- xfs_ilock_demote(ip, type);
- if (type & XFS_IOLOCK_EXCL)
- mutex_unlock(&VFS_I(ip)->i_mutex);
-}
-
-/*
- * xfs_iozero
- *
- * xfs_iozero clears the specified range of buffer supplied,
- * and marks all the affected blocks as valid and modified. If
- * an affected block is not allocated, it will be allocated. If
- * an affected block is not completely overwritten, and is not
- * valid before the operation, it will be read from disk before
- * being partially zeroed.
- */
-STATIC int
-xfs_iozero(
- struct xfs_inode *ip, /* inode */
- loff_t pos, /* offset in file */
- size_t count) /* size of data to zero */
-{
- struct page *page;
- struct address_space *mapping;
- int status;
-
- mapping = VFS_I(ip)->i_mapping;
- do {
- unsigned offset, bytes;
- void *fsdata;
-
- offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
- bytes = PAGE_CACHE_SIZE - offset;
- if (bytes > count)
- bytes = count;
-
- status = pagecache_write_begin(NULL, mapping, pos, bytes,
- AOP_FLAG_UNINTERRUPTIBLE,
- &page, &fsdata);
- if (status)
- break;
-
- zero_user(page, offset, bytes);
-
- status = pagecache_write_end(NULL, mapping, pos, bytes, bytes,
- page, fsdata);
- WARN_ON(status <= 0); /* can't return less than zero! */
- pos += bytes;
- count -= bytes;
- status = 0;
- } while (count);
-
- return (-status);
-}
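
The loop above has a straightforward userspace analogue. As a sketch (assuming a fixed 4096-byte chunk size rather than the page-cache granularity the kernel works at), zeroing a range with pwrite(2) looks like:

#include <sys/types.h>
#include <unistd.h>

#define CHUNK 4096

/* Zero [pos, pos + count) one chunk at a time, analogous to the
 * pagecache_write_begin/zero_user/pagecache_write_end loop above. */
static int zero_range(int fd, off_t pos, size_t count)
{
	static const char zeros[CHUNK];	/* zero-initialized */

	while (count) {
		size_t n = count < CHUNK ? count : CHUNK;

		if (pwrite(fd, zeros, n, pos) != (ssize_t)n)
			return -1;
		pos += n;
		count -= n;
	}
	return 0;
}
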
-
-STATIC int
-xfs_file_fsync(
- struct file *file,
- int datasync)
-{
- struct inode *inode = file->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error = 0;
- int log_flushed = 0;
-
- trace_xfs_file_fsync(ip);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- xfs_iflags_clear(ip, XFS_ITRUNCATED);
-
- xfs_ioend_wait(ip);
-
- if (mp->m_flags & XFS_MOUNT_BARRIER) {
- /*
- * If we have an RT and/or log subvolume we need to make sure
- * to flush the write cache of the device used for file data
- * first. This is to ensure newly written file data makes
- * it to disk before logging the new inode size in case of
- * an extending write.
- */
- if (XFS_IS_REALTIME_INODE(ip))
- xfs_blkdev_issue_flush(mp->m_rtdev_targp);
- else if (mp->m_logdev_targp != mp->m_ddev_targp)
- xfs_blkdev_issue_flush(mp->m_ddev_targp);
- }
-
- /*
- * We always need to make sure that the required inode state is safe on
- * disk. The inode might be clean but we still might need to force the
- * log because of committed transactions that haven't hit the disk yet.
- * Likewise, there could be unflushed non-transactional changes to the
- * inode core that have to go to disk and this requires us to issue
- * a synchronous transaction to capture these changes correctly.
- *
- * This code relies on the assumption that if the i_update_core field
- * of the inode is clear and the inode is unpinned then it is clean
- * and no action is required.
- */
- xfs_ilock(ip, XFS_ILOCK_SHARED);
-
- /*
- * First check if the VFS inode is marked dirty. All the dirtying
- * of non-transactional updates now goes through mark_inode_dirty*,
- * which allows us to distinguish between pure timestamp updates
- * and i_size updates which need to be caught for fdatasync.
- * After that also check for the dirty state in the XFS inode, which
- * might get cleared when the inode gets written out via the AIL
- * or xfs_iflush_cluster.
- */
- if (((inode->i_state & I_DIRTY_DATASYNC) ||
- ((inode->i_state & I_DIRTY_SYNC) && !datasync)) &&
- ip->i_update_core) {
- /*
- * Kick off a transaction to log the inode core to get the
- * updates. The sync transaction will also force the log.
- */
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0,
- XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return -error;
- }
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- /*
- * Note - it's possible that we might have pushed ourselves out
- * of the way during trans_reserve which would flush the inode.
- * But there's no guarantee that the inode buffer has actually
- * gone out yet (it's delwri). Plus the buffer could be pinned
- * anyway if it's part of an inode in another recent
- * transaction. So we play it safe and fire off the
- * transaction anyway.
- */
- xfs_trans_ijoin(tp, ip);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- xfs_trans_set_sync(tp);
- error = _xfs_trans_commit(tp, 0, &log_flushed);
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- } else {
- /*
- * Timestamps/size haven't changed since last inode flush or
- * inode transaction commit. That means either nothing got
- * written or a transaction committed which caught the updates.
- * If the latter happened and the transaction hasn't hit the
- * disk yet, the inode will still be pinned. If it is,
- * force the log.
- */
- if (xfs_ipincount(ip)) {
- error = _xfs_log_force_lsn(mp,
- ip->i_itemp->ili_last_lsn,
- XFS_LOG_SYNC, &log_flushed);
- }
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- }
-
- /*
- * If we only have a single device, and the log force above was
- * a no-op we might have to flush the data device cache here.
- * This can only happen for fdatasync/O_DSYNC if we were overwriting
- * an already allocated file and thus do not have any metadata to
- * commit.
- */
- if ((mp->m_flags & XFS_MOUNT_BARRIER) &&
- mp->m_logdev_targp == mp->m_ddev_targp &&
- !XFS_IS_REALTIME_INODE(ip) &&
- !log_flushed)
- xfs_blkdev_issue_flush(mp->m_ddev_targp);
-
- return -error;
-}
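
From userspace, the datasync distinction this function implements is simply the difference between fsync(2) and fdatasync(2). A sketch of the cheap path:

#include <sys/types.h>
#include <unistd.h>

/* Overwrite in place, then flush. fdatasync() corresponds to the
 * datasync=1 case above: a pure timestamp update (I_DIRTY_SYNC but
 * not I_DIRTY_DATASYNC) does not force the inode-core transaction. */
static int overwrite_and_flush(int fd, const void *buf, size_t len)
{
	if (pwrite(fd, buf, len, 0) != (ssize_t)len)
		return -1;
	return fdatasync(fd);	/* fsync(fd) would also flush timestamps */
}
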
-
-STATIC ssize_t
-xfs_file_aio_read(
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- size_t size = 0;
- ssize_t ret = 0;
- int ioflags = 0;
- xfs_fsize_t n;
- unsigned long seg;
-
- XFS_STATS_INC(xs_read_calls);
-
- BUG_ON(iocb->ki_pos != pos);
-
- if (unlikely(file->f_flags & O_DIRECT))
- ioflags |= IO_ISDIRECT;
- if (file->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
-
- /* START copy & waste from filemap.c */
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *iv = &iovp[seg];
-
- /*
- * If any segment has a negative length, or the cumulative
- * length ever wraps negative then return -EINVAL.
- */
- size += iv->iov_len;
- if (unlikely((ssize_t)(size|iv->iov_len) < 0))
- return XFS_ERROR(-EINVAL);
- }
- /* END copy & waste from filemap.c */
-
- if (unlikely(ioflags & IO_ISDIRECT)) {
- xfs_buftarg_t *target =
- XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((iocb->ki_pos & target->bt_smask) ||
- (size & target->bt_smask)) {
- if (iocb->ki_pos == ip->i_size)
- return 0;
- return -XFS_ERROR(EINVAL);
- }
- }
-
- n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
- if (n <= 0 || size == 0)
- return 0;
-
- if (n < size)
- size = n;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- /*
- * Locking is a bit tricky here. If we take an exclusive lock
- * for direct IO, we effectively serialise all new concurrent
- * read IO to this file and block it behind IO that is currently in
- * progress because IO in progress holds the IO lock shared. We only
- * need to hold the lock exclusive to blow away the page cache, so
- * only take lock exclusively if the page cache needs invalidation.
- * This allows the normal direct IO case of no page cache pages to
- * proceed concurrently without serialisation.
- */
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
- if ((ioflags & IO_ISDIRECT) && inode->i_mapping->nrpages) {
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
- xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
-
- if (inode->i_mapping->nrpages) {
- ret = -xfs_flushinval_pages(ip,
- (iocb->ki_pos & PAGE_CACHE_MASK),
- -1, FI_REMAPF_LOCKED);
- if (ret) {
- xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
- return ret;
- }
- }
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- }
-
- trace_xfs_file_read(ip, size, iocb->ki_pos, ioflags);
-
- ret = generic_file_aio_read(iocb, iovp, nr_segs, iocb->ki_pos);
- if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
-
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
- return ret;
-}
-
-STATIC ssize_t
-xfs_file_splice_read(
- struct file *infilp,
- loff_t *ppos,
- struct pipe_inode_info *pipe,
- size_t count,
- unsigned int flags)
-{
- struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
- int ioflags = 0;
- ssize_t ret;
-
- XFS_STATS_INC(xs_read_calls);
-
- if (infilp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- xfs_rw_ilock(ip, XFS_IOLOCK_SHARED);
-
- trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
-
- ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(xs_read_bytes, ret);
-
- xfs_rw_iunlock(ip, XFS_IOLOCK_SHARED);
- return ret;
-}
-
-STATIC void
-xfs_aio_write_isize_update(
- struct inode *inode,
- loff_t *ppos,
- ssize_t bytes_written)
-{
- struct xfs_inode *ip = XFS_I(inode);
- xfs_fsize_t isize = i_size_read(inode);
-
- if (bytes_written > 0)
- XFS_STATS_ADD(xs_write_bytes, bytes_written);
-
- if (unlikely(bytes_written < 0 && bytes_written != -EFAULT &&
- *ppos > isize))
- *ppos = isize;
-
- if (*ppos > ip->i_size) {
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
- if (*ppos > ip->i_size)
- ip->i_size = *ppos;
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
- }
-}
-
-/*
- * If this was a direct or synchronous I/O that failed (such as ENOSPC) then
- * part of the I/O may have been written to disk before the error occurred. In
- * this case the on-disk file size may have been adjusted beyond the in-memory
- * file size and now needs to be truncated back.
- */
-STATIC void
-xfs_aio_write_newsize_update(
- struct xfs_inode *ip)
-{
- if (ip->i_new_size) {
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
- ip->i_new_size = 0;
- if (ip->i_d.di_size > ip->i_size)
- ip->i_d.di_size = ip->i_size;
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
- }
-}
-
-/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * could cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
- struct pipe_inode_info *pipe,
- struct file *outfilp,
- loff_t *ppos,
- size_t count,
- unsigned int flags)
-{
- struct inode *inode = outfilp->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- xfs_fsize_t new_size;
- int ioflags = 0;
- ssize_t ret;
-
- XFS_STATS_INC(xs_write_calls);
-
- if (outfilp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- new_size = *ppos + count;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (new_size > ip->i_size)
- ip->i_new_size = new_size;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
- ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
-
- xfs_aio_write_isize_update(inode, ppos, ret);
- xfs_aio_write_newsize_update(ip);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return ret;
-}
-
-/*
- * This routine is called to handle zeroing any space in the last
- * block of the file that is beyond the EOF. We do this since the
- * size is being increased without writing anything to that block
- * and we don't want anyone to read the garbage on the disk.
- */
-STATIC int /* error (positive) */
-xfs_zero_last_block(
- xfs_inode_t *ip,
- xfs_fsize_t offset,
- xfs_fsize_t isize)
-{
- xfs_fileoff_t last_fsb;
- xfs_mount_t *mp = ip->i_mount;
- int nimaps;
- int zero_offset;
- int zero_len;
- int error = 0;
- xfs_bmbt_irec_t imap;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
- zero_offset = XFS_B_FSB_OFFSET(mp, isize);
- if (zero_offset == 0) {
- /*
- * There are no extra bytes in the last block on disk to
- * zero, so return.
- */
- return 0;
- }
-
- last_fsb = XFS_B_TO_FSBT(mp, isize);
- nimaps = 1;
- error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
- &nimaps, NULL);
- if (error) {
- return error;
- }
- ASSERT(nimaps > 0);
- /*
- * If the block underlying isize is just a hole, then there
- * is nothing to zero.
- */
- if (imap.br_startblock == HOLESTARTBLOCK) {
- return 0;
- }
- /*
- * Zero the part of the last block beyond the EOF, and write it
- * out sync. We need to drop the ilock while we do this so we
- * don't deadlock when the buffer cache calls back to us.
- */
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- zero_len = mp->m_sb.sb_blocksize - zero_offset;
- if (isize + zero_len > offset)
- zero_len = offset - isize;
- error = xfs_iozero(ip, isize, zero_len);
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- ASSERT(error >= 0);
- return error;
-}
-
-/*
- * Zero any on disk space between the current EOF and the new,
- * larger EOF. This handles the normal case of zeroing the remainder
- * of the last block in the file and the unusual case of zeroing blocks
- * out beyond the size of the file. This second case only happens
- * with fixed size extents and when the system crashes before the inode
- * size was updated but after blocks were allocated. Holes in the
- * range are left alone as holes; only allocated blocks are zeroed.
- */
-
-int /* error (positive) */
-xfs_zero_eof(
- xfs_inode_t *ip,
- xfs_off_t offset, /* starting I/O offset */
- xfs_fsize_t isize) /* current inode size */
-{
- xfs_mount_t *mp = ip->i_mount;
- xfs_fileoff_t start_zero_fsb;
- xfs_fileoff_t end_zero_fsb;
- xfs_fileoff_t zero_count_fsb;
- xfs_fileoff_t last_fsb;
- xfs_fileoff_t zero_off;
- xfs_fsize_t zero_len;
- int nimaps;
- int error = 0;
- xfs_bmbt_irec_t imap;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT(offset > isize);
-
- /*
- * First handle zeroing the block on which isize resides.
- * We only zero a part of that block so it is handled specially.
- */
- error = xfs_zero_last_block(ip, offset, isize);
- if (error) {
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- return error;
- }
-
- /*
- * Calculate the range between the new size and the old
- * where blocks needing to be zeroed may exist. To get the
- * block where the last byte in the file currently resides,
- * we need to subtract one from the size and truncate back
- * to a block boundary. We subtract 1 in case the size is
- * exactly on a block boundary.
- */
- last_fsb = isize ? XFS_B_TO_FSBT(mp, isize - 1) : (xfs_fileoff_t)-1;
- start_zero_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)isize);
- end_zero_fsb = XFS_B_TO_FSBT(mp, offset - 1);
- ASSERT((xfs_sfiloff_t)last_fsb < (xfs_sfiloff_t)start_zero_fsb);
- if (last_fsb == end_zero_fsb) {
- /*
- * The size was only incremented on its last block.
- * We took care of that above, so just return.
- */
- return 0;
- }
-
- ASSERT(start_zero_fsb <= end_zero_fsb);
- while (start_zero_fsb <= end_zero_fsb) {
- nimaps = 1;
- zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
- error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
- 0, NULL, 0, &imap, &nimaps, NULL);
- if (error) {
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- return error;
- }
- ASSERT(nimaps > 0);
-
- if (imap.br_state == XFS_EXT_UNWRITTEN ||
- imap.br_startblock == HOLESTARTBLOCK) {
- /*
- * This extent is a hole or an unwritten extent, both
- * of which already read back as zeroes, so there is
- * nothing on disk that needs zeroing: skip ahead to
- * the next extent in the range.
- */
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
- continue;
- }
-
- /*
- * There are blocks we need to zero.
- * Drop the inode lock while we're doing the I/O.
- * We'll still have the iolock to protect us.
- */
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- zero_off = XFS_FSB_TO_B(mp, start_zero_fsb);
- zero_len = XFS_FSB_TO_B(mp, imap.br_blockcount);
-
- if ((zero_off + zero_len) > offset)
- zero_len = offset - zero_off;
-
- error = xfs_iozero(ip, zero_off, zero_len);
- if (error) {
- goto out_lock;
- }
-
- start_zero_fsb = imap.br_startoff + imap.br_blockcount;
- ASSERT(start_zero_fsb <= (end_zero_fsb + 1));
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- }
-
- return 0;
-
-out_lock:
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- ASSERT(error >= 0);
- return error;
-}
-
-/*
- * Common pre-write limit and setup checks.
- *
- * Returns with iolock held according to @iolock.
- */
-STATIC ssize_t
-xfs_file_aio_write_checks(
- struct file *file,
- loff_t *pos,
- size_t *count,
- int *iolock)
-{
- struct inode *inode = file->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- xfs_fsize_t new_size;
- int error = 0;
-
- xfs_rw_ilock(ip, XFS_ILOCK_EXCL);
- error = generic_write_checks(file, pos, count, S_ISBLK(inode->i_mode));
- if (error) {
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL | *iolock);
- *iolock = 0;
- return error;
- }
-
- new_size = *pos + *count;
- if (new_size > ip->i_size)
- ip->i_new_size = new_size;
-
- if (likely(!(file->f_mode & FMODE_NOCMTIME)))
- file_update_time(file);
-
- /*
- * If the offset is beyond the size of the file, we need to zero any
- * blocks that fall between the existing EOF and the start of this
- * write.
- */
- if (*pos > ip->i_size)
- error = -xfs_zero_eof(ip, *pos, ip->i_size);
-
- xfs_rw_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- return error;
-
- /*
- * If we're writing the file then make sure to clear the setuid and
- * setgid bits if the process is not being run by root. This keeps
- * people from modifying setuid and setgid binaries.
- */
- return file_remove_suid(file);
-
-}
-
-/*
- * xfs_file_dio_aio_write - handle direct IO writes
- *
- * Lock the inode appropriately to prepare for and issue a direct IO write.
- * By separating it from the buffered write path we remove all the
- * tricky-to-follow locking changes and looping.
- *
- * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
- * until we're sure the bytes at the new EOF have been zeroed and/or the cached
- * pages are flushed out.
- *
- * In most cases the direct IO writes will be done holding IOLOCK_SHARED
- * allowing them to be done in parallel with reads and other direct IO writes.
- * However, if the IO is not aligned to filesystem blocks, the direct IO layer
- * needs to do sub-block zeroing and that requires serialisation against other
- * direct IOs to the same block. In this case we need to serialise the
- * submission of the unaligned IOs so that we don't get racing block zeroing in
- * the dio layer. To avoid the problem with aio, we also need to wait for
- * outstanding IOs to complete so that unwritten extent conversion is completed
- * before we try to map the overlapping block. This is currently implemented by
- * hitting it with a big hammer (i.e. xfs_ioend_wait()).
- *
- * Returns with locks held indicated by @iolock and errors indicated by
- * negative return values.
- */
-STATIC ssize_t
-xfs_file_dio_aio_write(
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos,
- size_t ocount,
- int *iolock)
-{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- ssize_t ret = 0;
- size_t count = ocount;
- int unaligned_io = 0;
- struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
-
- *iolock = 0;
- if ((pos & target->bt_smask) || (count & target->bt_smask))
- return -XFS_ERROR(EINVAL);
-
- if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
- unaligned_io = 1;
-
- if (unaligned_io || mapping->nrpages || pos > ip->i_size)
- *iolock = XFS_IOLOCK_EXCL;
- else
- *iolock = XFS_IOLOCK_SHARED;
- xfs_rw_ilock(ip, *iolock);
-
- ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
- if (ret)
- return ret;
-
- /*
- * Recheck if there are cached pages that need invalidate after we got
- * the iolock to protect against other threads adding new pages while
- * we were waiting for the iolock.
- */
- if (mapping->nrpages && *iolock == XFS_IOLOCK_SHARED) {
- xfs_rw_iunlock(ip, *iolock);
- *iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
- }
-
- if (mapping->nrpages) {
- ret = -xfs_flushinval_pages(ip, (pos & PAGE_CACHE_MASK), -1,
- FI_REMAPF_LOCKED);
- if (ret)
- return ret;
- }
-
- /*
- * If we are doing unaligned IO, wait for all other IO to drain;
- * otherwise demote the lock if we had to flush cached pages.
- */
- if (unaligned_io)
- xfs_ioend_wait(ip);
- else if (*iolock == XFS_IOLOCK_EXCL) {
- xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
- *iolock = XFS_IOLOCK_SHARED;
- }
-
- trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
- ret = generic_file_direct_write(iocb, iovp,
- &nr_segs, pos, &iocb->ki_pos, count, ocount);
-
- /* No fallback to buffered IO on errors for XFS. */
- ASSERT(ret < 0 || ret == count);
- return ret;
-}
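
The alignment rules enforced here (pos and count must satisfy the device sector mask, and sub-filesystem-block IO is serialised) translate into the usual userspace O_DIRECT discipline. A sketch, assuming 4096-byte alignment as a conservative stand-in for the sector size checked via bt_smask above:

#define _GNU_SOURCE		/* O_DIRECT */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define ALIGN_SZ 4096	/* assumption; the real bound is the sector size */

/* Issue one aligned direct write; buffer, offset and length are all
 * multiples of ALIGN_SZ, so the shared-iolock fast path above can be
 * taken when the range is also filesystem-block aligned. */
static ssize_t dio_write(const char *path, off_t off, size_t len)
{
	void *buf;
	ssize_t ret;
	int fd = open(path, O_WRONLY | O_DIRECT);

	if (fd < 0)
		return -1;
	if (posix_memalign(&buf, ALIGN_SZ, len)) {
		close(fd);
		return -1;
	}
	memset(buf, 0, len);
	ret = pwrite(fd, buf, len, off);
	free(buf);
	close(fd);
	return ret;
}
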
-
-STATIC ssize_t
-xfs_file_buffered_aio_write(
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos,
- size_t ocount,
- int *iolock)
-{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- ssize_t ret;
- int enospc = 0;
- size_t count = ocount;
-
- *iolock = XFS_IOLOCK_EXCL;
- xfs_rw_ilock(ip, *iolock);
-
- ret = xfs_file_aio_write_checks(file, &pos, &count, iolock);
- if (ret)
- return ret;
-
- /* We can write back this queue in page reclaim */
- current->backing_dev_info = mapping->backing_dev_info;
-
-write_retry:
- trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
- ret = generic_file_buffered_write(iocb, iovp, nr_segs,
- pos, &iocb->ki_pos, count, ret);
- /*
- * If we just got an ENOSPC, flush the inode now that we aren't
- * holding any page locks, and retry *once*.
- */
- if (ret == -ENOSPC && !enospc) {
- ret = -xfs_flush_pages(ip, 0, -1, 0, FI_NONE);
- if (ret)
- return ret;
- enospc = 1;
- goto write_retry;
- }
- current->backing_dev_info = NULL;
- return ret;
-}
-
-STATIC ssize_t
-xfs_file_aio_write(
- struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos)
-{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- struct inode *inode = mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- ssize_t ret;
- int iolock;
- size_t ocount = 0;
-
- XFS_STATS_INC(xs_write_calls);
-
- BUG_ON(iocb->ki_pos != pos);
-
- ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
- if (ret)
- return ret;
-
- if (ocount == 0)
- return 0;
-
- xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- if (unlikely(file->f_flags & O_DIRECT))
- ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos,
- ocount, &iolock);
- else
- ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
- ocount, &iolock);
-
- xfs_aio_write_isize_update(inode, &iocb->ki_pos, ret);
-
- if (ret <= 0)
- goto out_unlock;
-
- /* Handle various SYNC-type writes */
- if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
- loff_t end = pos + ret - 1;
- int error, error2;
-
- xfs_rw_iunlock(ip, iolock);
- error = filemap_write_and_wait_range(mapping, pos, end);
- xfs_rw_ilock(ip, iolock);
-
- error2 = -xfs_file_fsync(file,
- (file->f_flags & __O_SYNC) ? 0 : 1);
- if (error)
- ret = error;
- else if (error2)
- ret = error2;
- }
-
-out_unlock:
- xfs_aio_write_newsize_update(ip);
- xfs_rw_iunlock(ip, iolock);
- return ret;
-}
-
-STATIC long
-xfs_file_fallocate(
- struct file *file,
- int mode,
- loff_t offset,
- loff_t len)
-{
- struct inode *inode = file->f_path.dentry->d_inode;
- long error;
- loff_t new_size = 0;
- xfs_flock64_t bf;
- xfs_inode_t *ip = XFS_I(inode);
- int cmd = XFS_IOC_RESVSP;
- int attr_flags = XFS_ATTR_NOLOCK;
-
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
- return -EOPNOTSUPP;
-
- bf.l_whence = 0;
- bf.l_start = offset;
- bf.l_len = len;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- if (mode & FALLOC_FL_PUNCH_HOLE)
- cmd = XFS_IOC_UNRESVSP;
-
- /* check the new inode size is valid before allocating */
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
- new_size = offset + len;
- error = inode_newsize_ok(inode, new_size);
- if (error)
- goto out_unlock;
- }
-
- if (file->f_flags & O_DSYNC)
- attr_flags |= XFS_ATTR_SYNC;
-
- error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
- if (error)
- goto out_unlock;
-
- /* Change file size if needed */
- if (new_size) {
- struct iattr iattr;
-
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = new_size;
- error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
- }
-
-out_unlock:
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return error;
-}
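
xfs_file_fallocate maps the VFS fallocate modes onto the older XFS space-reservation ioctls; from userspace, the hole-punch branch is reached through the standard interface. A sketch:

#define _GNU_SOURCE		/* fallocate() */
#include <fcntl.h>
#include <linux/falloc.h>	/* FALLOC_FL_* */

/* Punch a hole without changing i_size; this takes the
 * XFS_IOC_UNRESVSP branch above. PUNCH_HOLE requires KEEP_SIZE. */
static int punch_hole(int fd, off_t offset, off_t len)
{
	return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
			 offset, len);
}
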
-
-
-STATIC int
-xfs_file_open(
- struct inode *inode,
- struct file *file)
-{
- if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
- return -EFBIG;
- if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
- return -EIO;
- return 0;
-}
-
-STATIC int
-xfs_dir_open(
- struct inode *inode,
- struct file *file)
-{
- struct xfs_inode *ip = XFS_I(inode);
- int mode;
- int error;
-
- error = xfs_file_open(inode, file);
- if (error)
- return error;
-
- /*
- * If there are any blocks, read-ahead block 0 as we're almost
- * certain to have the next operation be a read there.
- */
- mode = xfs_ilock_map_shared(ip);
- if (ip->i_d.di_nextents > 0)
- xfs_da_reada_buf(NULL, ip, 0, XFS_DATA_FORK);
- xfs_iunlock(ip, mode);
- return 0;
-}
-
-STATIC int
-xfs_file_release(
- struct inode *inode,
- struct file *filp)
-{
- return -xfs_release(XFS_I(inode));
-}
-
-STATIC int
-xfs_file_readdir(
- struct file *filp,
- void *dirent,
- filldir_t filldir)
-{
- struct inode *inode = filp->f_path.dentry->d_inode;
- xfs_inode_t *ip = XFS_I(inode);
- int error;
- size_t bufsize;
-
- /*
- * The Linux API doesn't pass the total size of the buffer
- * we read into down to the filesystem. With the filldir concept
- * it's not needed for correctness, but the XFS dir2 leaf
- * code wants an estimate of the buffer size to calculate its
- * readahead window and size the buffers used for mapping to
- * physical blocks.
- *
- * Try to give it an estimate that's good enough, maybe at some
- * point we can change the ->readdir prototype to include the
- * buffer size. For now we use the current glibc buffer size.
- */
- bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size);
-
- error = xfs_readdir(ip, dirent, bufsize,
- (xfs_off_t *)&filp->f_pos, filldir);
- if (error)
- return -error;
- return 0;
-}
-
-STATIC int
-xfs_file_mmap(
- struct file *filp,
- struct vm_area_struct *vma)
-{
- vma->vm_ops = &xfs_file_vm_ops;
- vma->vm_flags |= VM_CAN_NONLINEAR;
-
- file_accessed(filp);
- return 0;
-}
-
-/*
- * An mmap()ed file has taken a write protection fault and is being made
- * writable. We can set the page state up correctly for a writable
- * page, which means we can do correct delalloc accounting (ENOSPC
- * checking!) and unwritten extent mapping.
- */
-STATIC int
-xfs_vm_page_mkwrite(
- struct vm_area_struct *vma,
- struct vm_fault *vmf)
-{
- return block_page_mkwrite(vma, vmf, xfs_get_blocks);
-}
-
-const struct file_operations xfs_file_operations = {
- .llseek = generic_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = xfs_file_aio_read,
- .aio_write = xfs_file_aio_write,
- .splice_read = xfs_file_splice_read,
- .splice_write = xfs_file_splice_write,
- .unlocked_ioctl = xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = xfs_file_compat_ioctl,
-#endif
- .mmap = xfs_file_mmap,
- .open = xfs_file_open,
- .release = xfs_file_release,
- .fsync = xfs_file_fsync,
- .fallocate = xfs_file_fallocate,
-};
-
-const struct file_operations xfs_dir_file_operations = {
- .open = xfs_dir_open,
- .read = generic_read_dir,
- .readdir = xfs_file_readdir,
- .llseek = generic_file_llseek,
- .unlocked_ioctl = xfs_file_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = xfs_file_compat_ioctl,
-#endif
- .fsync = xfs_file_fsync,
-};
-
-static const struct vm_operations_struct xfs_file_vm_ops = {
- .fault = filemap_fault,
- .page_mkwrite = xfs_vm_page_mkwrite,
-};
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
deleted file mode 100644
index ed88ed1..0000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_vnodeops.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_trace.h"
-
-/*
- * note: all filemap functions return negative error codes. These
- * need to be inverted before returning to the xfs core functions.
- */
-void
-xfs_tosspages(
- xfs_inode_t *ip,
- xfs_off_t first,
- xfs_off_t last,
- int fiopt)
-{
- /* can't toss partial tail pages, so mask them out */
- last &= ~(PAGE_SIZE - 1);
- truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
-}
-
-int
-xfs_flushinval_pages(
- xfs_inode_t *ip,
- xfs_off_t first,
- xfs_off_t last,
- int fiopt)
-{
- struct address_space *mapping = VFS_I(ip)->i_mapping;
- int ret = 0;
-
- trace_xfs_pagecache_inval(ip, first, last);
-
- xfs_iflags_clear(ip, XFS_ITRUNCATED);
- ret = filemap_write_and_wait_range(mapping, first,
- last == -1 ? LLONG_MAX : last);
- if (!ret)
- truncate_inode_pages_range(mapping, first, last);
- return -ret;
-}
-
-int
-xfs_flush_pages(
- xfs_inode_t *ip,
- xfs_off_t first,
- xfs_off_t last,
- uint64_t flags,
- int fiopt)
-{
- struct address_space *mapping = VFS_I(ip)->i_mapping;
- int ret = 0;
- int ret2;
-
- xfs_iflags_clear(ip, XFS_ITRUNCATED);
- ret = -filemap_fdatawrite_range(mapping, first,
- last == -1 ? LLONG_MAX : last);
- if (flags & XBF_ASYNC)
- return ret;
- ret2 = xfs_wait_on_pages(ip, first, last);
- if (!ret)
- ret = ret2;
- return ret;
-}
-
-int
-xfs_wait_on_pages(
- xfs_inode_t *ip,
- xfs_off_t first,
- xfs_off_t last)
-{
- struct address_space *mapping = VFS_I(ip)->i_mapping;
-
- if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
- return -filemap_fdatawait_range(mapping, first,
- last == -1 ? ip->i_size - 1 : last);
- }
- return 0;
-}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
deleted file mode 100644
index 76e81cf..0000000
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_sysctl.h"
-
-/*
- * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n,
- * because other XFS code uses these values. Times are measured in centisecs
- * (i.e. 100ths of a second).
- */
-xfs_param_t xfs_params = {
- /* MIN DFLT MAX */
- .sgid_inherit = { 0, 0, 1 },
- .symlink_mode = { 0, 0, 1 },
- .panic_mask = { 0, 0, 255 },
- .error_level = { 0, 3, 11 },
- .syncd_timer = { 1*100, 30*100, 7200*100},
- .stats_clear = { 0, 0, 1 },
- .inherit_sync = { 0, 1, 1 },
- .inherit_nodump = { 0, 1, 1 },
- .inherit_noatim = { 0, 1, 1 },
- .xfs_buf_timer = { 100/2, 1*100, 30*100 },
- .xfs_buf_age = { 1*100, 15*100, 7200*100},
- .inherit_nosym = { 0, 0, 1 },
- .rotorstep = { 1, 1, 255 },
- .inherit_nodfrg = { 0, 1, 1 },
- .fstrm_timer = { 1, 30*100, 3600*100},
-};
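
When CONFIG_SYSCTL is enabled these knobs surface under /proc/sys/fs/xfs/ (see Documentation/filesystems/xfs.txt for the full list). A minimal sketch reading one of them:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/fs/xfs/error_level", "r");
	int val;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &val) == 1)
		printf("error_level = %d (DFLT above is 3)\n", val);
	fclose(f);
	return 0;
}
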
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
deleted file mode 100644
index acca2c5..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ /dev/null
@@ -1,1556 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ioctl.h"
-#include "xfs_rtalloc.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_bmap.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_dfrag.h"
-#include "xfs_fsops.h"
-#include "xfs_vnodeops.h"
-#include "xfs_discard.h"
-#include "xfs_quota.h"
-#include "xfs_inode_item.h"
-#include "xfs_export.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/dcache.h>
-#include <linux/mount.h>
-#include <linux/namei.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/exportfs.h>
-
-/*
- * xfs_find_handle maps from userspace xfs_fsop_handlereq structure to
- * a file or fs handle.
- *
- * XFS_IOC_PATH_TO_FSHANDLE
- * returns fs handle for a mount point or path within that mount point
- * XFS_IOC_FD_TO_HANDLE
- * returns full handle for a FD opened in user space
- * XFS_IOC_PATH_TO_HANDLE
- * returns full handle for a path
- */
-int
-xfs_find_handle(
- unsigned int cmd,
- xfs_fsop_handlereq_t *hreq)
-{
- int hsize;
- xfs_handle_t handle;
- struct inode *inode;
- struct file *file = NULL;
- struct path path;
- int error;
- struct xfs_inode *ip;
-
- if (cmd == XFS_IOC_FD_TO_HANDLE) {
- file = fget(hreq->fd);
- if (!file)
- return -EBADF;
- inode = file->f_path.dentry->d_inode;
- } else {
- error = user_lpath((const char __user *)hreq->path, &path);
- if (error)
- return error;
- inode = path.dentry->d_inode;
- }
- ip = XFS_I(inode);
-
- /*
- * We can only generate handles for inodes residing on an XFS filesystem,
- * and only for regular files, directories or symbolic links.
- */
- error = -EINVAL;
- if (inode->i_sb->s_magic != XFS_SB_MAGIC)
- goto out_put;
-
- error = -EBADF;
- if (!S_ISREG(inode->i_mode) &&
- !S_ISDIR(inode->i_mode) &&
- !S_ISLNK(inode->i_mode))
- goto out_put;
-
-
- memcpy(&handle.ha_fsid, ip->i_mount->m_fixedfsid, sizeof(xfs_fsid_t));
-
- if (cmd == XFS_IOC_PATH_TO_FSHANDLE) {
- /*
- * This handle only contains an fsid, zero the rest.
- */
- memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));
- hsize = sizeof(xfs_fsid_t);
- } else {
- int lock_mode;
-
- lock_mode = xfs_ilock_map_shared(ip);
- handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
- sizeof(handle.ha_fid.fid_len);
- handle.ha_fid.fid_pad = 0;
- handle.ha_fid.fid_gen = ip->i_d.di_gen;
- handle.ha_fid.fid_ino = ip->i_ino;
- xfs_iunlock_map_shared(ip, lock_mode);
-
- hsize = XFS_HSIZE(handle);
- }
-
- error = -EFAULT;
- if (copy_to_user(hreq->ohandle, &handle, hsize) ||
- copy_to_user(hreq->ohandlen, &hsize, sizeof(__s32)))
- goto out_put;
-
- error = 0;
-
- out_put:
- if (cmd == XFS_IOC_FD_TO_HANDLE)
- fput(file);
- else
- path_put(&path);
- return error;
-}
-
-/*
- * No need to do permission checks on the various pathname components
- * as the handle operations are privileged.
- */
-STATIC int
-xfs_handle_acceptable(
- void *context,
- struct dentry *dentry)
-{
- return 1;
-}
-
-/*
- * Convert userspace handle data into a dentry.
- */
-struct dentry *
-xfs_handle_to_dentry(
- struct file *parfilp,
- void __user *uhandle,
- u32 hlen)
-{
- xfs_handle_t handle;
- struct xfs_fid64 fid;
-
- /*
- * Only allow handle opens under a directory.
- */
- if (!S_ISDIR(parfilp->f_path.dentry->d_inode->i_mode))
- return ERR_PTR(-ENOTDIR);
-
- if (hlen != sizeof(xfs_handle_t))
- return ERR_PTR(-EINVAL);
- if (copy_from_user(&handle, uhandle, hlen))
- return ERR_PTR(-EFAULT);
- if (handle.ha_fid.fid_len !=
- sizeof(handle.ha_fid) - sizeof(handle.ha_fid.fid_len))
- return ERR_PTR(-EINVAL);
-
- memset(&fid, 0, sizeof(struct fid));
- fid.ino = handle.ha_fid.fid_ino;
- fid.gen = handle.ha_fid.fid_gen;
-
- return exportfs_decode_fh(parfilp->f_path.mnt, (struct fid *)&fid, 3,
- FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG,
- xfs_handle_acceptable, NULL);
-}
-
-STATIC struct dentry *
-xfs_handlereq_to_dentry(
- struct file *parfilp,
- xfs_fsop_handlereq_t *hreq)
-{
- return xfs_handle_to_dentry(parfilp, hreq->ihandle, hreq->ihandlen);
-}
-
-int
-xfs_open_by_handle(
- struct file *parfilp,
- xfs_fsop_handlereq_t *hreq)
-{
- const struct cred *cred = current_cred();
- int error;
- int fd;
- int permflag;
- struct file *filp;
- struct inode *inode;
- struct dentry *dentry;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
-
- dentry = xfs_handlereq_to_dentry(parfilp, hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- inode = dentry->d_inode;
-
- /* Restrict xfs_open_by_handle to directories & regular files. */
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode))) {
- error = -XFS_ERROR(EPERM);
- goto out_dput;
- }
-
-#if BITS_PER_LONG != 32
- hreq->oflags |= O_LARGEFILE;
-#endif
-
- /* Put open permission in namei format. */
- permflag = hreq->oflags;
- if ((permflag+1) & O_ACCMODE)
- permflag++;
- if (permflag & O_TRUNC)
- permflag |= 2;
-
- if ((!(permflag & O_APPEND) || (permflag & O_TRUNC)) &&
- (permflag & FMODE_WRITE) && IS_APPEND(inode)) {
- error = -XFS_ERROR(EPERM);
- goto out_dput;
- }
-
- if ((permflag & FMODE_WRITE) && IS_IMMUTABLE(inode)) {
- error = -XFS_ERROR(EACCES);
- goto out_dput;
- }
-
- /* Can't write directories. */
- if (S_ISDIR(inode->i_mode) && (permflag & FMODE_WRITE)) {
- error = -XFS_ERROR(EISDIR);
- goto out_dput;
- }
-
- fd = get_unused_fd();
- if (fd < 0) {
- error = fd;
- goto out_dput;
- }
-
- filp = dentry_open(dentry, mntget(parfilp->f_path.mnt),
- hreq->oflags, cred);
- if (IS_ERR(filp)) {
- put_unused_fd(fd);
- return PTR_ERR(filp);
- }
-
- if (inode->i_mode & S_IFREG) {
- filp->f_flags |= O_NOATIME;
- filp->f_mode |= FMODE_NOCMTIME;
- }
-
- fd_install(fd, filp);
- return fd;
-
- out_dput:
- dput(dentry);
- return error;
-}
-
-/*
- * This is a copy from fs/namei.c:vfs_readlink(), except for removing its
- * unused first argument.
- */
-STATIC int
-do_readlink(
- char __user *buffer,
- int buflen,
- const char *link)
-{
- int len;
-
- len = PTR_ERR(link);
- if (IS_ERR(link))
- goto out;
-
- len = strlen(link);
- if (len > (unsigned) buflen)
- len = buflen;
- if (copy_to_user(buffer, link, len))
- len = -EFAULT;
- out:
- return len;
-}
-
-
-int
-xfs_readlink_by_handle(
- struct file *parfilp,
- xfs_fsop_handlereq_t *hreq)
-{
- struct dentry *dentry;
- __u32 olen;
- void *link;
- int error;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
-
- dentry = xfs_handlereq_to_dentry(parfilp, hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
- /* Restrict this handle operation to symlinks only. */
- if (!S_ISLNK(dentry->d_inode->i_mode)) {
- error = -XFS_ERROR(EINVAL);
- goto out_dput;
- }
-
- if (copy_from_user(&olen, hreq->ohandlen, sizeof(__u32))) {
- error = -XFS_ERROR(EFAULT);
- goto out_dput;
- }
-
- link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
- if (!link) {
- error = -XFS_ERROR(ENOMEM);
- goto out_dput;
- }
-
- error = -xfs_readlink(XFS_I(dentry->d_inode), link);
- if (error)
- goto out_kfree;
- error = do_readlink(hreq->ohandle, olen, link);
- if (error)
- goto out_kfree;
-
- out_kfree:
- kfree(link);
- out_dput:
- dput(dentry);
- return error;
-}
-
-STATIC int
-xfs_fssetdm_by_handle(
- struct file *parfilp,
- void __user *arg)
-{
- int error;
- struct fsdmidata fsd;
- xfs_fsop_setdm_handlereq_t dmhreq;
- struct dentry *dentry;
-
- if (!capable(CAP_MKNOD))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
- if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
- error = -XFS_ERROR(EPERM);
- goto out;
- }
-
- if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
- error = -XFS_ERROR(EFAULT);
- goto out;
- }
-
- error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
- fsd.fsd_dmstate);
-
- out:
- dput(dentry);
- return error;
-}
-
-STATIC int
-xfs_attrlist_by_handle(
- struct file *parfilp,
- void __user *arg)
-{
- int error = -ENOMEM;
- attrlist_cursor_kern_t *cursor;
- xfs_fsop_attrlist_handlereq_t al_hreq;
- struct dentry *dentry;
- char *kbuf;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
- return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
- return -XFS_ERROR(EINVAL);
-
- /*
- * Reject flags, only allow namespaces.
- */
- if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
- return -XFS_ERROR(EINVAL);
-
- dentry = xfs_handlereq_to_dentry(parfilp, &al_hreq.hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
- kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
- if (!kbuf)
- goto out_dput;
-
- cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
- error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
- al_hreq.flags, cursor);
- if (error)
- goto out_kfree;
-
- if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
- error = -EFAULT;
-
- out_kfree:
- kfree(kbuf);
- out_dput:
- dput(dentry);
- return error;
-}
-
-int
-xfs_attrmulti_attr_get(
- struct inode *inode,
- unsigned char *name,
- unsigned char __user *ubuf,
- __uint32_t *len,
- __uint32_t flags)
-{
- unsigned char *kbuf;
- int error = EFAULT;
-
- if (*len > XATTR_SIZE_MAX)
- return EINVAL;
- kbuf = kmalloc(*len, GFP_KERNEL);
- if (!kbuf)
- return ENOMEM;
-
- error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
- if (error)
- goto out_kfree;
-
- if (copy_to_user(ubuf, kbuf, *len))
- error = EFAULT;
-
- out_kfree:
- kfree(kbuf);
- return error;
-}
-
-int
-xfs_attrmulti_attr_set(
- struct inode *inode,
- unsigned char *name,
- const unsigned char __user *ubuf,
- __uint32_t len,
- __uint32_t flags)
-{
- unsigned char *kbuf;
- int error = EFAULT;
-
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return EPERM;
- if (len > XATTR_SIZE_MAX)
- return EINVAL;
-
- kbuf = memdup_user(ubuf, len);
- if (IS_ERR(kbuf))
- return PTR_ERR(kbuf);
-
- error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
-
- return error;
-}
-
-int
-xfs_attrmulti_attr_remove(
- struct inode *inode,
- unsigned char *name,
- __uint32_t flags)
-{
- if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- return EPERM;
- return xfs_attr_remove(XFS_I(inode), name, flags);
-}
-
-STATIC int
-xfs_attrmulti_by_handle(
- struct file *parfilp,
- void __user *arg)
-{
- int error;
- xfs_attr_multiop_t *ops;
- xfs_fsop_attrmulti_handlereq_t am_hreq;
- struct dentry *dentry;
- unsigned int i, size;
- unsigned char *attr_name;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&am_hreq, arg, sizeof(xfs_fsop_attrmulti_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- /* overflow check */
- if (am_hreq.opcount >= INT_MAX / sizeof(xfs_attr_multiop_t))
- return -E2BIG;
-
- dentry = xfs_handlereq_to_dentry(parfilp, &am_hreq.hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
- error = E2BIG;
- size = am_hreq.opcount * sizeof(xfs_attr_multiop_t);
- if (!size || size > 16 * PAGE_SIZE)
- goto out_dput;
-
- ops = memdup_user(am_hreq.ops, size);
- if (IS_ERR(ops)) {
- error = PTR_ERR(ops);
- goto out_dput;
- }
-
- attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
- if (!attr_name)
- goto out_kfree_ops;
-
- error = 0;
- for (i = 0; i < am_hreq.opcount; i++) {
- ops[i].am_error = strncpy_from_user((char *)attr_name,
- ops[i].am_attrname, MAXNAMELEN);
- if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
- error = -ERANGE;
- if (ops[i].am_error < 0)
- break;
-
- switch (ops[i].am_opcode) {
- case ATTR_OP_GET:
- ops[i].am_error = xfs_attrmulti_attr_get(
- dentry->d_inode, attr_name,
- ops[i].am_attrvalue, &ops[i].am_length,
- ops[i].am_flags);
- break;
- case ATTR_OP_SET:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
- if (ops[i].am_error)
- break;
- ops[i].am_error = xfs_attrmulti_attr_set(
- dentry->d_inode, attr_name,
- ops[i].am_attrvalue, ops[i].am_length,
- ops[i].am_flags);
- mnt_drop_write(parfilp->f_path.mnt);
- break;
- case ATTR_OP_REMOVE:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
- if (ops[i].am_error)
- break;
- ops[i].am_error = xfs_attrmulti_attr_remove(
- dentry->d_inode, attr_name,
- ops[i].am_flags);
- mnt_drop_write(parfilp->f_path.mnt);
- break;
- default:
- ops[i].am_error = EINVAL;
- }
- }
-
- if (copy_to_user(am_hreq.ops, ops, size))
- error = XFS_ERROR(EFAULT);
-
- kfree(attr_name);
- out_kfree_ops:
- kfree(ops);
- out_dput:
- dput(dentry);
- return -error;
-}
-
-int
-xfs_ioc_space(
- struct xfs_inode *ip,
- struct inode *inode,
- struct file *filp,
- int ioflags,
- unsigned int cmd,
- xfs_flock64_t *bf)
-{
- int attr_flags = 0;
- int error;
-
- /*
- * Only allow the sys admin to reserve space unless
- * unwritten extents are enabled.
- */
- if (!xfs_sb_version_hasextflgbit(&ip->i_mount->m_sb) &&
- !capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
-
- if (inode->i_flags & (S_IMMUTABLE|S_APPEND))
- return -XFS_ERROR(EPERM);
-
- if (!(filp->f_mode & FMODE_WRITE))
- return -XFS_ERROR(EBADF);
-
- if (!S_ISREG(inode->i_mode))
- return -XFS_ERROR(EINVAL);
-
- if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- attr_flags |= XFS_ATTR_NONBLOCK;
-
- if (filp->f_flags & O_DSYNC)
- attr_flags |= XFS_ATTR_SYNC;
-
- if (ioflags & IO_INVIS)
- attr_flags |= XFS_ATTR_DMI;
-
- error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
- return -error;
-}
-
-STATIC int
-xfs_ioc_bulkstat(
- xfs_mount_t *mp,
- unsigned int cmd,
- void __user *arg)
-{
- xfs_fsop_bulkreq_t bulkreq;
- int count; /* # of records returned */
- xfs_ino_t inlast; /* last inode number */
- int done;
- int error;
-
- /* done = 1 if there are more stats to get and if bulkstat */
- /* should be called again (unused here, but used in dmapi) */
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- if (copy_from_user(&bulkreq, arg, sizeof(xfs_fsop_bulkreq_t)))
- return -XFS_ERROR(EFAULT);
-
- if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
- return -XFS_ERROR(EFAULT);
-
- if ((count = bulkreq.icount) <= 0)
- return -XFS_ERROR(EINVAL);
-
- if (bulkreq.ubuffer == NULL)
- return -XFS_ERROR(EINVAL);
-
- if (cmd == XFS_IOC_FSINUMBERS)
- error = xfs_inumbers(mp, &inlast, &count,
- bulkreq.ubuffer, xfs_inumbers_fmt);
- else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE)
- error = xfs_bulkstat_single(mp, &inlast,
- bulkreq.ubuffer, &done);
- else /* XFS_IOC_FSBULKSTAT */
- error = xfs_bulkstat(mp, &inlast, &count, xfs_bulkstat_one,
- sizeof(xfs_bstat_t), bulkreq.ubuffer,
- &done);
-
- if (error)
- return -error;
-
- if (bulkreq.ocount != NULL) {
- if (copy_to_user(bulkreq.lastip, &inlast,
- sizeof(xfs_ino_t)))
- return -XFS_ERROR(EFAULT);
-
- if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -XFS_ERROR(EFAULT);
- }
-
- return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry_v1(
- xfs_mount_t *mp,
- void __user *arg)
-{
- xfs_fsop_geom_t fsgeo;
- int error;
-
- error = xfs_fs_geometry(mp, &fsgeo, 3);
- if (error)
- return -error;
-
- /*
- * Caller should have passed an argument of type
- * xfs_fsop_geom_v1_t. This is a proper subset of the
- * xfs_fsop_geom_t that xfs_fs_geometry() fills in.
- */
- if (copy_to_user(arg, &fsgeo, sizeof(xfs_fsop_geom_v1_t)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-STATIC int
-xfs_ioc_fsgeometry(
- xfs_mount_t *mp,
- void __user *arg)
-{
- xfs_fsop_geom_t fsgeo;
- int error;
-
- error = xfs_fs_geometry(mp, &fsgeo, 4);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &fsgeo, sizeof(fsgeo)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-/*
- * Linux extended inode flags interface.
- */
-
-STATIC unsigned int
-xfs_merge_ioc_xflags(
- unsigned int flags,
- unsigned int start)
-{
- unsigned int xflags = start;
-
- if (flags & FS_IMMUTABLE_FL)
- xflags |= XFS_XFLAG_IMMUTABLE;
- else
- xflags &= ~XFS_XFLAG_IMMUTABLE;
- if (flags & FS_APPEND_FL)
- xflags |= XFS_XFLAG_APPEND;
- else
- xflags &= ~XFS_XFLAG_APPEND;
- if (flags & FS_SYNC_FL)
- xflags |= XFS_XFLAG_SYNC;
- else
- xflags &= ~XFS_XFLAG_SYNC;
- if (flags & FS_NOATIME_FL)
- xflags |= XFS_XFLAG_NOATIME;
- else
- xflags &= ~XFS_XFLAG_NOATIME;
- if (flags & FS_NODUMP_FL)
- xflags |= XFS_XFLAG_NODUMP;
- else
- xflags &= ~XFS_XFLAG_NODUMP;
-
- return xflags;
-}
-
-STATIC unsigned int
-xfs_di2lxflags(
- __uint16_t di_flags)
-{
- unsigned int flags = 0;
-
- if (di_flags & XFS_DIFLAG_IMMUTABLE)
- flags |= FS_IMMUTABLE_FL;
- if (di_flags & XFS_DIFLAG_APPEND)
- flags |= FS_APPEND_FL;
- if (di_flags & XFS_DIFLAG_SYNC)
- flags |= FS_SYNC_FL;
- if (di_flags & XFS_DIFLAG_NOATIME)
- flags |= FS_NOATIME_FL;
- if (di_flags & XFS_DIFLAG_NODUMP)
- flags |= FS_NODUMP_FL;
- return flags;
-}
-
-STATIC int
-xfs_ioc_fsgetxattr(
- xfs_inode_t *ip,
- int attr,
- void __user *arg)
-{
- struct fsxattr fa;
-
- memset(&fa, 0, sizeof(struct fsxattr));
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- fa.fsx_xflags = xfs_ip2xflags(ip);
- fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
- fa.fsx_projid = xfs_get_projid(ip);
-
- if (attr) {
- if (ip->i_afp) {
- if (ip->i_afp->if_flags & XFS_IFEXTENTS)
- fa.fsx_nextents = ip->i_afp->if_bytes /
- sizeof(xfs_bmbt_rec_t);
- else
- fa.fsx_nextents = ip->i_d.di_anextents;
- } else
- fa.fsx_nextents = 0;
- } else {
- if (ip->i_df.if_flags & XFS_IFEXTENTS)
- fa.fsx_nextents = ip->i_df.if_bytes /
- sizeof(xfs_bmbt_rec_t);
- else
- fa.fsx_nextents = ip->i_d.di_nextents;
- }
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
-
- if (copy_to_user(arg, &fa, sizeof(fa)))
- return -EFAULT;
- return 0;
-}
-
-STATIC void
-xfs_set_diflags(
- struct xfs_inode *ip,
- unsigned int xflags)
-{
- unsigned int di_flags;
-
- /* can't set PREALLOC this way, just preserve it */
- di_flags = (ip->i_d.di_flags & XFS_DIFLAG_PREALLOC);
- if (xflags & XFS_XFLAG_IMMUTABLE)
- di_flags |= XFS_DIFLAG_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
- di_flags |= XFS_DIFLAG_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
- di_flags |= XFS_DIFLAG_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
- di_flags |= XFS_DIFLAG_NOATIME;
- if (xflags & XFS_XFLAG_NODUMP)
- di_flags |= XFS_DIFLAG_NODUMP;
- if (xflags & XFS_XFLAG_PROJINHERIT)
- di_flags |= XFS_DIFLAG_PROJINHERIT;
- if (xflags & XFS_XFLAG_NODEFRAG)
- di_flags |= XFS_DIFLAG_NODEFRAG;
- if (xflags & XFS_XFLAG_FILESTREAM)
- di_flags |= XFS_DIFLAG_FILESTREAM;
- if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
- if (xflags & XFS_XFLAG_RTINHERIT)
- di_flags |= XFS_DIFLAG_RTINHERIT;
- if (xflags & XFS_XFLAG_NOSYMLINKS)
- di_flags |= XFS_DIFLAG_NOSYMLINKS;
- if (xflags & XFS_XFLAG_EXTSZINHERIT)
- di_flags |= XFS_DIFLAG_EXTSZINHERIT;
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
- if (xflags & XFS_XFLAG_REALTIME)
- di_flags |= XFS_DIFLAG_REALTIME;
- if (xflags & XFS_XFLAG_EXTSIZE)
- di_flags |= XFS_DIFLAG_EXTSIZE;
- }
-
- ip->i_d.di_flags = di_flags;
-}
-
-STATIC void
-xfs_diflags_to_linux(
- struct xfs_inode *ip)
-{
- struct inode *inode = VFS_I(ip);
- unsigned int xflags = xfs_ip2xflags(ip);
-
- if (xflags & XFS_XFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (xflags & XFS_XFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (xflags & XFS_XFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (xflags & XFS_XFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
-}
-
-#define FSX_PROJID 1
-#define FSX_EXTSIZE 2
-#define FSX_XFLAGS 4
-#define FSX_NONBLOCK 8
-
-STATIC int
-xfs_ioctl_setattr(
- xfs_inode_t *ip,
- struct fsxattr *fa,
- int mask)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- unsigned int lock_flags = 0;
- struct xfs_dquot *udqp = NULL;
- struct xfs_dquot *gdqp = NULL;
- struct xfs_dquot *olddquot = NULL;
- int code;
-
- trace_xfs_ioctl_setattr(ip);
-
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
-
- /*
- * Disallow 32bit project ids when projid32bit feature is not enabled.
- */
- if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
- !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
- return XFS_ERROR(EINVAL);
-
- /*
-	 * If disk quotas are on, we make sure that the dquots do exist on disk,
- * before we start any other transactions. Trying to do this later
- * is messy. We don't care to take a readlock to look at the ids
- * in inode here, because we can't hold it across the trans_reserve.
- * If the IDs do change before we take the ilock, we're covered
- * because the i_*dquot fields will get updated anyway.
- */
- if (XFS_IS_QUOTA_ON(mp) && (mask & FSX_PROJID)) {
- code = xfs_qm_vop_dqalloc(ip, ip->i_d.di_uid,
- ip->i_d.di_gid, fa->fsx_projid,
- XFS_QMOPT_PQUOTA, &udqp, &gdqp);
- if (code)
- return code;
- }
-
- /*
- * For the other attributes, we acquire the inode lock and
- * first do an error checking pass.
- */
- tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
- code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
- if (code)
- goto error_return;
-
- lock_flags = XFS_ILOCK_EXCL;
- xfs_ilock(ip, lock_flags);
-
- /*
- * CAP_FOWNER overrides the following restrictions:
- *
- * The user ID of the calling process must be equal
- * to the file owner ID, except in cases where the
- * CAP_FSETID capability is applicable.
- */
- if (current_fsuid() != ip->i_d.di_uid && !capable(CAP_FOWNER)) {
- code = XFS_ERROR(EPERM);
- goto error_return;
- }
-
- /*
- * Do a quota reservation only if projid is actually going to change.
- */
- if (mask & FSX_PROJID) {
- if (XFS_IS_QUOTA_RUNNING(mp) &&
- XFS_IS_PQUOTA_ON(mp) &&
- xfs_get_projid(ip) != fa->fsx_projid) {
- ASSERT(tp);
- code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
- capable(CAP_FOWNER) ?
- XFS_QMOPT_FORCE_RES : 0);
- if (code) /* out of quota */
- goto error_return;
- }
- }
-
- if (mask & FSX_EXTSIZE) {
- /*
- * Can't change extent size if any extents are allocated.
- */
- if (ip->i_d.di_nextents &&
- ((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) !=
- fa->fsx_extsize)) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
- goto error_return;
- }
-
- /*
- * Extent size must be a multiple of the appropriate block
- * size, if set at all. It must also be smaller than the
- * maximum extent size supported by the filesystem.
- *
- * Also, for non-realtime files, limit the extent size hint to
- * half the size of the AGs in the filesystem so alignment
- * doesn't result in extents larger than an AG.
- */
- if (fa->fsx_extsize != 0) {
- xfs_extlen_t size;
- xfs_fsblock_t extsize_fsb;
-
- extsize_fsb = XFS_B_TO_FSB(mp, fa->fsx_extsize);
- if (extsize_fsb > MAXEXTLEN) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
-
- if (XFS_IS_REALTIME_INODE(ip) ||
- ((mask & FSX_XFLAGS) &&
- (fa->fsx_xflags & XFS_XFLAG_REALTIME))) {
- size = mp->m_sb.sb_rextsize <<
- mp->m_sb.sb_blocklog;
- } else {
- size = mp->m_sb.sb_blocksize;
- if (extsize_fsb > mp->m_sb.sb_agblocks / 2) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
- }
-
- if (fa->fsx_extsize % size) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
- }
- }
-
- if (mask & FSX_XFLAGS) {
- /*
- * Can't change realtime flag if any extents are allocated.
- */
- if ((ip->i_d.di_nextents || ip->i_delayed_blks) &&
- (XFS_IS_REALTIME_INODE(ip)) !=
- (fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
- code = XFS_ERROR(EINVAL); /* EFBIG? */
- goto error_return;
- }
-
- /*
- * If realtime flag is set then must have realtime data.
- */
- if ((fa->fsx_xflags & XFS_XFLAG_REALTIME)) {
- if ((mp->m_sb.sb_rblocks == 0) ||
- (mp->m_sb.sb_rextsize == 0) ||
- (ip->i_d.di_extsize % mp->m_sb.sb_rextsize)) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
- }
-
- /*
- * Can't modify an immutable/append-only file unless
- * we have appropriate permission.
- */
- if ((ip->i_d.di_flags &
- (XFS_DIFLAG_IMMUTABLE|XFS_DIFLAG_APPEND) ||
- (fa->fsx_xflags &
- (XFS_XFLAG_IMMUTABLE | XFS_XFLAG_APPEND))) &&
- !capable(CAP_LINUX_IMMUTABLE)) {
- code = XFS_ERROR(EPERM);
- goto error_return;
- }
- }
-
- xfs_trans_ijoin(tp, ip);
-
- /*
- * Change file ownership. Must be the owner or privileged.
- */
- if (mask & FSX_PROJID) {
- /*
- * CAP_FSETID overrides the following restrictions:
- *
- * The set-user-ID and set-group-ID bits of a file will be
- * cleared upon successful return from chown()
- */
- if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
- !capable(CAP_FSETID))
- ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
-
- /*
- * Change the ownerships and register quota modifications
- * in the transaction.
- */
- if (xfs_get_projid(ip) != fa->fsx_projid) {
- if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
- olddquot = xfs_qm_vop_chown(tp, ip,
- &ip->i_gdquot, gdqp);
- }
- xfs_set_projid(ip, fa->fsx_projid);
-
- /*
- * We may have to rev the inode as well as
- * the superblock version number since projids didn't
- * exist before DINODE_VERSION_2 and SB_VERSION_NLINK.
- */
- if (ip->i_d.di_version == 1)
- xfs_bump_ino_vers2(tp, ip);
- }
-
- }
-
- if (mask & FSX_EXTSIZE)
- ip->i_d.di_extsize = fa->fsx_extsize >> mp->m_sb.sb_blocklog;
- if (mask & FSX_XFLAGS) {
- xfs_set_diflags(ip, fa->fsx_xflags);
- xfs_diflags_to_linux(ip);
- }
-
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- XFS_STATS_INC(xs_ig_attrchg);
-
- /*
- * If this is a synchronous mount, make sure that the
- * transaction goes to disk before returning to the user.
- * This is slightly sub-optimal in that truncates require
- * two sync transactions instead of one for wsync filesystems.
- * One for the truncate and one for the timestamps since we
- * don't want to change the timestamps unless we're sure the
- * truncate worked. Truncates are less than 1% of the laddis
- * mix so this probably isn't worth the trouble to optimize.
- */
- if (mp->m_flags & XFS_MOUNT_WSYNC)
- xfs_trans_set_sync(tp);
- code = xfs_trans_commit(tp, 0);
- xfs_iunlock(ip, lock_flags);
-
- /*
- * Release any dquot(s) the inode had kept before chown.
- */
- xfs_qm_dqrele(olddquot);
- xfs_qm_dqrele(udqp);
- xfs_qm_dqrele(gdqp);
-
- return code;
-
- error_return:
- xfs_qm_dqrele(udqp);
- xfs_qm_dqrele(gdqp);
- xfs_trans_cancel(tp, 0);
- if (lock_flags)
- xfs_iunlock(ip, lock_flags);
- return code;
-}
-
-STATIC int
-xfs_ioc_fssetxattr(
- xfs_inode_t *ip,
- struct file *filp,
- void __user *arg)
-{
- struct fsxattr fa;
- unsigned int mask;
-
- if (copy_from_user(&fa, arg, sizeof(fa)))
- return -EFAULT;
-
- mask = FSX_XFLAGS | FSX_EXTSIZE | FSX_PROJID;
- if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- mask |= FSX_NONBLOCK;
-
- return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_ioc_getxflags(
- xfs_inode_t *ip,
- void __user *arg)
-{
- unsigned int flags;
-
- flags = xfs_di2lxflags(ip->i_d.di_flags);
- if (copy_to_user(arg, &flags, sizeof(flags)))
- return -EFAULT;
- return 0;
-}
-
-STATIC int
-xfs_ioc_setxflags(
- xfs_inode_t *ip,
- struct file *filp,
- void __user *arg)
-{
- struct fsxattr fa;
- unsigned int flags;
- unsigned int mask;
-
- if (copy_from_user(&flags, arg, sizeof(flags)))
- return -EFAULT;
-
- if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
- FS_NOATIME_FL | FS_NODUMP_FL | \
- FS_SYNC_FL))
- return -EOPNOTSUPP;
-
- mask = FSX_XFLAGS;
- if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
- mask |= FSX_NONBLOCK;
- fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
-
- return -xfs_ioctl_setattr(ip, &fa, mask);
-}
-
-STATIC int
-xfs_getbmap_format(void **ap, struct getbmapx *bmv, int *full)
-{
- struct getbmap __user *base = *ap;
-
- /* copy only getbmap portion (not getbmapx) */
- if (copy_to_user(base, bmv, sizeof(struct getbmap)))
- return XFS_ERROR(EFAULT);
-
- *ap += sizeof(struct getbmap);
- return 0;
-}
-
-STATIC int
-xfs_ioc_getbmap(
- struct xfs_inode *ip,
- int ioflags,
- unsigned int cmd,
- void __user *arg)
-{
- struct getbmapx bmx;
- int error;
-
- if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
- return -XFS_ERROR(EFAULT);
-
- if (bmx.bmv_count < 2)
- return -XFS_ERROR(EINVAL);
-
- bmx.bmv_iflags = (cmd == XFS_IOC_GETBMAPA ? BMV_IF_ATTRFORK : 0);
- if (ioflags & IO_INVIS)
- bmx.bmv_iflags |= BMV_IF_NO_DMAPI_READ;
-
- error = xfs_getbmap(ip, &bmx, xfs_getbmap_format,
- (struct getbmap *)arg+1);
- if (error)
- return -error;
-
- /* copy back header - only size of getbmap */
- if (copy_to_user(arg, &bmx, sizeof(struct getbmap)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-STATIC int
-xfs_getbmapx_format(void **ap, struct getbmapx *bmv, int *full)
-{
- struct getbmapx __user *base = *ap;
-
- if (copy_to_user(base, bmv, sizeof(struct getbmapx)))
- return XFS_ERROR(EFAULT);
-
- *ap += sizeof(struct getbmapx);
- return 0;
-}
-
-STATIC int
-xfs_ioc_getbmapx(
- struct xfs_inode *ip,
- void __user *arg)
-{
- struct getbmapx bmx;
- int error;
-
- if (copy_from_user(&bmx, arg, sizeof(bmx)))
- return -XFS_ERROR(EFAULT);
-
- if (bmx.bmv_count < 2)
- return -XFS_ERROR(EINVAL);
-
- if (bmx.bmv_iflags & (~BMV_IF_VALID))
- return -XFS_ERROR(EINVAL);
-
- error = xfs_getbmap(ip, &bmx, xfs_getbmapx_format,
- (struct getbmapx *)arg+1);
- if (error)
- return -error;
-
- /* copy back header */
- if (copy_to_user(arg, &bmx, sizeof(struct getbmapx)))
- return -XFS_ERROR(EFAULT);
-
- return 0;
-}
-
-/*
- * Note: some of the ioctls return positive numbers as a
- * byte count indicating success, such as readlink_by_handle.
- * So we don't "sign flip" like most other routines. This means
- * true errors need to be returned as a negative value.
- */
-long
-xfs_file_ioctl(
- struct file *filp,
- unsigned int cmd,
- unsigned long p)
-{
- struct inode *inode = filp->f_path.dentry->d_inode;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- void __user *arg = (void __user *)p;
- int ioflags = 0;
- int error;
-
- if (filp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
-
- trace_xfs_file_ioctl(ip);
-
- switch (cmd) {
- case FITRIM:
- return xfs_ioc_trim(mp, arg);
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_FREESP:
- case XFS_IOC_RESVSP:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64:
- case XFS_IOC_ZERO_RANGE: {
- xfs_flock64_t bf;
-
- if (copy_from_user(&bf, arg, sizeof(bf)))
- return -XFS_ERROR(EFAULT);
- return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
- }
- case XFS_IOC_DIOINFO: {
- struct dioattr da;
- xfs_buftarg_t *target =
- XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
-
- da.d_mem = da.d_miniosz = 1 << target->bt_sshift;
- da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
-
- if (copy_to_user(arg, &da, sizeof(da)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_FSBULKSTAT_SINGLE:
- case XFS_IOC_FSBULKSTAT:
- case XFS_IOC_FSINUMBERS:
- return xfs_ioc_bulkstat(mp, cmd, arg);
-
- case XFS_IOC_FSGEOMETRY_V1:
- return xfs_ioc_fsgeometry_v1(mp, arg);
-
- case XFS_IOC_FSGEOMETRY:
- return xfs_ioc_fsgeometry(mp, arg);
-
- case XFS_IOC_GETVERSION:
- return put_user(inode->i_generation, (int __user *)arg);
-
- case XFS_IOC_FSGETXATTR:
- return xfs_ioc_fsgetxattr(ip, 0, arg);
- case XFS_IOC_FSGETXATTRA:
- return xfs_ioc_fsgetxattr(ip, 1, arg);
- case XFS_IOC_FSSETXATTR:
- return xfs_ioc_fssetxattr(ip, filp, arg);
- case XFS_IOC_GETXFLAGS:
- return xfs_ioc_getxflags(ip, arg);
- case XFS_IOC_SETXFLAGS:
- return xfs_ioc_setxflags(ip, filp, arg);
-
- case XFS_IOC_FSSETDM: {
- struct fsdmidata dmi;
-
- if (copy_from_user(&dmi, arg, sizeof(dmi)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
- dmi.fsd_dmstate);
- return -error;
- }
-
- case XFS_IOC_GETBMAP:
- case XFS_IOC_GETBMAPA:
- return xfs_ioc_getbmap(ip, ioflags, cmd, arg);
-
- case XFS_IOC_GETBMAPX:
- return xfs_ioc_getbmapx(ip, arg);
-
- case XFS_IOC_FD_TO_HANDLE:
- case XFS_IOC_PATH_TO_HANDLE:
- case XFS_IOC_PATH_TO_FSHANDLE: {
- xfs_fsop_handlereq_t hreq;
-
- if (copy_from_user(&hreq, arg, sizeof(hreq)))
- return -XFS_ERROR(EFAULT);
- return xfs_find_handle(cmd, &hreq);
- }
- case XFS_IOC_OPEN_BY_HANDLE: {
- xfs_fsop_handlereq_t hreq;
-
- if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
- return xfs_open_by_handle(filp, &hreq);
- }
- case XFS_IOC_FSSETDM_BY_HANDLE:
- return xfs_fssetdm_by_handle(filp, arg);
-
- case XFS_IOC_READLINK_BY_HANDLE: {
- xfs_fsop_handlereq_t hreq;
-
- if (copy_from_user(&hreq, arg, sizeof(xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
- return xfs_readlink_by_handle(filp, &hreq);
- }
- case XFS_IOC_ATTRLIST_BY_HANDLE:
- return xfs_attrlist_by_handle(filp, arg);
-
- case XFS_IOC_ATTRMULTI_BY_HANDLE:
- return xfs_attrmulti_by_handle(filp, arg);
-
- case XFS_IOC_SWAPEXT: {
- struct xfs_swapext sxp;
-
- if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
- return -XFS_ERROR(EFAULT);
- error = xfs_swapext(&sxp);
- return -error;
- }
-
- case XFS_IOC_FSCOUNTS: {
- xfs_fsop_counts_t out;
-
- error = xfs_fs_counts(mp, &out);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &out, sizeof(out)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_SET_RESBLKS: {
- xfs_fsop_resblks_t inout;
- __uint64_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return -XFS_ERROR(EROFS);
-
- if (copy_from_user(&inout, arg, sizeof(inout)))
- return -XFS_ERROR(EFAULT);
-
- /* input parameter is passed in resblks field of structure */
- in = inout.resblks;
- error = xfs_reserve_blocks(mp, &in, &inout);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &inout, sizeof(inout)))
- return -XFS_ERROR(EFAULT);
- return 0;
- }
-
- case XFS_IOC_GET_RESBLKS: {
- xfs_fsop_resblks_t out;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- error = xfs_reserve_blocks(mp, NULL, &out);
- if (error)
- return -error;
-
- if (copy_to_user(arg, &out, sizeof(out)))
- return -XFS_ERROR(EFAULT);
-
- return 0;
- }
-
- case XFS_IOC_FSGROWFSDATA: {
- xfs_growfs_data_t in;
-
- if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_growfs_data(mp, &in);
- return -error;
- }
-
- case XFS_IOC_FSGROWFSLOG: {
- xfs_growfs_log_t in;
-
- if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_growfs_log(mp, &in);
- return -error;
- }
-
- case XFS_IOC_FSGROWFSRT: {
- xfs_growfs_rt_t in;
-
- if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_growfs_rt(mp, &in);
- return -error;
- }
-
- case XFS_IOC_GOINGDOWN: {
- __uint32_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (get_user(in, (__uint32_t __user *)arg))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_fs_goingdown(mp, in);
- return -error;
- }
-
- case XFS_IOC_ERROR_INJECTION: {
- xfs_error_injection_t in;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (copy_from_user(&in, arg, sizeof(in)))
- return -XFS_ERROR(EFAULT);
-
- error = xfs_errortag_add(in.errtag, mp);
- return -error;
- }
-
- case XFS_IOC_ERROR_CLEARALL:
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- error = xfs_errortag_clearall(mp, 1);
- return -error;
-
- default:
- return -ENOTTY;
- }
-}
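
The handlers above follow one sign convention: internal XFS routines return positive errno values, and the ioctl entry points negate them exactly once at the VFS boundary, except where a positive byte count is itself the success value. A minimal standalone sketch of that convention (the helper names are hypothetical, not part of this patch):

#include <errno.h>

/* internal layer: positive errno on failure, 0 on success */
static int my_xfs_op(int fail)
{
	return fail ? EINVAL : 0;
}

/* boundary layer: flip the sign exactly once */
static long my_ioctl_handler(int fail)
{
	int error = my_xfs_op(fail);

	if (error)
		return -error;
	return 0;	/* or a positive byte count, as readlink_by_handle returns */
}
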
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.h b/fs/xfs/linux-2.6/xfs_ioctl.h
deleted file mode 100644
index d56173b..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_IOCTL_H__
-#define __XFS_IOCTL_H__
-
-extern int
-xfs_ioc_space(
- struct xfs_inode *ip,
- struct inode *inode,
- struct file *filp,
- int ioflags,
- unsigned int cmd,
- xfs_flock64_t *bf);
-
-extern int
-xfs_find_handle(
- unsigned int cmd,
- xfs_fsop_handlereq_t *hreq);
-
-extern int
-xfs_open_by_handle(
- struct file *parfilp,
- xfs_fsop_handlereq_t *hreq);
-
-extern int
-xfs_readlink_by_handle(
- struct file *parfilp,
- xfs_fsop_handlereq_t *hreq);
-
-extern int
-xfs_attrmulti_attr_get(
- struct inode *inode,
- unsigned char *name,
- unsigned char __user *ubuf,
- __uint32_t *len,
- __uint32_t flags);
-
-extern int
-xfs_attrmulti_attr_set(
- struct inode *inode,
- unsigned char *name,
- const unsigned char __user *ubuf,
- __uint32_t len,
- __uint32_t flags);
-
-extern int
-xfs_attrmulti_attr_remove(
- struct inode *inode,
- unsigned char *name,
- __uint32_t flags);
-
-extern struct dentry *
-xfs_handle_to_dentry(
- struct file *parfilp,
- void __user *uhandle,
- u32 hlen);
-
-extern long
-xfs_file_ioctl(
- struct file *filp,
- unsigned int cmd,
- unsigned long p);
-
-extern long
-xfs_file_compat_ioctl(
- struct file *file,
- unsigned int cmd,
- unsigned long arg);
-
-#endif
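
For reference, a hedged sketch of how userspace consumes one of the interfaces declared here, assuming the xfsprogs <xfs/xfs.h> header that carries the XFS_IOC_FSGEOMETRY definition; the mount point path is illustrative only:

#include <xfs/xfs.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <stdio.h>

int main(void)
{
	struct xfs_fsop_geom geo;
	int fd = open("/mnt/xfs", O_RDONLY);	/* any file on an XFS mount */

	if (fd < 0 || ioctl(fd, XFS_IOC_FSGEOMETRY, &geo) < 0) {
		perror("XFS_IOC_FSGEOMETRY");
		return 1;
	}
	printf("blocksize=%u agcount=%u\n", geo.blocksize, geo.agcount);
	return 0;
}
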
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
deleted file mode 100644
index 54e623b..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ /dev/null
@@ -1,672 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <linux/compat.h>
-#include <linux/ioctl.h>
-#include <linux/mount.h>
-#include <linux/slab.h>
-#include <asm/uaccess.h>
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_vnode.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_error.h"
-#include "xfs_dfrag.h"
-#include "xfs_vnodeops.h"
-#include "xfs_fsops.h"
-#include "xfs_alloc.h"
-#include "xfs_rtalloc.h"
-#include "xfs_attr.h"
-#include "xfs_ioctl.h"
-#include "xfs_ioctl32.h"
-#include "xfs_trace.h"
-
-#define _NATIVE_IOC(cmd, type) \
- _IOC(_IOC_DIR(cmd), _IOC_TYPE(cmd), _IOC_NR(cmd), sizeof(type))
-
-#ifdef BROKEN_X86_ALIGNMENT
-STATIC int
-xfs_compat_flock64_copyin(
- xfs_flock64_t *bf,
- compat_xfs_flock64_t __user *arg32)
-{
- if (get_user(bf->l_type, &arg32->l_type) ||
- get_user(bf->l_whence, &arg32->l_whence) ||
- get_user(bf->l_start, &arg32->l_start) ||
- get_user(bf->l_len, &arg32->l_len) ||
- get_user(bf->l_sysid, &arg32->l_sysid) ||
- get_user(bf->l_pid, &arg32->l_pid) ||
- copy_from_user(bf->l_pad, &arg32->l_pad, 4*sizeof(u32)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-STATIC int
-xfs_compat_ioc_fsgeometry_v1(
- struct xfs_mount *mp,
- compat_xfs_fsop_geom_v1_t __user *arg32)
-{
- xfs_fsop_geom_t fsgeo;
- int error;
-
- error = xfs_fs_geometry(mp, &fsgeo, 3);
- if (error)
- return -error;
- /* The 32-bit variant simply has some padding at the end */
- if (copy_to_user(arg32, &fsgeo, sizeof(struct compat_xfs_fsop_geom_v1)))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-STATIC int
-xfs_compat_growfs_data_copyin(
- struct xfs_growfs_data *in,
- compat_xfs_growfs_data_t __user *arg32)
-{
- if (get_user(in->newblocks, &arg32->newblocks) ||
- get_user(in->imaxpct, &arg32->imaxpct))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-STATIC int
-xfs_compat_growfs_rt_copyin(
- struct xfs_growfs_rt *in,
- compat_xfs_growfs_rt_t __user *arg32)
-{
- if (get_user(in->newblocks, &arg32->newblocks) ||
- get_user(in->extsize, &arg32->extsize))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-STATIC int
-xfs_inumbers_fmt_compat(
- void __user *ubuffer,
- const xfs_inogrp_t *buffer,
- long count,
- long *written)
-{
- compat_xfs_inogrp_t __user *p32 = ubuffer;
- long i;
-
- for (i = 0; i < count; i++) {
- if (put_user(buffer[i].xi_startino, &p32[i].xi_startino) ||
- put_user(buffer[i].xi_alloccount, &p32[i].xi_alloccount) ||
- put_user(buffer[i].xi_allocmask, &p32[i].xi_allocmask))
- return -XFS_ERROR(EFAULT);
- }
- *written = count * sizeof(*p32);
- return 0;
-}
-
-#else
-#define xfs_inumbers_fmt_compat xfs_inumbers_fmt
-#endif /* BROKEN_X86_ALIGNMENT */
-
-STATIC int
-xfs_ioctl32_bstime_copyin(
- xfs_bstime_t *bstime,
- compat_xfs_bstime_t __user *bstime32)
-{
- compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */
-
- if (get_user(sec32, &bstime32->tv_sec) ||
- get_user(bstime->tv_nsec, &bstime32->tv_nsec))
- return -XFS_ERROR(EFAULT);
- bstime->tv_sec = sec32;
- return 0;
-}
-
-/* xfs_bstat_t has differing alignment on intel, & bstime_t sizes everywhere */
-STATIC int
-xfs_ioctl32_bstat_copyin(
- xfs_bstat_t *bstat,
- compat_xfs_bstat_t __user *bstat32)
-{
- if (get_user(bstat->bs_ino, &bstat32->bs_ino) ||
- get_user(bstat->bs_mode, &bstat32->bs_mode) ||
- get_user(bstat->bs_nlink, &bstat32->bs_nlink) ||
- get_user(bstat->bs_uid, &bstat32->bs_uid) ||
- get_user(bstat->bs_gid, &bstat32->bs_gid) ||
- get_user(bstat->bs_rdev, &bstat32->bs_rdev) ||
- get_user(bstat->bs_blksize, &bstat32->bs_blksize) ||
- get_user(bstat->bs_size, &bstat32->bs_size) ||
- xfs_ioctl32_bstime_copyin(&bstat->bs_atime, &bstat32->bs_atime) ||
- xfs_ioctl32_bstime_copyin(&bstat->bs_mtime, &bstat32->bs_mtime) ||
- xfs_ioctl32_bstime_copyin(&bstat->bs_ctime, &bstat32->bs_ctime) ||
-	    get_user(bstat->bs_blocks,	&bstat32->bs_blocks)	||
-	    get_user(bstat->bs_xflags,	&bstat32->bs_xflags)	||
- get_user(bstat->bs_extsize, &bstat32->bs_extsize) ||
- get_user(bstat->bs_extents, &bstat32->bs_extents) ||
- get_user(bstat->bs_gen, &bstat32->bs_gen) ||
- get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
- get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
- get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
- get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
- get_user(bstat->bs_aextents, &bstat32->bs_aextents))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-/* XFS_IOC_FSBULKSTAT and friends */
-
-STATIC int
-xfs_bstime_store_compat(
- compat_xfs_bstime_t __user *p32,
- const xfs_bstime_t *p)
-{
- __s32 sec32;
-
- sec32 = p->tv_sec;
- if (put_user(sec32, &p32->tv_sec) ||
- put_user(p->tv_nsec, &p32->tv_nsec))
- return -XFS_ERROR(EFAULT);
- return 0;
-}
-
-/* Return 0 on success or positive error (to xfs_bulkstat()) */
-STATIC int
-xfs_bulkstat_one_fmt_compat(
- void __user *ubuffer,
- int ubsize,
- int *ubused,
- const xfs_bstat_t *buffer)
-{
- compat_xfs_bstat_t __user *p32 = ubuffer;
-
- if (ubsize < sizeof(*p32))
- return XFS_ERROR(ENOMEM);
-
- if (put_user(buffer->bs_ino, &p32->bs_ino) ||
- put_user(buffer->bs_mode, &p32->bs_mode) ||
- put_user(buffer->bs_nlink, &p32->bs_nlink) ||
- put_user(buffer->bs_uid, &p32->bs_uid) ||
- put_user(buffer->bs_gid, &p32->bs_gid) ||
- put_user(buffer->bs_rdev, &p32->bs_rdev) ||
- put_user(buffer->bs_blksize, &p32->bs_blksize) ||
- put_user(buffer->bs_size, &p32->bs_size) ||
- xfs_bstime_store_compat(&p32->bs_atime, &buffer->bs_atime) ||
- xfs_bstime_store_compat(&p32->bs_mtime, &buffer->bs_mtime) ||
- xfs_bstime_store_compat(&p32->bs_ctime, &buffer->bs_ctime) ||
- put_user(buffer->bs_blocks, &p32->bs_blocks) ||
- put_user(buffer->bs_xflags, &p32->bs_xflags) ||
- put_user(buffer->bs_extsize, &p32->bs_extsize) ||
- put_user(buffer->bs_extents, &p32->bs_extents) ||
- put_user(buffer->bs_gen, &p32->bs_gen) ||
- put_user(buffer->bs_projid, &p32->bs_projid) ||
- put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) ||
- put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
- put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
- put_user(buffer->bs_aextents, &p32->bs_aextents))
- return XFS_ERROR(EFAULT);
- if (ubused)
- *ubused = sizeof(*p32);
- return 0;
-}
-
-STATIC int
-xfs_bulkstat_one_compat(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* buffer to place output in */
- int ubsize, /* size of buffer */
- int *ubused, /* bytes used by me */
- int *stat) /* BULKSTAT_RV_... */
-{
- return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
- xfs_bulkstat_one_fmt_compat,
- ubused, stat);
-}
-
-/* copied from xfs_ioctl.c */
-STATIC int
-xfs_compat_ioc_bulkstat(
- xfs_mount_t *mp,
- unsigned int cmd,
- compat_xfs_fsop_bulkreq_t __user *p32)
-{
- u32 addr;
- xfs_fsop_bulkreq_t bulkreq;
- int count; /* # of records returned */
- xfs_ino_t inlast; /* last inode number */
- int done;
- int error;
-
-	/*
-	 * done = 1 if there are more stats to get and if bulkstat
-	 * should be called again (unused here, but used in dmapi)
-	 */
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- if (get_user(addr, &p32->lastip))
- return -XFS_ERROR(EFAULT);
- bulkreq.lastip = compat_ptr(addr);
- if (get_user(bulkreq.icount, &p32->icount) ||
- get_user(addr, &p32->ubuffer))
- return -XFS_ERROR(EFAULT);
- bulkreq.ubuffer = compat_ptr(addr);
- if (get_user(addr, &p32->ocount))
- return -XFS_ERROR(EFAULT);
- bulkreq.ocount = compat_ptr(addr);
-
- if (copy_from_user(&inlast, bulkreq.lastip, sizeof(__s64)))
- return -XFS_ERROR(EFAULT);
-
- if ((count = bulkreq.icount) <= 0)
- return -XFS_ERROR(EINVAL);
-
- if (bulkreq.ubuffer == NULL)
- return -XFS_ERROR(EINVAL);
-
- if (cmd == XFS_IOC_FSINUMBERS_32) {
- error = xfs_inumbers(mp, &inlast, &count,
- bulkreq.ubuffer, xfs_inumbers_fmt_compat);
- } else if (cmd == XFS_IOC_FSBULKSTAT_SINGLE_32) {
- int res;
-
- error = xfs_bulkstat_one_compat(mp, inlast, bulkreq.ubuffer,
- sizeof(compat_xfs_bstat_t), 0, &res);
- } else if (cmd == XFS_IOC_FSBULKSTAT_32) {
- error = xfs_bulkstat(mp, &inlast, &count,
- xfs_bulkstat_one_compat, sizeof(compat_xfs_bstat_t),
- bulkreq.ubuffer, &done);
- } else
- error = XFS_ERROR(EINVAL);
- if (error)
- return -error;
-
- if (bulkreq.ocount != NULL) {
- if (copy_to_user(bulkreq.lastip, &inlast,
- sizeof(xfs_ino_t)))
- return -XFS_ERROR(EFAULT);
-
- if (copy_to_user(bulkreq.ocount, &count, sizeof(count)))
- return -XFS_ERROR(EFAULT);
- }
-
- return 0;
-}
-
-STATIC int
-xfs_compat_handlereq_copyin(
- xfs_fsop_handlereq_t *hreq,
- compat_xfs_fsop_handlereq_t __user *arg32)
-{
- compat_xfs_fsop_handlereq_t hreq32;
-
- if (copy_from_user(&hreq32, arg32, sizeof(compat_xfs_fsop_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- hreq->fd = hreq32.fd;
- hreq->path = compat_ptr(hreq32.path);
- hreq->oflags = hreq32.oflags;
- hreq->ihandle = compat_ptr(hreq32.ihandle);
- hreq->ihandlen = hreq32.ihandlen;
- hreq->ohandle = compat_ptr(hreq32.ohandle);
- hreq->ohandlen = compat_ptr(hreq32.ohandlen);
-
- return 0;
-}
-
-STATIC struct dentry *
-xfs_compat_handlereq_to_dentry(
- struct file *parfilp,
- compat_xfs_fsop_handlereq_t *hreq)
-{
- return xfs_handle_to_dentry(parfilp,
- compat_ptr(hreq->ihandle), hreq->ihandlen);
-}
-
-STATIC int
-xfs_compat_attrlist_by_handle(
- struct file *parfilp,
- void __user *arg)
-{
- int error;
- attrlist_cursor_kern_t *cursor;
- compat_xfs_fsop_attrlist_handlereq_t al_hreq;
- struct dentry *dentry;
- char *kbuf;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&al_hreq, arg,
- sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
- return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
- return -XFS_ERROR(EINVAL);
-
- /*
-	 * Reject flags; only allow namespaces.
- */
- if (al_hreq.flags & ~(ATTR_ROOT | ATTR_SECURE))
- return -XFS_ERROR(EINVAL);
-
- dentry = xfs_compat_handlereq_to_dentry(parfilp, &al_hreq.hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
- error = -ENOMEM;
- kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
- if (!kbuf)
- goto out_dput;
-
- cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
- error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
- al_hreq.flags, cursor);
- if (error)
- goto out_kfree;
-
- if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
- error = -EFAULT;
-
- out_kfree:
- kfree(kbuf);
- out_dput:
- dput(dentry);
- return error;
-}
-
-STATIC int
-xfs_compat_attrmulti_by_handle(
- struct file *parfilp,
- void __user *arg)
-{
- int error;
- compat_xfs_attr_multiop_t *ops;
- compat_xfs_fsop_attrmulti_handlereq_t am_hreq;
- struct dentry *dentry;
- unsigned int i, size;
- unsigned char *attr_name;
-
- if (!capable(CAP_SYS_ADMIN))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&am_hreq, arg,
- sizeof(compat_xfs_fsop_attrmulti_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- /* overflow check */
- if (am_hreq.opcount >= INT_MAX / sizeof(compat_xfs_attr_multiop_t))
- return -E2BIG;
-
- dentry = xfs_compat_handlereq_to_dentry(parfilp, &am_hreq.hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
- error = E2BIG;
- size = am_hreq.opcount * sizeof(compat_xfs_attr_multiop_t);
- if (!size || size > 16 * PAGE_SIZE)
- goto out_dput;
-
- ops = memdup_user(compat_ptr(am_hreq.ops), size);
- if (IS_ERR(ops)) {
- error = PTR_ERR(ops);
- goto out_dput;
- }
-
-	error = ENOMEM;
-	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);
-	if (!attr_name)
-		goto out_kfree_ops;
-
- error = 0;
- for (i = 0; i < am_hreq.opcount; i++) {
- ops[i].am_error = strncpy_from_user((char *)attr_name,
- compat_ptr(ops[i].am_attrname),
- MAXNAMELEN);
- if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
- error = -ERANGE;
- if (ops[i].am_error < 0)
- break;
-
- switch (ops[i].am_opcode) {
- case ATTR_OP_GET:
- ops[i].am_error = xfs_attrmulti_attr_get(
- dentry->d_inode, attr_name,
- compat_ptr(ops[i].am_attrvalue),
- &ops[i].am_length, ops[i].am_flags);
- break;
- case ATTR_OP_SET:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
- if (ops[i].am_error)
- break;
- ops[i].am_error = xfs_attrmulti_attr_set(
- dentry->d_inode, attr_name,
- compat_ptr(ops[i].am_attrvalue),
- ops[i].am_length, ops[i].am_flags);
- mnt_drop_write(parfilp->f_path.mnt);
- break;
- case ATTR_OP_REMOVE:
- ops[i].am_error = mnt_want_write(parfilp->f_path.mnt);
- if (ops[i].am_error)
- break;
- ops[i].am_error = xfs_attrmulti_attr_remove(
- dentry->d_inode, attr_name,
- ops[i].am_flags);
- mnt_drop_write(parfilp->f_path.mnt);
- break;
- default:
- ops[i].am_error = EINVAL;
- }
- }
-
- if (copy_to_user(compat_ptr(am_hreq.ops), ops, size))
- error = XFS_ERROR(EFAULT);
-
- kfree(attr_name);
- out_kfree_ops:
- kfree(ops);
- out_dput:
- dput(dentry);
- return -error;
-}
-
-STATIC int
-xfs_compat_fssetdm_by_handle(
- struct file *parfilp,
- void __user *arg)
-{
- int error;
- struct fsdmidata fsd;
- compat_xfs_fsop_setdm_handlereq_t dmhreq;
- struct dentry *dentry;
-
- if (!capable(CAP_MKNOD))
- return -XFS_ERROR(EPERM);
- if (copy_from_user(&dmhreq, arg,
- sizeof(compat_xfs_fsop_setdm_handlereq_t)))
- return -XFS_ERROR(EFAULT);
-
- dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
- if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
- error = -XFS_ERROR(EPERM);
- goto out;
- }
-
- if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
- error = -XFS_ERROR(EFAULT);
- goto out;
- }
-
- error = -xfs_set_dmattrs(XFS_I(dentry->d_inode), fsd.fsd_dmevmask,
- fsd.fsd_dmstate);
-
-out:
- dput(dentry);
- return error;
-}
-
-long
-xfs_file_compat_ioctl(
- struct file *filp,
- unsigned cmd,
- unsigned long p)
-{
- struct inode *inode = filp->f_path.dentry->d_inode;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- void __user *arg = (void __user *)p;
- int ioflags = 0;
- int error;
-
- if (filp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
-
- trace_xfs_file_compat_ioctl(ip);
-
- switch (cmd) {
- /* No size or alignment issues on any arch */
- case XFS_IOC_DIOINFO:
- case XFS_IOC_FSGEOMETRY:
- case XFS_IOC_FSGETXATTR:
- case XFS_IOC_FSSETXATTR:
- case XFS_IOC_FSGETXATTRA:
- case XFS_IOC_FSSETDM:
- case XFS_IOC_GETBMAP:
- case XFS_IOC_GETBMAPA:
- case XFS_IOC_GETBMAPX:
- case XFS_IOC_FSCOUNTS:
- case XFS_IOC_SET_RESBLKS:
- case XFS_IOC_GET_RESBLKS:
- case XFS_IOC_FSGROWFSLOG:
- case XFS_IOC_GOINGDOWN:
- case XFS_IOC_ERROR_INJECTION:
- case XFS_IOC_ERROR_CLEARALL:
- return xfs_file_ioctl(filp, cmd, p);
-#ifndef BROKEN_X86_ALIGNMENT
- /* These are handled fine if no alignment issues */
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_FREESP:
- case XFS_IOC_RESVSP:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64:
- case XFS_IOC_FSGEOMETRY_V1:
- case XFS_IOC_FSGROWFSDATA:
- case XFS_IOC_FSGROWFSRT:
- case XFS_IOC_ZERO_RANGE:
- return xfs_file_ioctl(filp, cmd, p);
-#else
- case XFS_IOC_ALLOCSP_32:
- case XFS_IOC_FREESP_32:
- case XFS_IOC_ALLOCSP64_32:
- case XFS_IOC_FREESP64_32:
- case XFS_IOC_RESVSP_32:
- case XFS_IOC_UNRESVSP_32:
- case XFS_IOC_RESVSP64_32:
- case XFS_IOC_UNRESVSP64_32:
- case XFS_IOC_ZERO_RANGE_32: {
- struct xfs_flock64 bf;
-
- if (xfs_compat_flock64_copyin(&bf, arg))
- return -XFS_ERROR(EFAULT);
- cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
- return xfs_ioc_space(ip, inode, filp, ioflags, cmd, &bf);
- }
- case XFS_IOC_FSGEOMETRY_V1_32:
- return xfs_compat_ioc_fsgeometry_v1(mp, arg);
- case XFS_IOC_FSGROWFSDATA_32: {
- struct xfs_growfs_data in;
-
- if (xfs_compat_growfs_data_copyin(&in, arg))
- return -XFS_ERROR(EFAULT);
- error = xfs_growfs_data(mp, &in);
- return -error;
- }
- case XFS_IOC_FSGROWFSRT_32: {
- struct xfs_growfs_rt in;
-
- if (xfs_compat_growfs_rt_copyin(&in, arg))
- return -XFS_ERROR(EFAULT);
- error = xfs_growfs_rt(mp, &in);
- return -error;
- }
-#endif
-	/* long changes size, but xfs only copies out 32 bits */
- case XFS_IOC_GETXFLAGS_32:
- case XFS_IOC_SETXFLAGS_32:
- case XFS_IOC_GETVERSION_32:
- cmd = _NATIVE_IOC(cmd, long);
- return xfs_file_ioctl(filp, cmd, p);
- case XFS_IOC_SWAPEXT_32: {
- struct xfs_swapext sxp;
- struct compat_xfs_swapext __user *sxu = arg;
-
- /* Bulk copy in up to the sx_stat field, then copy bstat */
- if (copy_from_user(&sxp, sxu,
- offsetof(struct xfs_swapext, sx_stat)) ||
- xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
- return -XFS_ERROR(EFAULT);
- error = xfs_swapext(&sxp);
- return -error;
- }
- case XFS_IOC_FSBULKSTAT_32:
- case XFS_IOC_FSBULKSTAT_SINGLE_32:
- case XFS_IOC_FSINUMBERS_32:
- return xfs_compat_ioc_bulkstat(mp, cmd, arg);
- case XFS_IOC_FD_TO_HANDLE_32:
- case XFS_IOC_PATH_TO_HANDLE_32:
- case XFS_IOC_PATH_TO_FSHANDLE_32: {
- struct xfs_fsop_handlereq hreq;
-
- if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
- cmd = _NATIVE_IOC(cmd, struct xfs_fsop_handlereq);
- return xfs_find_handle(cmd, &hreq);
- }
- case XFS_IOC_OPEN_BY_HANDLE_32: {
- struct xfs_fsop_handlereq hreq;
-
- if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
- return xfs_open_by_handle(filp, &hreq);
- }
- case XFS_IOC_READLINK_BY_HANDLE_32: {
- struct xfs_fsop_handlereq hreq;
-
- if (xfs_compat_handlereq_copyin(&hreq, arg))
- return -XFS_ERROR(EFAULT);
- return xfs_readlink_by_handle(filp, &hreq);
- }
- case XFS_IOC_ATTRLIST_BY_HANDLE_32:
- return xfs_compat_attrlist_by_handle(filp, arg);
- case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
- return xfs_compat_attrmulti_by_handle(filp, arg);
- case XFS_IOC_FSSETDM_BY_HANDLE_32:
- return xfs_compat_fssetdm_by_handle(filp, arg);
- default:
- return -XFS_ERROR(ENOIOCTLCMD);
- }
-}
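
The compat handlers above all reduce to one pattern: copy the 32-bit layout in, widen each compat_uptr_t with compat_ptr(), and hand the native struct to the regular ioctl path. A sketch of that shape with hypothetical struct names (my_req / compat_my_req), not code from this patch:

#include <linux/compat.h>
#include <linux/uaccess.h>

struct my_req {
	void __user	*buf;
	__u32		len;
};

struct compat_my_req {
	compat_uptr_t	buf;	/* 32-bit user pointer */
	__u32		len;
};

static int my_req_copyin(struct my_req *req,
			 struct compat_my_req __user *arg32)
{
	struct compat_my_req req32;

	if (copy_from_user(&req32, arg32, sizeof(req32)))
		return -EFAULT;
	req->buf = compat_ptr(req32.buf);	/* widen the user pointer */
	req->len = req32.len;
	return 0;
}
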
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
deleted file mode 100644
index 80f4060..0000000
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright (c) 2004-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_IOCTL32_H__
-#define __XFS_IOCTL32_H__
-
-#include <linux/compat.h>
-
-/*
- * on 32-bit arches, ioctl argument structures may have different sizes
- * and/or alignment. We define compat structures which match the
- * 32-bit sizes/alignments here, and their associated ioctl numbers.
- *
- * xfs_ioctl32.c contains routines to copy these structures in and out.
- */
-
-/* stock kernel-level ioctls we support */
-#define XFS_IOC_GETXFLAGS_32 FS_IOC32_GETFLAGS
-#define XFS_IOC_SETXFLAGS_32 FS_IOC32_SETFLAGS
-#define XFS_IOC_GETVERSION_32 FS_IOC32_GETVERSION
-
-/*
- * On intel, even if sizes match, alignment and/or padding may differ.
- */
-#if defined(CONFIG_IA64) || defined(CONFIG_X86_64)
-#define BROKEN_X86_ALIGNMENT
-#define __compat_packed __attribute__((packed))
-#else
-#define __compat_packed
-#endif
-
-typedef struct compat_xfs_bstime {
- compat_time_t tv_sec; /* seconds */
- __s32 tv_nsec; /* and nanoseconds */
-} compat_xfs_bstime_t;
-
-typedef struct compat_xfs_bstat {
- __u64 bs_ino; /* inode number */
- __u16 bs_mode; /* type and mode */
- __u16 bs_nlink; /* number of links */
- __u32 bs_uid; /* user id */
- __u32 bs_gid; /* group id */
- __u32 bs_rdev; /* device value */
- __s32 bs_blksize; /* block size */
- __s64 bs_size; /* file size */
- compat_xfs_bstime_t bs_atime; /* access time */
- compat_xfs_bstime_t bs_mtime; /* modify time */
- compat_xfs_bstime_t bs_ctime; /* inode change time */
- int64_t bs_blocks; /* number of blocks */
- __u32 bs_xflags; /* extended flags */
- __s32 bs_extsize; /* extent size */
- __s32 bs_extents; /* number of extents */
- __u32 bs_gen; /* generation count */
- __u16 bs_projid_lo; /* lower part of project id */
-#define bs_projid bs_projid_lo /* (previously just bs_projid) */
- __u16 bs_projid_hi; /* high part of project id */
- unsigned char bs_pad[12]; /* pad space, unused */
- __u32 bs_dmevmask; /* DMIG event mask */
- __u16 bs_dmstate; /* DMIG state info */
- __u16 bs_aextents; /* attribute number of extents */
-} __compat_packed compat_xfs_bstat_t;
-
-typedef struct compat_xfs_fsop_bulkreq {
- compat_uptr_t lastip; /* last inode # pointer */
- __s32 icount; /* count of entries in buffer */
- compat_uptr_t ubuffer; /* user buffer for inode desc. */
- compat_uptr_t ocount; /* output count pointer */
-} compat_xfs_fsop_bulkreq_t;
-
-#define XFS_IOC_FSBULKSTAT_32 \
- _IOWR('X', 101, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSBULKSTAT_SINGLE_32 \
- _IOWR('X', 102, struct compat_xfs_fsop_bulkreq)
-#define XFS_IOC_FSINUMBERS_32 \
- _IOWR('X', 103, struct compat_xfs_fsop_bulkreq)
-
-typedef struct compat_xfs_fsop_handlereq {
- __u32 fd; /* fd for FD_TO_HANDLE */
- compat_uptr_t path; /* user pathname */
- __u32 oflags; /* open flags */
- compat_uptr_t ihandle; /* user supplied handle */
- __u32 ihandlen; /* user supplied length */
- compat_uptr_t ohandle; /* user buffer for handle */
- compat_uptr_t ohandlen; /* user buffer length */
-} compat_xfs_fsop_handlereq_t;
-
-#define XFS_IOC_PATH_TO_FSHANDLE_32 \
- _IOWR('X', 104, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_PATH_TO_HANDLE_32 \
- _IOWR('X', 105, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_FD_TO_HANDLE_32 \
- _IOWR('X', 106, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_OPEN_BY_HANDLE_32 \
- _IOWR('X', 107, struct compat_xfs_fsop_handlereq)
-#define XFS_IOC_READLINK_BY_HANDLE_32 \
- _IOWR('X', 108, struct compat_xfs_fsop_handlereq)
-
-/* The bstat field in the swapext struct needs translation */
-typedef struct compat_xfs_swapext {
- __int64_t sx_version; /* version */
- __int64_t sx_fdtarget; /* fd of target file */
- __int64_t sx_fdtmp; /* fd of tmp file */
- xfs_off_t sx_offset; /* offset into file */
-	xfs_off_t	sx_length;	/* length from offset */
- char sx_pad[16]; /* pad space, unused */
- compat_xfs_bstat_t sx_stat; /* stat of target b4 copy */
-} __compat_packed compat_xfs_swapext_t;
-
-#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext)
-
-typedef struct compat_xfs_fsop_attrlist_handlereq {
- struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
- struct xfs_attrlist_cursor pos; /* opaque cookie, list offset */
- __u32 flags; /* which namespace to use */
- __u32 buflen; /* length of buffer supplied */
- compat_uptr_t buffer; /* returned names */
-} __compat_packed compat_xfs_fsop_attrlist_handlereq_t;
-
-/* Note: actually this is read/write */
-#define XFS_IOC_ATTRLIST_BY_HANDLE_32 \
- _IOW('X', 122, struct compat_xfs_fsop_attrlist_handlereq)
-
-/* am_opcodes defined in xfs_fs.h */
-typedef struct compat_xfs_attr_multiop {
- __u32 am_opcode;
- __s32 am_error;
- compat_uptr_t am_attrname;
- compat_uptr_t am_attrvalue;
- __u32 am_length;
- __u32 am_flags;
-} compat_xfs_attr_multiop_t;
-
-typedef struct compat_xfs_fsop_attrmulti_handlereq {
- struct compat_xfs_fsop_handlereq hreq; /* handle interface structure */
- __u32 opcount;/* count of following multiop */
- /* ptr to compat_xfs_attr_multiop */
- compat_uptr_t ops; /* attr_multi data */
-} compat_xfs_fsop_attrmulti_handlereq_t;
-
-#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
- _IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
-
-typedef struct compat_xfs_fsop_setdm_handlereq {
- struct compat_xfs_fsop_handlereq hreq; /* handle information */
- /* ptr to struct fsdmidata */
- compat_uptr_t data; /* DMAPI data */
-} compat_xfs_fsop_setdm_handlereq_t;
-
-#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
- _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
-
-#ifdef BROKEN_X86_ALIGNMENT
-/* on ia32 l_start is on a 32-bit boundary */
-typedef struct compat_xfs_flock64 {
- __s16 l_type;
- __s16 l_whence;
- __s64 l_start __attribute__((packed));
- /* len == 0 means until end of file */
- __s64 l_len __attribute__((packed));
- __s32 l_sysid;
- __u32 l_pid;
- __s32 l_pad[4]; /* reserve area */
-} compat_xfs_flock64_t;
-
-#define XFS_IOC_ALLOCSP_32 _IOW('X', 10, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP_32 _IOW('X', 11, struct compat_xfs_flock64)
-#define XFS_IOC_ALLOCSP64_32 _IOW('X', 36, struct compat_xfs_flock64)
-#define XFS_IOC_FREESP64_32 _IOW('X', 37, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP_32 _IOW('X', 40, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64)
-#define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64)
-#define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64)
-#define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64)
-
-typedef struct compat_xfs_fsop_geom_v1 {
- __u32 blocksize; /* filesystem (data) block size */
- __u32 rtextsize; /* realtime extent size */
- __u32 agblocks; /* fsblocks in an AG */
- __u32 agcount; /* number of allocation groups */
- __u32 logblocks; /* fsblocks in the log */
- __u32 sectsize; /* (data) sector size, bytes */
- __u32 inodesize; /* inode size in bytes */
- __u32 imaxpct; /* max allowed inode space(%) */
- __u64 datablocks; /* fsblocks in data subvolume */
- __u64 rtblocks; /* fsblocks in realtime subvol */
- __u64 rtextents; /* rt extents in realtime subvol*/
- __u64 logstart; /* starting fsblock of the log */
- unsigned char uuid[16]; /* unique id of the filesystem */
- __u32 sunit; /* stripe unit, fsblocks */
- __u32 swidth; /* stripe width, fsblocks */
- __s32 version; /* structure version */
- __u32 flags; /* superblock version flags */
- __u32 logsectsize; /* log sector size, bytes */
- __u32 rtsectsize; /* realtime sector size, bytes */
- __u32 dirblocksize; /* directory block size, bytes */
-} __attribute__((packed)) compat_xfs_fsop_geom_v1_t;
-
-#define XFS_IOC_FSGEOMETRY_V1_32 \
- _IOR('X', 100, struct compat_xfs_fsop_geom_v1)
-
-typedef struct compat_xfs_inogrp {
- __u64 xi_startino; /* starting inode number */
- __s32 xi_alloccount; /* # bits set in allocmask */
- __u64 xi_allocmask; /* mask of allocated inodes */
-} __attribute__((packed)) compat_xfs_inogrp_t;
-
-/* These growfs input structures have padding on the end, so must translate */
-typedef struct compat_xfs_growfs_data {
- __u64 newblocks; /* new data subvol size, fsblocks */
- __u32 imaxpct; /* new inode space percentage limit */
-} __attribute__((packed)) compat_xfs_growfs_data_t;
-
-typedef struct compat_xfs_growfs_rt {
- __u64 newblocks; /* new realtime size, fsblocks */
- __u32 extsize; /* new realtime extent size, fsblocks */
-} __attribute__((packed)) compat_xfs_growfs_rt_t;
-
-#define XFS_IOC_FSGROWFSDATA_32 _IOW('X', 110, struct compat_xfs_growfs_data)
-#define XFS_IOC_FSGROWFSRT_32 _IOW('X', 112, struct compat_xfs_growfs_rt)
-
-#endif /* BROKEN_X86_ALIGNMENT */
-
-#endif /* __XFS_IOCTL32_H__ */
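
These packed layouts only work if they match, byte for byte, what 32-bit userspace built. A hypothetical compile-time guard one could add (the specific sizes assume the BROKEN_X86_ALIGNMENT packing above; these checks are an illustration, not part of this header):

#include <linux/bug.h>
#include <linux/stddef.h>

static inline void xfs_compat_layout_checks(void)
{
	/* compat_time_t plus __s32, with no padding between them */
	BUILD_BUG_ON(sizeof(compat_xfs_bstime_t) != 8);
	/* five 64-bit fields plus sx_pad[16] precede sx_stat */
	BUILD_BUG_ON(offsetof(compat_xfs_swapext_t, sx_stat) != 56);
}
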
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
deleted file mode 100644
index f5b697b..0000000
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ /dev/null
@@ -1,778 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_acl.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_rw.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_trace.h"
-
-#include <linux/capability.h>
-#include <linux/xattr.h>
-#include <linux/namei.h>
-#include <linux/posix_acl.h>
-#include <linux/security.h>
-#include <linux/fiemap.h>
-#include <linux/slab.h>
-
-/*
- * Bring the timestamps in the XFS inode uptodate.
- *
- * Used before writing the inode to disk.
- */
-void
-xfs_synchronize_times(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
- ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
- ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-}
-
-/*
- * If the Linux inode is valid, mark it dirty; otherwise mark the dirty state
- * in the XFS inode to make sure we pick it up when reclaiming the inode.
- */
-void
-xfs_mark_inode_dirty_sync(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
- mark_inode_dirty_sync(inode);
- else {
- barrier();
- ip->i_update_core = 1;
- }
-}
-
-void
-xfs_mark_inode_dirty(
- xfs_inode_t *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
- mark_inode_dirty(inode);
- else {
- barrier();
- ip->i_update_core = 1;
- }
-
-}
-
-/*
- * Hook in SELinux. This is not quite correct yet; what we really need
- * here (as we do for default ACLs) is a mechanism by which creation of
- * these attrs can be journalled at inode creation time (along with the
- * inode, of course, such that log replay can't cause these to be lost).
- */
-STATIC int
-xfs_init_security(
- struct inode *inode,
- struct inode *dir,
- const struct qstr *qstr)
-{
- struct xfs_inode *ip = XFS_I(inode);
- size_t length;
- void *value;
- unsigned char *name;
- int error;
-
- error = security_inode_init_security(inode, dir, qstr, (char **)&name,
- &value, &length);
- if (error) {
- if (error == -EOPNOTSUPP)
- return 0;
- return -error;
- }
-
- error = xfs_attr_set(ip, name, value, length, ATTR_SECURE);
-
- kfree(name);
- kfree(value);
- return error;
-}
-
-static void
-xfs_dentry_to_name(
- struct xfs_name *namep,
- struct dentry *dentry)
-{
- namep->name = dentry->d_name.name;
- namep->len = dentry->d_name.len;
-}
-
-STATIC void
-xfs_cleanup_inode(
- struct inode *dir,
- struct inode *inode,
- struct dentry *dentry)
-{
- struct xfs_name teardown;
-
-	/*
-	 * Oh, the horror.  If we can't add the ACL or we fail in
-	 * xfs_init_security we must back out.  ENOSPC can hit here,
-	 * among other things.
-	 */
- xfs_dentry_to_name(&teardown, dentry);
-
- xfs_remove(XFS_I(dir), &teardown, XFS_I(inode));
- iput(inode);
-}
-
-STATIC int
-xfs_vn_mknod(
- struct inode *dir,
- struct dentry *dentry,
- int mode,
- dev_t rdev)
-{
- struct inode *inode;
- struct xfs_inode *ip = NULL;
- struct posix_acl *default_acl = NULL;
- struct xfs_name name;
- int error;
-
- /*
- * Irix uses Missed'em'V split, but doesn't want to see
- * the upper 5 bits of (14bit) major.
- */
- if (S_ISCHR(mode) || S_ISBLK(mode)) {
- if (unlikely(!sysv_valid_dev(rdev) || MAJOR(rdev) & ~0x1ff))
- return -EINVAL;
- rdev = sysv_encode_dev(rdev);
- } else {
- rdev = 0;
- }
-
- if (IS_POSIXACL(dir)) {
- default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT);
- if (IS_ERR(default_acl))
- return PTR_ERR(default_acl);
-
- if (!default_acl)
- mode &= ~current_umask();
- }
-
- xfs_dentry_to_name(&name, dentry);
- error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
- if (unlikely(error))
- goto out_free_acl;
-
- inode = VFS_I(ip);
-
- error = xfs_init_security(inode, dir, &dentry->d_name);
- if (unlikely(error))
- goto out_cleanup_inode;
-
- if (default_acl) {
- error = -xfs_inherit_acl(inode, default_acl);
- if (unlikely(error))
- goto out_cleanup_inode;
- posix_acl_release(default_acl);
- }
-
- d_instantiate(dentry, inode);
- return -error;
-
- out_cleanup_inode:
- xfs_cleanup_inode(dir, inode, dentry);
- out_free_acl:
- posix_acl_release(default_acl);
- return -error;
-}
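
xfs_vn_mknod() squeezes the device number into the old SysV on-disk encoding: 18 bits of minor with the major stored above it, and the major masked to 9 bits on the way back out (hence the 0x1ff checks). A self-contained sketch mirroring the kernel's sysv_encode_dev()/sysv_major()/sysv_minor() helpers from include/linux/kdev_t.h; the MAJOR/MINOR/MKDEV macros here are local stand-ins:

    #include <assert.h>
    #include <stdint.h>

    /* Userspace mirror of the kernel's SysV device-number encoding. */
    #define MINORBITS       20
    #define MAJOR(dev)      ((unsigned)((dev) >> MINORBITS))
    #define MINOR(dev)      ((unsigned)((dev) & ((1U << MINORBITS) - 1)))
    #define MKDEV(ma, mi)   (((uint32_t)(ma) << MINORBITS) | (mi))

    static uint32_t sysv_encode_dev(uint32_t dev)
    {
            return MINOR(dev) | (MAJOR(dev) << 18);
    }

    static unsigned sysv_major(uint32_t dev) { return (dev >> 18) & 0x3fff; }
    static unsigned sysv_minor(uint32_t dev) { return dev & 0x3ffff; }

    int main(void)
    {
            uint32_t dev = MKDEV(8, 1);             /* e.g. sda1 */
            uint32_t ondisk = sysv_encode_dev(dev);

            /* XFS masks the major to 9 bits on decode, as above. */
            assert((sysv_major(ondisk) & 0x1ff) == 8);
            assert(sysv_minor(ondisk) == 1);
            return 0;
    }
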
-
-STATIC int
-xfs_vn_create(
- struct inode *dir,
- struct dentry *dentry,
- int mode,
- struct nameidata *nd)
-{
- return xfs_vn_mknod(dir, dentry, mode, 0);
-}
-
-STATIC int
-xfs_vn_mkdir(
- struct inode *dir,
- struct dentry *dentry,
- int mode)
-{
- return xfs_vn_mknod(dir, dentry, mode|S_IFDIR, 0);
-}
-
-STATIC struct dentry *
-xfs_vn_lookup(
- struct inode *dir,
- struct dentry *dentry,
- struct nameidata *nd)
-{
- struct xfs_inode *cip;
- struct xfs_name name;
- int error;
-
- if (dentry->d_name.len >= MAXNAMELEN)
- return ERR_PTR(-ENAMETOOLONG);
-
- xfs_dentry_to_name(&name, dentry);
- error = xfs_lookup(XFS_I(dir), &name, &cip, NULL);
- if (unlikely(error)) {
- if (unlikely(error != ENOENT))
- return ERR_PTR(-error);
- d_add(dentry, NULL);
- return NULL;
- }
-
- return d_splice_alias(VFS_I(cip), dentry);
-}
-
-STATIC struct dentry *
-xfs_vn_ci_lookup(
- struct inode *dir,
- struct dentry *dentry,
- struct nameidata *nd)
-{
- struct xfs_inode *ip;
- struct xfs_name xname;
- struct xfs_name ci_name;
- struct qstr dname;
- int error;
-
- if (dentry->d_name.len >= MAXNAMELEN)
- return ERR_PTR(-ENAMETOOLONG);
-
- xfs_dentry_to_name(&xname, dentry);
- error = xfs_lookup(XFS_I(dir), &xname, &ip, &ci_name);
- if (unlikely(error)) {
- if (unlikely(error != ENOENT))
- return ERR_PTR(-error);
- /*
- * call d_add(dentry, NULL) here when d_drop_negative_children
- * is called in xfs_vn_mknod (i.e. allow negative dentries
- * with CI filesystems).
- */
- return NULL;
- }
-
- /* if exact match, just splice and exit */
- if (!ci_name.name)
- return d_splice_alias(VFS_I(ip), dentry);
-
- /* else case-insensitive match... */
- dname.name = ci_name.name;
- dname.len = ci_name.len;
- dentry = d_add_ci(dentry, VFS_I(ip), &dname);
- kmem_free(ci_name.name);
- return dentry;
-}
-
-STATIC int
-xfs_vn_link(
- struct dentry *old_dentry,
- struct inode *dir,
- struct dentry *dentry)
-{
- struct inode *inode = old_dentry->d_inode;
- struct xfs_name name;
- int error;
-
- xfs_dentry_to_name(&name, dentry);
-
- error = xfs_link(XFS_I(dir), XFS_I(inode), &name);
- if (unlikely(error))
- return -error;
-
- ihold(inode);
- d_instantiate(dentry, inode);
- return 0;
-}
-
-STATIC int
-xfs_vn_unlink(
- struct inode *dir,
- struct dentry *dentry)
-{
- struct xfs_name name;
- int error;
-
- xfs_dentry_to_name(&name, dentry);
-
- error = -xfs_remove(XFS_I(dir), &name, XFS_I(dentry->d_inode));
- if (error)
- return error;
-
- /*
- * With unlink, the VFS makes the dentry "negative": no inode,
- * but still hashed. This is incompatible with case-insensitive
- * mode, so invalidate (unhash) the dentry in CI-mode.
- */
- if (xfs_sb_version_hasasciici(&XFS_M(dir->i_sb)->m_sb))
- d_invalidate(dentry);
- return 0;
-}
-
-STATIC int
-xfs_vn_symlink(
- struct inode *dir,
- struct dentry *dentry,
- const char *symname)
-{
- struct inode *inode;
- struct xfs_inode *cip = NULL;
- struct xfs_name name;
- int error;
- mode_t mode;
-
- mode = S_IFLNK |
- (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
- xfs_dentry_to_name(&name, dentry);
-
- error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
- if (unlikely(error))
- goto out;
-
- inode = VFS_I(cip);
-
- error = xfs_init_security(inode, dir, &dentry->d_name);
- if (unlikely(error))
- goto out_cleanup_inode;
-
- d_instantiate(dentry, inode);
- return 0;
-
- out_cleanup_inode:
- xfs_cleanup_inode(dir, inode, dentry);
- out:
- return -error;
-}
-
-STATIC int
-xfs_vn_rename(
- struct inode *odir,
- struct dentry *odentry,
- struct inode *ndir,
- struct dentry *ndentry)
-{
- struct inode *new_inode = ndentry->d_inode;
- struct xfs_name oname;
- struct xfs_name nname;
-
- xfs_dentry_to_name(&oname, odentry);
- xfs_dentry_to_name(&nname, ndentry);
-
- return -xfs_rename(XFS_I(odir), &oname, XFS_I(odentry->d_inode),
- XFS_I(ndir), &nname, new_inode ?
- XFS_I(new_inode) : NULL);
-}
-
-/*
- * Careful here - this function can get called recursively, so we
- * need to be very careful about how much stack we use. The link
- * buffer is kmalloced for this reason...
- */
-STATIC void *
-xfs_vn_follow_link(
- struct dentry *dentry,
- struct nameidata *nd)
-{
- char *link;
- int error = -ENOMEM;
-
- link = kmalloc(MAXPATHLEN+1, GFP_KERNEL);
- if (!link)
- goto out_err;
-
- error = -xfs_readlink(XFS_I(dentry->d_inode), link);
- if (unlikely(error))
- goto out_kfree;
-
- nd_set_link(nd, link);
- return NULL;
-
- out_kfree:
- kfree(link);
- out_err:
- nd_set_link(nd, ERR_PTR(error));
- return NULL;
-}
-
-STATIC void
-xfs_vn_put_link(
- struct dentry *dentry,
- struct nameidata *nd,
- void *p)
-{
- char *s = nd_get_link(nd);
-
- if (!IS_ERR(s))
- kfree(s);
-}
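
xfs_vn_follow_link() stores either the link buffer or ERR_PTR(error) in the nameidata, and xfs_vn_put_link() frees the buffer only when IS_ERR() says it is a real pointer. A userspace sketch of that ERR_PTR convention, which folds small negative errno values into the top of the address space:

    #include <stdio.h>
    #include <stdlib.h>

    #define MAX_ERRNO 4095

    static inline void *ERR_PTR(long error)      { return (void *)error; }
    static inline long PTR_ERR(const void *ptr)  { return (long)ptr; }
    static inline int IS_ERR(const void *ptr)
    {
            return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
    }

    int main(void)
    {
            void *link = ERR_PTR(-12);              /* -ENOMEM */

            if (IS_ERR(link))
                    printf("error %ld\n", PTR_ERR(link));
            else
                    free(link);             /* only free real buffers */
            return 0;
    }
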
-
-STATIC int
-xfs_vn_getattr(
- struct vfsmount *mnt,
- struct dentry *dentry,
- struct kstat *stat)
-{
- struct inode *inode = dentry->d_inode;
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
-
- trace_xfs_getattr(ip);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- stat->size = XFS_ISIZE(ip);
- stat->dev = inode->i_sb->s_dev;
- stat->mode = ip->i_d.di_mode;
- stat->nlink = ip->i_d.di_nlink;
- stat->uid = ip->i_d.di_uid;
- stat->gid = ip->i_d.di_gid;
- stat->ino = ip->i_ino;
- stat->atime = inode->i_atime;
- stat->mtime = inode->i_mtime;
- stat->ctime = inode->i_ctime;
- stat->blocks =
- XFS_FSB_TO_BB(mp, ip->i_d.di_nblocks + ip->i_delayed_blks);
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFBLK:
- case S_IFCHR:
- stat->blksize = BLKDEV_IOSIZE;
- stat->rdev = MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
- sysv_minor(ip->i_df.if_u2.if_rdev));
- break;
- default:
- if (XFS_IS_REALTIME_INODE(ip)) {
- /*
- * If the file blocks are being allocated from a
- * realtime volume, then return the inode's realtime
- * extent size or the realtime volume's extent size.
- */
- stat->blksize =
- xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
- } else
- stat->blksize = xfs_preferred_iosize(mp);
- stat->rdev = 0;
- break;
- }
-
- return 0;
-}
-
-STATIC int
-xfs_vn_setattr(
- struct dentry *dentry,
- struct iattr *iattr)
-{
- return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
-}
-
-#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
-
-/*
- * Call fiemap helper to fill in user data.
- * Returns positive errors to xfs_getbmap.
- */
-STATIC int
-xfs_fiemap_format(
- void **arg,
- struct getbmapx *bmv,
- int *full)
-{
- int error;
- struct fiemap_extent_info *fieinfo = *arg;
- u32 fiemap_flags = 0;
- u64 logical, physical, length;
-
- /* Do nothing for a hole */
- if (bmv->bmv_block == -1LL)
- return 0;
-
- logical = BBTOB(bmv->bmv_offset);
- physical = BBTOB(bmv->bmv_block);
- length = BBTOB(bmv->bmv_length);
-
- if (bmv->bmv_oflags & BMV_OF_PREALLOC)
- fiemap_flags |= FIEMAP_EXTENT_UNWRITTEN;
- else if (bmv->bmv_oflags & BMV_OF_DELALLOC) {
- fiemap_flags |= FIEMAP_EXTENT_DELALLOC;
- physical = 0; /* no block yet */
- }
- if (bmv->bmv_oflags & BMV_OF_LAST)
- fiemap_flags |= FIEMAP_EXTENT_LAST;
-
- error = fiemap_fill_next_extent(fieinfo, logical, physical,
- length, fiemap_flags);
- if (error > 0) {
- error = 0;
- *full = 1; /* user array now full */
- }
-
- return -error;
-}
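
getbmap speaks in 512-byte basic blocks while fiemap wants bytes, hence the BBTOB()/BTOBB() conversions in xfs_fiemap_format() and xfs_vn_fiemap(). A standalone sketch of the two macros, assuming the usual BBSHIFT of 9 from the XFS headers:

    #include <assert.h>
    #include <stdint.h>

    /* XFS "basic blocks" are 512 bytes; convert to and from bytes. */
    #define BBSHIFT         9
    #define BBTOB(bbs)      ((uint64_t)(bbs) << BBSHIFT)
    #define BTOBB(bytes)    (((uint64_t)(bytes) + (1 << BBSHIFT) - 1) >> BBSHIFT)

    int main(void)
    {
            assert(BBTOB(8) == 4096);       /* 8 basic blocks == one 4k page */
            assert(BTOBB(4096) == 8);
            assert(BTOBB(1) == 1);          /* partial blocks round up */
            return 0;
    }
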
-
-STATIC int
-xfs_vn_fiemap(
- struct inode *inode,
- struct fiemap_extent_info *fieinfo,
- u64 start,
- u64 length)
-{
- xfs_inode_t *ip = XFS_I(inode);
- struct getbmapx bm;
- int error;
-
- error = fiemap_check_flags(fieinfo, XFS_FIEMAP_FLAGS);
- if (error)
- return error;
-
- /* Set up bmap header for xfs internal routine */
- bm.bmv_offset = BTOBB(start);
- /* Special case for whole file */
- if (length == FIEMAP_MAX_OFFSET)
- bm.bmv_length = -1LL;
- else
- bm.bmv_length = BTOBB(length);
-
- /* We add one because in getbmap world count includes the header */
- bm.bmv_count = !fieinfo->fi_extents_max ? MAXEXTNUM :
- fieinfo->fi_extents_max + 1;
- bm.bmv_count = min_t(__s32, bm.bmv_count,
- (PAGE_SIZE * 16 / sizeof(struct getbmapx)));
- bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
- if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
- bm.bmv_iflags |= BMV_IF_ATTRFORK;
- if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
- bm.bmv_iflags |= BMV_IF_DELALLOC;
-
- error = xfs_getbmap(ip, &bm, xfs_fiemap_format, fieinfo);
- if (error)
- return -error;
-
- return 0;
-}
-
-static const struct inode_operations xfs_inode_operations = {
- .check_acl = xfs_check_acl,
- .getattr = xfs_vn_getattr,
- .setattr = xfs_vn_setattr,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = xfs_vn_listxattr,
- .fiemap = xfs_vn_fiemap,
-};
-
-static const struct inode_operations xfs_dir_inode_operations = {
- .create = xfs_vn_create,
- .lookup = xfs_vn_lookup,
- .link = xfs_vn_link,
- .unlink = xfs_vn_unlink,
- .symlink = xfs_vn_symlink,
- .mkdir = xfs_vn_mkdir,
- /*
- * Yes, XFS uses the same method for rmdir and unlink.
- *
- * There are some subtle differences deeper in the code,
- * but we use S_ISDIR to check for those.
- */
- .rmdir = xfs_vn_unlink,
- .mknod = xfs_vn_mknod,
- .rename = xfs_vn_rename,
- .check_acl = xfs_check_acl,
- .getattr = xfs_vn_getattr,
- .setattr = xfs_vn_setattr,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_dir_ci_inode_operations = {
- .create = xfs_vn_create,
- .lookup = xfs_vn_ci_lookup,
- .link = xfs_vn_link,
- .unlink = xfs_vn_unlink,
- .symlink = xfs_vn_symlink,
- .mkdir = xfs_vn_mkdir,
- /*
- * Yes, XFS uses the same method for rmdir and unlink.
- *
- * There are some subtle differences deeper in the code,
- * but we use S_ISDIR to check for those.
- */
- .rmdir = xfs_vn_unlink,
- .mknod = xfs_vn_mknod,
- .rename = xfs_vn_rename,
- .check_acl = xfs_check_acl,
- .getattr = xfs_vn_getattr,
- .setattr = xfs_vn_setattr,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = xfs_vn_listxattr,
-};
-
-static const struct inode_operations xfs_symlink_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = xfs_vn_follow_link,
- .put_link = xfs_vn_put_link,
- .check_acl = xfs_check_acl,
- .getattr = xfs_vn_getattr,
- .setattr = xfs_vn_setattr,
- .setxattr = generic_setxattr,
- .getxattr = generic_getxattr,
- .removexattr = generic_removexattr,
- .listxattr = xfs_vn_listxattr,
-};
-
-STATIC void
-xfs_diflags_to_iflags(
- struct inode *inode,
- struct xfs_inode *ip)
-{
- if (ip->i_d.di_flags & XFS_DIFLAG_IMMUTABLE)
- inode->i_flags |= S_IMMUTABLE;
- else
- inode->i_flags &= ~S_IMMUTABLE;
- if (ip->i_d.di_flags & XFS_DIFLAG_APPEND)
- inode->i_flags |= S_APPEND;
- else
- inode->i_flags &= ~S_APPEND;
- if (ip->i_d.di_flags & XFS_DIFLAG_SYNC)
- inode->i_flags |= S_SYNC;
- else
- inode->i_flags &= ~S_SYNC;
- if (ip->i_d.di_flags & XFS_DIFLAG_NOATIME)
- inode->i_flags |= S_NOATIME;
- else
- inode->i_flags &= ~S_NOATIME;
-}
-
-/*
- * Initialize the Linux inode, set up the operation vectors and
- * unlock the inode.
- *
- * When reading existing inodes from disk this is called directly
- * from xfs_iget, when creating a new inode it is called from
- * xfs_ialloc after setting up the inode.
- *
- * We are always called with an uninitialised linux inode here.
- * We need to initialise the necessary fields and take a reference
- * on it.
- */
-void
-xfs_setup_inode(
- struct xfs_inode *ip)
-{
- struct inode *inode = &ip->i_vnode;
-
- inode->i_ino = ip->i_ino;
- inode->i_state = I_NEW;
-
- inode_sb_list_add(inode);
- /* make the inode look hashed for the writeback code */
- hlist_add_fake(&inode->i_hash);
-
- inode->i_mode = ip->i_d.di_mode;
- inode->i_nlink = ip->i_d.di_nlink;
- inode->i_uid = ip->i_d.di_uid;
- inode->i_gid = ip->i_d.di_gid;
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFBLK:
- case S_IFCHR:
- inode->i_rdev =
- MKDEV(sysv_major(ip->i_df.if_u2.if_rdev) & 0x1ff,
- sysv_minor(ip->i_df.if_u2.if_rdev));
- break;
- default:
- inode->i_rdev = 0;
- break;
- }
-
- inode->i_generation = ip->i_d.di_gen;
- i_size_write(inode, ip->i_d.di_size);
- inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
- inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
- inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
- inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
- inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
- inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
- xfs_diflags_to_iflags(inode, ip);
-
- switch (inode->i_mode & S_IFMT) {
- case S_IFREG:
- inode->i_op = &xfs_inode_operations;
- inode->i_fop = &xfs_file_operations;
- inode->i_mapping->a_ops = &xfs_address_space_operations;
- break;
- case S_IFDIR:
- if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
- inode->i_op = &xfs_dir_ci_inode_operations;
- else
- inode->i_op = &xfs_dir_inode_operations;
- inode->i_fop = &xfs_dir_file_operations;
- break;
- case S_IFLNK:
- inode->i_op = &xfs_symlink_inode_operations;
- if (!(ip->i_df.if_flags & XFS_IFINLINE))
- inode->i_mapping->a_ops = &xfs_address_space_operations;
- break;
- default:
- inode->i_op = &xfs_inode_operations;
- init_special_inode(inode, inode->i_mode, inode->i_rdev);
- break;
- }
-
- xfs_iflags_clear(ip, XFS_INEW);
- barrier();
-
- unlock_new_inode(inode);
-}
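
The switch at the end of xfs_setup_inode() is a classic operations-vector dispatch: one const table per file type, selected once when the inode is set up, after which the VFS calls indirect through it. A compact sketch of the pattern; the table contents are illustrative, not the real i_op entries:

    #include <stdio.h>
    #include <sys/stat.h>

    struct inode_ops {
            const char *kind;       /* stands in for the function pointers */
    };

    static const struct inode_ops file_ops = { "file" };
    static const struct inode_ops dir_ops  = { "dir" };
    static const struct inode_ops link_ops = { "symlink" };
    static const struct inode_ops spec_ops = { "special" };

    static const struct inode_ops *ops_for_mode(mode_t mode)
    {
            switch (mode & S_IFMT) {
            case S_IFREG:   return &file_ops;
            case S_IFDIR:   return &dir_ops;
            case S_IFLNK:   return &link_ops;
            default:        return &spec_ops;  /* devices, fifos, sockets */
            }
    }

    int main(void)
    {
            printf("%s\n", ops_for_mode(S_IFDIR)->kind);    /* dir */
            return 0;
    }
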
diff --git a/fs/xfs/linux-2.6/xfs_iops.h b/fs/xfs/linux-2.6/xfs_iops.h
deleted file mode 100644
index ef41c92..0000000
--- a/fs/xfs/linux-2.6/xfs_iops.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_IOPS_H__
-#define __XFS_IOPS_H__
-
-struct xfs_inode;
-
-extern const struct file_operations xfs_file_operations;
-extern const struct file_operations xfs_dir_file_operations;
-
-extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
-
-extern void xfs_setup_inode(struct xfs_inode *);
-
-#endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
deleted file mode 100644
index 8731516..0000000
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ /dev/null
@@ -1,307 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_LINUX__
-#define __XFS_LINUX__
-
-#include <linux/types.h>
-
-/*
- * XFS_BIG_BLKNOS needs block layer disk addresses to be 64 bits.
- * XFS_BIG_INUMS requires XFS_BIG_BLKNOS to be set.
- */
-#if defined(CONFIG_LBDAF) || (BITS_PER_LONG == 64)
-# define XFS_BIG_BLKNOS 1
-# define XFS_BIG_INUMS 1
-#else
-# define XFS_BIG_BLKNOS 0
-# define XFS_BIG_INUMS 0
-#endif
-
-#include <xfs_types.h>
-#include <xfs_arch.h>
-
-#include <kmem.h>
-#include <mrlock.h>
-#include <time.h>
-
-#include <support/uuid.h>
-
-#include <linux/semaphore.h>
-#include <linux/mm.h>
-#include <linux/kernel.h>
-#include <linux/blkdev.h>
-#include <linux/slab.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/file.h>
-#include <linux/swap.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/bitops.h>
-#include <linux/major.h>
-#include <linux/pagemap.h>
-#include <linux/vfs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/proc_fs.h>
-#include <linux/sort.h>
-#include <linux/cpu.h>
-#include <linux/notifier.h>
-#include <linux/delay.h>
-#include <linux/log2.h>
-#include <linux/spinlock.h>
-#include <linux/random.h>
-#include <linux/ctype.h>
-#include <linux/writeback.h>
-#include <linux/capability.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/list_sort.h>
-
-#include <asm/page.h>
-#include <asm/div64.h>
-#include <asm/param.h>
-#include <asm/uaccess.h>
-#include <asm/byteorder.h>
-#include <asm/unaligned.h>
-
-#include <xfs_vnode.h>
-#include <xfs_stats.h>
-#include <xfs_sysctl.h>
-#include <xfs_iops.h>
-#include <xfs_aops.h>
-#include <xfs_super.h>
-#include <xfs_buf.h>
-#include <xfs_message.h>
-
-/*
- * Feature macros (disable/enable)
- */
-#ifdef CONFIG_SMP
-#define HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#else
-#undef HAVE_PERCPU_SB /* per cpu superblock counters are a 2.6 feature */
-#endif
-
-#define irix_sgid_inherit xfs_params.sgid_inherit.val
-#define irix_symlink_mode xfs_params.symlink_mode.val
-#define xfs_panic_mask xfs_params.panic_mask.val
-#define xfs_error_level xfs_params.error_level.val
-#define xfs_syncd_centisecs xfs_params.syncd_timer.val
-#define xfs_stats_clear xfs_params.stats_clear.val
-#define xfs_inherit_sync xfs_params.inherit_sync.val
-#define xfs_inherit_nodump xfs_params.inherit_nodump.val
-#define xfs_inherit_noatime xfs_params.inherit_noatim.val
-#define xfs_buf_timer_centisecs xfs_params.xfs_buf_timer.val
-#define xfs_buf_age_centisecs xfs_params.xfs_buf_age.val
-#define xfs_inherit_nosymlinks xfs_params.inherit_nosym.val
-#define xfs_rotorstep xfs_params.rotorstep.val
-#define xfs_inherit_nodefrag xfs_params.inherit_nodfrg.val
-#define xfs_fstrm_centisecs xfs_params.fstrm_timer.val
-
-#define current_cpu() (raw_smp_processor_id())
-#define current_pid() (current->pid)
-#define current_test_flags(f) (current->flags & (f))
-#define current_set_flags_nested(sp, f) \
- (*(sp) = current->flags, current->flags |= (f))
-#define current_clear_flags_nested(sp, f) \
- (*(sp) = current->flags, current->flags &= ~(f))
-#define current_restore_flags_nested(sp, f) \
- (current->flags = ((current->flags & ~(f)) | (*(sp) & (f))))
-
-#define spinlock_destroy(lock)
-
-#define NBBY 8 /* number of bits per byte */
-
-/*
- * Size of block device i/o is parameterized here.
- * Currently the system supports page-sized i/o.
- */
-#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT
-#define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT)
-/* number of BB's per block device block */
-#define BLKDEV_BB BTOBB(BLKDEV_IOSIZE)
-
-#define ENOATTR ENODATA /* Attribute not found */
-#define EWRONGFS EINVAL /* Mount with wrong filesystem type */
-#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */
-
-#define SYNCHRONIZE() barrier()
-#define __return_address __builtin_return_address(0)
-
-#define XFS_PROJID_DEFAULT 0
-#define MAXPATHLEN 1024
-
-#define MIN(a,b) (min(a,b))
-#define MAX(a,b) (max(a,b))
-#define howmany(x, y) (((x)+((y)-1))/(y))
-
-/*
- * Various platform dependent calls that don't fit anywhere else
- */
-#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
-#define xfs_stack_trace() dump_stack()
-
-
-/* Move the kernel do_div definition off to one side */
-
-#if defined __i386__
-/* For ia32 we need to pull some tricks to get past various versions
- * of the compiler which do not like us using do_div in the middle
- * of large functions.
- */
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
- __u32 mod;
-
- switch (n) {
- case 4:
- mod = *(__u32 *)a % b;
- *(__u32 *)a = *(__u32 *)a / b;
- return mod;
- case 8:
- {
- unsigned long __upper, __low, __high, __mod;
- __u64 c = *(__u64 *)a;
- __upper = __high = c >> 32;
- __low = c;
- if (__high) {
- __upper = __high % (b);
- __high = __high / (b);
- }
- asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
- asm("":"=A" (c):"a" (__low),"d" (__high));
- *(__u64 *)a = c;
- return __mod;
- }
- }
-
- /* NOTREACHED */
- return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
- switch (n) {
- case 4:
- return *(__u32 *)a % b;
- case 8:
- {
- unsigned long __upper, __low, __high, __mod;
- __u64 c = *(__u64 *)a;
- __upper = __high = c >> 32;
- __low = c;
- if (__high) {
- __upper = __high % (b);
- __high = __high / (b);
- }
- asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (b), "0" (__low), "1" (__upper));
- asm("":"=A" (c):"a" (__low),"d" (__high));
- return __mod;
- }
- }
-
- /* NOTREACHED */
- return 0;
-}
-#else
-static inline __u32 xfs_do_div(void *a, __u32 b, int n)
-{
- __u32 mod;
-
- switch (n) {
- case 4:
- mod = *(__u32 *)a % b;
- *(__u32 *)a = *(__u32 *)a / b;
- return mod;
- case 8:
- mod = do_div(*(__u64 *)a, b);
- return mod;
- }
-
- /* NOTREACHED */
- return 0;
-}
-
-/* Side effect free 64 bit mod operation */
-static inline __u32 xfs_do_mod(void *a, __u32 b, int n)
-{
- switch (n) {
- case 4:
- return *(__u32 *)a % b;
- case 8:
- {
- __u64 c = *(__u64 *)a;
- return do_div(c, b);
- }
- }
-
- /* NOTREACHED */
- return 0;
-}
-#endif
-
-#undef do_div
-#define do_div(a, b) xfs_do_div(&(a), (b), sizeof(a))
-#define do_mod(a, b) xfs_do_mod(&(a), (b), sizeof(a))
-
-static inline __uint64_t roundup_64(__uint64_t x, __uint32_t y)
-{
- x += y - 1;
- do_div(x, y);
- return(x * y);
-}
-
-static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
-{
- x += y - 1;
- do_div(x, y);
- return x;
-}
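
do_div() replaces its 64-bit dividend in place with the quotient and returns the remainder, which is why roundup_64() and howmany_64() above read oddly. In plain C with native 64-bit division they reduce to the familiar round-up idiom:

    #include <assert.h>
    #include <stdint.h>

    /* Plain-C equivalents of howmany_64()/roundup_64(). */
    static uint64_t howmany_u64(uint64_t x, uint32_t y)
    {
            return (x + y - 1) / y;         /* pieces of size y covering x */
    }

    static uint64_t roundup_u64(uint64_t x, uint32_t y)
    {
            return howmany_u64(x, y) * y;   /* x rounded up to a multiple of y */
    }

    int main(void)
    {
            assert(howmany_u64(4097, 4096) == 2);
            assert(roundup_u64(4097, 4096) == 8192);
            assert(roundup_u64(4096, 4096) == 4096);    /* already aligned */
            return 0;
    }

The kernel versions exist because 64-by-32 division on 32-bit machines must go through do_div() rather than the C '/' operator.
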
-
-/* ARM old ABI has some weird alignment/padding */
-#if defined(__arm__) && !defined(__ARM_EABI__)
-#define __arch_pack __attribute__((packed))
-#else
-#define __arch_pack
-#endif
-
-#define ASSERT_ALWAYS(expr) \
- (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef DEBUG
-#define ASSERT(expr) ((void)0)
-
-#ifndef STATIC
-# define STATIC static noinline
-#endif
-
-#else /* DEBUG */
-
-#define ASSERT(expr) \
- (unlikely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
-
-#ifndef STATIC
-# define STATIC noinline
-#endif
-
-#endif /* DEBUG */
-
-#endif /* __XFS_LINUX__ */
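
The DEBUG-gated ASSERT above compiles to nothing in production builds and reports the failed expression, file, and line in debug builds. A userspace sketch of the same pattern; compile with -DDEBUG to activate it:

    #include <stdio.h>
    #include <stdlib.h>

    void assfail(const char *expr, const char *file, int line)
    {
            fprintf(stderr, "Assertion failed: %s, file: %s, line: %d\n",
                    expr, file, line);
            abort();
    }

    #ifdef DEBUG
    # define ASSERT(expr) \
            ((expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
    #else
    # define ASSERT(expr) ((void)0)
    #endif

    int main(void)
    {
            ASSERT(1 + 1 == 2);     /* a no-op unless built with -DDEBUG */
            return 0;
    }
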
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c
deleted file mode 100644
index bd672de..0000000
--- a/fs/xfs/linux-2.6/xfs_message.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright (c) 2011 Red Hat, Inc. All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-
-/*
- * XFS logging functions
- */
-static void
-__xfs_printk(
- const char *level,
- const struct xfs_mount *mp,
- struct va_format *vaf)
-{
- if (mp && mp->m_fsname) {
- printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
- return;
- }
- printk("%sXFS: %pV\n", level, vaf);
-}
-
-#define define_xfs_printk_level(func, kern_level) \
-void func(const struct xfs_mount *mp, const char *fmt, ...) \
-{ \
- struct va_format vaf; \
- va_list args; \
- \
- va_start(args, fmt); \
- \
- vaf.fmt = fmt; \
- vaf.va = &args; \
- \
- __xfs_printk(kern_level, mp, &vaf); \
- va_end(args); \
-} \
-
-define_xfs_printk_level(xfs_emerg, KERN_EMERG);
-define_xfs_printk_level(xfs_alert, KERN_ALERT);
-define_xfs_printk_level(xfs_crit, KERN_CRIT);
-define_xfs_printk_level(xfs_err, KERN_ERR);
-define_xfs_printk_level(xfs_warn, KERN_WARNING);
-define_xfs_printk_level(xfs_notice, KERN_NOTICE);
-define_xfs_printk_level(xfs_info, KERN_INFO);
-#ifdef DEBUG
-define_xfs_printk_level(xfs_debug, KERN_DEBUG);
-#endif
-
-void
-xfs_alert_tag(
- const struct xfs_mount *mp,
- int panic_tag,
- const char *fmt, ...)
-{
- struct va_format vaf;
- va_list args;
- int do_panic = 0;
-
- if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) {
- xfs_alert(mp, "Transforming an alert into a BUG.");
- do_panic = 1;
- }
-
- va_start(args, fmt);
-
- vaf.fmt = fmt;
- vaf.va = &args;
-
- __xfs_printk(KERN_ALERT, mp, &vaf);
- va_end(args);
-
- BUG_ON(do_panic);
-}
-
-void
-assfail(char *expr, char *file, int line)
-{
- xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
- expr, file, line);
- BUG();
-}
-
-void
-xfs_hex_dump(void *p, int length)
-{
- print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_ADDRESS, 16, 1, p, length, 1);
-}
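
define_xfs_printk_level() stamps out one function per log level, routing the varargs through struct va_format and printk's %pV specifier so the format string is expanded exactly once inside printk. Outside the kernel there is no %pV, but the generation trick itself is plain C; a sketch that forwards the va_list to vfprintf instead:

    #include <stdarg.h>
    #include <stdio.h>

    /* Userspace analog of define_xfs_printk_level(): one generated
     * logging function per level, sharing a single body. */
    #define define_log_level(func, prefix)                  \
    static void func(const char *fmt, ...)                  \
    {                                                       \
            va_list args;                                   \
            va_start(args, fmt);                            \
            fprintf(stderr, "%s: ", prefix);                \
            vfprintf(stderr, fmt, args);                    \
            fputc('\n', stderr);                            \
            va_end(args);                                   \
    }

    define_log_level(log_warn, "WARN")
    define_log_level(log_info, "INFO")

    int main(void)
    {
            log_warn("device %s is %d%% full", "sda1", 97);
            return 0;
    }
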
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h
deleted file mode 100644
index 7fb7ea0..0000000
--- a/fs/xfs/linux-2.6/xfs_message.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#ifndef __XFS_MESSAGE_H
-#define __XFS_MESSAGE_H 1
-
-struct xfs_mount;
-
-extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-extern void xfs_alert_tag(const struct xfs_mount *mp, int tag,
- const char *fmt, ...)
- __attribute__ ((format (printf, 3, 4)));
-extern void xfs_crit(const struct xfs_mount *mp, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-extern void xfs_err(const struct xfs_mount *mp, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-extern void xfs_warn(const struct xfs_mount *mp, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-extern void xfs_notice(const struct xfs_mount *mp, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-
-#ifdef DEBUG
-extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
- __attribute__ ((format (printf, 2, 3)));
-#else
-static inline void
-__attribute__ ((format (printf, 2, 3)))
-xfs_debug(const struct xfs_mount *mp, const char *fmt, ...)
-{
-}
-#endif
-
-extern void assfail(char *expr, char *f, int l);
-
-extern void xfs_hex_dump(void *p, int length);
-
-#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
deleted file mode 100644
index 29b9d64..0000000
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Copyright (c) 2008, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_sb.h"
-#include "xfs_inum.h"
-#include "xfs_log.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_quota.h"
-#include "xfs_trans.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "quota/xfs_qm.h"
-#include <linux/quota.h>
-
-
-STATIC int
-xfs_quota_type(int type)
-{
- switch (type) {
- case USRQUOTA:
- return XFS_DQ_USER;
- case GRPQUOTA:
- return XFS_DQ_GROUP;
- default:
- return XFS_DQ_PROJ;
- }
-}
-
-STATIC int
-xfs_fs_get_xstate(
- struct super_block *sb,
- struct fs_quota_stat *fqs)
-{
- struct xfs_mount *mp = XFS_M(sb);
-
- if (!XFS_IS_QUOTA_RUNNING(mp))
- return -ENOSYS;
- return -xfs_qm_scall_getqstat(mp, fqs);
-}
-
-STATIC int
-xfs_fs_set_xstate(
- struct super_block *sb,
- unsigned int uflags,
- int op)
-{
- struct xfs_mount *mp = XFS_M(sb);
- unsigned int flags = 0;
-
- if (sb->s_flags & MS_RDONLY)
- return -EROFS;
- if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
- return -ENOSYS;
-
- if (uflags & FS_QUOTA_UDQ_ACCT)
- flags |= XFS_UQUOTA_ACCT;
- if (uflags & FS_QUOTA_PDQ_ACCT)
- flags |= XFS_PQUOTA_ACCT;
- if (uflags & FS_QUOTA_GDQ_ACCT)
- flags |= XFS_GQUOTA_ACCT;
- if (uflags & FS_QUOTA_UDQ_ENFD)
- flags |= XFS_UQUOTA_ENFD;
- if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
- flags |= XFS_OQUOTA_ENFD;
-
- switch (op) {
- case Q_XQUOTAON:
- return -xfs_qm_scall_quotaon(mp, flags);
- case Q_XQUOTAOFF:
- if (!XFS_IS_QUOTA_ON(mp))
- return -EINVAL;
- return -xfs_qm_scall_quotaoff(mp, flags);
- case Q_XQUOTARM:
- if (XFS_IS_QUOTA_ON(mp))
- return -EINVAL;
- return -xfs_qm_scall_trunc_qfiles(mp, flags);
- }
-
- return -EINVAL;
-}
-
-STATIC int
-xfs_fs_get_dqblk(
- struct super_block *sb,
- int type,
- qid_t id,
- struct fs_disk_quota *fdq)
-{
- struct xfs_mount *mp = XFS_M(sb);
-
- if (!XFS_IS_QUOTA_RUNNING(mp))
- return -ENOSYS;
- if (!XFS_IS_QUOTA_ON(mp))
- return -ESRCH;
-
- return -xfs_qm_scall_getquota(mp, id, xfs_quota_type(type), fdq);
-}
-
-STATIC int
-xfs_fs_set_dqblk(
- struct super_block *sb,
- int type,
- qid_t id,
- struct fs_disk_quota *fdq)
-{
- struct xfs_mount *mp = XFS_M(sb);
-
- if (sb->s_flags & MS_RDONLY)
- return -EROFS;
- if (!XFS_IS_QUOTA_RUNNING(mp))
- return -ENOSYS;
- if (!XFS_IS_QUOTA_ON(mp))
- return -ESRCH;
-
- return -xfs_qm_scall_setqlim(mp, id, xfs_quota_type(type), fdq);
-}
-
-const struct quotactl_ops xfs_quotactl_operations = {
- .get_xstate = xfs_fs_get_xstate,
- .set_xstate = xfs_fs_set_xstate,
- .get_dqblk = xfs_fs_get_dqblk,
- .set_dqblk = xfs_fs_set_dqblk,
-};
diff --git a/fs/xfs/linux-2.6/xfs_stats.c b/fs/xfs/linux-2.6/xfs_stats.c
deleted file mode 100644
index 76fdc58..0000000
--- a/fs/xfs/linux-2.6/xfs_stats.c
+++ /dev/null
@@ -1,122 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include <linux/proc_fs.h>
-
-DEFINE_PER_CPU(struct xfsstats, xfsstats);
-
-static int xfs_stat_proc_show(struct seq_file *m, void *v)
-{
- int c, i, j, val;
- __uint64_t xs_xstrat_bytes = 0;
- __uint64_t xs_write_bytes = 0;
- __uint64_t xs_read_bytes = 0;
-
- static const struct xstats_entry {
- char *desc;
- int endpoint;
- } xstats[] = {
- { "extent_alloc", XFSSTAT_END_EXTENT_ALLOC },
- { "abt", XFSSTAT_END_ALLOC_BTREE },
- { "blk_map", XFSSTAT_END_BLOCK_MAPPING },
- { "bmbt", XFSSTAT_END_BLOCK_MAP_BTREE },
- { "dir", XFSSTAT_END_DIRECTORY_OPS },
- { "trans", XFSSTAT_END_TRANSACTIONS },
- { "ig", XFSSTAT_END_INODE_OPS },
- { "log", XFSSTAT_END_LOG_OPS },
- { "push_ail", XFSSTAT_END_TAIL_PUSHING },
- { "xstrat", XFSSTAT_END_WRITE_CONVERT },
- { "rw", XFSSTAT_END_READ_WRITE_OPS },
- { "attr", XFSSTAT_END_ATTRIBUTE_OPS },
- { "icluster", XFSSTAT_END_INODE_CLUSTER },
- { "vnodes", XFSSTAT_END_VNODE_OPS },
- { "buf", XFSSTAT_END_BUF },
- { "abtb2", XFSSTAT_END_ABTB_V2 },
- { "abtc2", XFSSTAT_END_ABTC_V2 },
- { "bmbt2", XFSSTAT_END_BMBT_V2 },
- { "ibt2", XFSSTAT_END_IBT_V2 },
- };
-
- /* Loop over all stats groups */
- for (i = j = 0; i < ARRAY_SIZE(xstats); i++) {
- seq_printf(m, "%s", xstats[i].desc);
- /* inner loop does each group */
- while (j < xstats[i].endpoint) {
- val = 0;
- /* sum over all cpus */
- for_each_possible_cpu(c)
- val += *(((__u32*)&per_cpu(xfsstats, c) + j));
- seq_printf(m, " %u", val);
- j++;
- }
- seq_putc(m, '\n');
- }
- /* extra precision counters */
- for_each_possible_cpu(i) {
- xs_xstrat_bytes += per_cpu(xfsstats, i).xs_xstrat_bytes;
- xs_write_bytes += per_cpu(xfsstats, i).xs_write_bytes;
- xs_read_bytes += per_cpu(xfsstats, i).xs_read_bytes;
- }
-
- seq_printf(m, "xpc %Lu %Lu %Lu\n",
- xs_xstrat_bytes, xs_write_bytes, xs_read_bytes);
- seq_printf(m, "debug %u\n",
-#if defined(DEBUG)
- 1);
-#else
- 0);
-#endif
- return 0;
-}
-
-static int xfs_stat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xfs_stat_proc_show, NULL);
-}
-
-static const struct file_operations xfs_stat_proc_fops = {
- .owner = THIS_MODULE,
- .open = xfs_stat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-int
-xfs_init_procfs(void)
-{
- if (!proc_mkdir("fs/xfs", NULL))
- goto out;
-
- if (!proc_create("fs/xfs/stat", 0, NULL,
- &xfs_stat_proc_fops))
- goto out_remove_entry;
- return 0;
-
- out_remove_entry:
- remove_proc_entry("fs/xfs", NULL);
- out:
- return -ENOMEM;
-}
-
-void
-xfs_cleanup_procfs(void)
-{
- remove_proc_entry("fs/xfs/stat", NULL);
- remove_proc_entry("fs/xfs", NULL);
-}
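
xfs_stat_proc_show() never takes a lock: each CPU bumps its own copy of the counters, and the reader sums every copy by walking the struct as a flat array of __uint32_t slots delimited by the cumulative XFSSTAT_END_* offsets. A userspace sketch of that aggregation, with made-up counters and sample data:

    #include <stdio.h>
    #include <stdint.h>

    #define NR_CPUS 4

    /* Each CPU owns one copy; writers touch only their own locklessly. */
    struct stats {
            uint32_t lookups;
            uint32_t creates;
    };

    static struct stats percpu[NR_CPUS] = {
            { 3, 1 }, { 5, 0 }, { 2, 2 }, { 7, 1 },        /* sample data */
    };

    int main(void)
    {
            /* Walk the struct as a flat array of u32 slots, the way the
             * kernel code walks its cumulative group offsets. */
            int nslots = sizeof(struct stats) / sizeof(uint32_t);

            for (int j = 0; j < nslots; j++) {
                    uint32_t val = 0;

                    for (int c = 0; c < NR_CPUS; c++)
                            val += ((uint32_t *)&percpu[c])[j];
                    printf("slot %d = %u\n", j, val);
            }
            return 0;
    }

A torn read of a single u32 cannot occur on any supported architecture, which is why the comment in xfs_stats.h shrugs off the missing preemption protection.
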
diff --git a/fs/xfs/linux-2.6/xfs_stats.h b/fs/xfs/linux-2.6/xfs_stats.h
deleted file mode 100644
index 736854b..0000000
--- a/fs/xfs/linux-2.6/xfs_stats.h
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_STATS_H__
-#define __XFS_STATS_H__
-
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-#include <linux/percpu.h>
-
-/*
- * XFS global statistics
- */
-struct xfsstats {
-# define XFSSTAT_END_EXTENT_ALLOC 4
- __uint32_t xs_allocx;
- __uint32_t xs_allocb;
- __uint32_t xs_freex;
- __uint32_t xs_freeb;
-# define XFSSTAT_END_ALLOC_BTREE (XFSSTAT_END_EXTENT_ALLOC+4)
- __uint32_t xs_abt_lookup;
- __uint32_t xs_abt_compare;
- __uint32_t xs_abt_insrec;
- __uint32_t xs_abt_delrec;
-# define XFSSTAT_END_BLOCK_MAPPING (XFSSTAT_END_ALLOC_BTREE+7)
- __uint32_t xs_blk_mapr;
- __uint32_t xs_blk_mapw;
- __uint32_t xs_blk_unmap;
- __uint32_t xs_add_exlist;
- __uint32_t xs_del_exlist;
- __uint32_t xs_look_exlist;
- __uint32_t xs_cmp_exlist;
-# define XFSSTAT_END_BLOCK_MAP_BTREE (XFSSTAT_END_BLOCK_MAPPING+4)
- __uint32_t xs_bmbt_lookup;
- __uint32_t xs_bmbt_compare;
- __uint32_t xs_bmbt_insrec;
- __uint32_t xs_bmbt_delrec;
-# define XFSSTAT_END_DIRECTORY_OPS (XFSSTAT_END_BLOCK_MAP_BTREE+4)
- __uint32_t xs_dir_lookup;
- __uint32_t xs_dir_create;
- __uint32_t xs_dir_remove;
- __uint32_t xs_dir_getdents;
-# define XFSSTAT_END_TRANSACTIONS (XFSSTAT_END_DIRECTORY_OPS+3)
- __uint32_t xs_trans_sync;
- __uint32_t xs_trans_async;
- __uint32_t xs_trans_empty;
-# define XFSSTAT_END_INODE_OPS (XFSSTAT_END_TRANSACTIONS+7)
- __uint32_t xs_ig_attempts;
- __uint32_t xs_ig_found;
- __uint32_t xs_ig_frecycle;
- __uint32_t xs_ig_missed;
- __uint32_t xs_ig_dup;
- __uint32_t xs_ig_reclaims;
- __uint32_t xs_ig_attrchg;
-# define XFSSTAT_END_LOG_OPS (XFSSTAT_END_INODE_OPS+5)
- __uint32_t xs_log_writes;
- __uint32_t xs_log_blocks;
- __uint32_t xs_log_noiclogs;
- __uint32_t xs_log_force;
- __uint32_t xs_log_force_sleep;
-# define XFSSTAT_END_TAIL_PUSHING (XFSSTAT_END_LOG_OPS+10)
- __uint32_t xs_try_logspace;
- __uint32_t xs_sleep_logspace;
- __uint32_t xs_push_ail;
- __uint32_t xs_push_ail_success;
- __uint32_t xs_push_ail_pushbuf;
- __uint32_t xs_push_ail_pinned;
- __uint32_t xs_push_ail_locked;
- __uint32_t xs_push_ail_flushing;
- __uint32_t xs_push_ail_restarts;
- __uint32_t xs_push_ail_flush;
-# define XFSSTAT_END_WRITE_CONVERT (XFSSTAT_END_TAIL_PUSHING+2)
- __uint32_t xs_xstrat_quick;
- __uint32_t xs_xstrat_split;
-# define XFSSTAT_END_READ_WRITE_OPS (XFSSTAT_END_WRITE_CONVERT+2)
- __uint32_t xs_write_calls;
- __uint32_t xs_read_calls;
-# define XFSSTAT_END_ATTRIBUTE_OPS (XFSSTAT_END_READ_WRITE_OPS+4)
- __uint32_t xs_attr_get;
- __uint32_t xs_attr_set;
- __uint32_t xs_attr_remove;
- __uint32_t xs_attr_list;
-# define XFSSTAT_END_INODE_CLUSTER (XFSSTAT_END_ATTRIBUTE_OPS+3)
- __uint32_t xs_iflush_count;
- __uint32_t xs_icluster_flushcnt;
- __uint32_t xs_icluster_flushinode;
-# define XFSSTAT_END_VNODE_OPS (XFSSTAT_END_INODE_CLUSTER+8)
- __uint32_t vn_active; /* # vnodes not on free lists */
- __uint32_t vn_alloc; /* # times vn_alloc called */
- __uint32_t vn_get; /* # times vn_get called */
- __uint32_t vn_hold; /* # times vn_hold called */
- __uint32_t vn_rele; /* # times vn_rele called */
- __uint32_t vn_reclaim; /* # times vn_reclaim called */
- __uint32_t vn_remove; /* # times vn_remove called */
- __uint32_t vn_free; /* # times vn_free called */
-#define XFSSTAT_END_BUF (XFSSTAT_END_VNODE_OPS+9)
- __uint32_t xb_get;
- __uint32_t xb_create;
- __uint32_t xb_get_locked;
- __uint32_t xb_get_locked_waited;
- __uint32_t xb_busy_locked;
- __uint32_t xb_miss_locked;
- __uint32_t xb_page_retries;
- __uint32_t xb_page_found;
- __uint32_t xb_get_read;
-/* Version 2 btree counters */
-#define XFSSTAT_END_ABTB_V2 (XFSSTAT_END_BUF+15)
- __uint32_t xs_abtb_2_lookup;
- __uint32_t xs_abtb_2_compare;
- __uint32_t xs_abtb_2_insrec;
- __uint32_t xs_abtb_2_delrec;
- __uint32_t xs_abtb_2_newroot;
- __uint32_t xs_abtb_2_killroot;
- __uint32_t xs_abtb_2_increment;
- __uint32_t xs_abtb_2_decrement;
- __uint32_t xs_abtb_2_lshift;
- __uint32_t xs_abtb_2_rshift;
- __uint32_t xs_abtb_2_split;
- __uint32_t xs_abtb_2_join;
- __uint32_t xs_abtb_2_alloc;
- __uint32_t xs_abtb_2_free;
- __uint32_t xs_abtb_2_moves;
-#define XFSSTAT_END_ABTC_V2 (XFSSTAT_END_ABTB_V2+15)
- __uint32_t xs_abtc_2_lookup;
- __uint32_t xs_abtc_2_compare;
- __uint32_t xs_abtc_2_insrec;
- __uint32_t xs_abtc_2_delrec;
- __uint32_t xs_abtc_2_newroot;
- __uint32_t xs_abtc_2_killroot;
- __uint32_t xs_abtc_2_increment;
- __uint32_t xs_abtc_2_decrement;
- __uint32_t xs_abtc_2_lshift;
- __uint32_t xs_abtc_2_rshift;
- __uint32_t xs_abtc_2_split;
- __uint32_t xs_abtc_2_join;
- __uint32_t xs_abtc_2_alloc;
- __uint32_t xs_abtc_2_free;
- __uint32_t xs_abtc_2_moves;
-#define XFSSTAT_END_BMBT_V2 (XFSSTAT_END_ABTC_V2+15)
- __uint32_t xs_bmbt_2_lookup;
- __uint32_t xs_bmbt_2_compare;
- __uint32_t xs_bmbt_2_insrec;
- __uint32_t xs_bmbt_2_delrec;
- __uint32_t xs_bmbt_2_newroot;
- __uint32_t xs_bmbt_2_killroot;
- __uint32_t xs_bmbt_2_increment;
- __uint32_t xs_bmbt_2_decrement;
- __uint32_t xs_bmbt_2_lshift;
- __uint32_t xs_bmbt_2_rshift;
- __uint32_t xs_bmbt_2_split;
- __uint32_t xs_bmbt_2_join;
- __uint32_t xs_bmbt_2_alloc;
- __uint32_t xs_bmbt_2_free;
- __uint32_t xs_bmbt_2_moves;
-#define XFSSTAT_END_IBT_V2 (XFSSTAT_END_BMBT_V2+15)
- __uint32_t xs_ibt_2_lookup;
- __uint32_t xs_ibt_2_compare;
- __uint32_t xs_ibt_2_insrec;
- __uint32_t xs_ibt_2_delrec;
- __uint32_t xs_ibt_2_newroot;
- __uint32_t xs_ibt_2_killroot;
- __uint32_t xs_ibt_2_increment;
- __uint32_t xs_ibt_2_decrement;
- __uint32_t xs_ibt_2_lshift;
- __uint32_t xs_ibt_2_rshift;
- __uint32_t xs_ibt_2_split;
- __uint32_t xs_ibt_2_join;
- __uint32_t xs_ibt_2_alloc;
- __uint32_t xs_ibt_2_free;
- __uint32_t xs_ibt_2_moves;
-/* Extra precision counters */
- __uint64_t xs_xstrat_bytes;
- __uint64_t xs_write_bytes;
- __uint64_t xs_read_bytes;
-};
-
-DECLARE_PER_CPU(struct xfsstats, xfsstats);
-
-/*
- * We don't disable preemption; we're not too worried about poking the
- * wrong CPU's counter for now, since the stats are aggregated before
- * reporting anyway.
- */
-#define XFS_STATS_INC(v) (per_cpu(xfsstats, current_cpu()).v++)
-#define XFS_STATS_DEC(v) (per_cpu(xfsstats, current_cpu()).v--)
-#define XFS_STATS_ADD(v, inc) (per_cpu(xfsstats, current_cpu()).v += (inc))
-
-extern int xfs_init_procfs(void);
-extern void xfs_cleanup_procfs(void);
-
-
-#else /* !CONFIG_PROC_FS */
-
-# define XFS_STATS_INC(count)
-# define XFS_STATS_DEC(count)
-# define XFS_STATS_ADD(count, inc)
-
-static inline int xfs_init_procfs(void)
-{
- return 0;
-}
-
-static inline void xfs_cleanup_procfs(void)
-{
-}
-
-#endif /* !CONFIG_PROC_FS */
-
-#endif /* __XFS_STATS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
deleted file mode 100644
index e6ac98c..0000000
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ /dev/null
@@ -1,1720 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "xfs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
-#include "xfs_ialloc.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_fsops.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_vnodeops.h"
-#include "xfs_log_priv.h"
-#include "xfs_trans_priv.h"
-#include "xfs_filestream.h"
-#include "xfs_da_btree.h"
-#include "xfs_extfree_item.h"
-#include "xfs_mru_cache.h"
-#include "xfs_inode_item.h"
-#include "xfs_sync.h"
-#include "xfs_trace.h"
-
-#include <linux/namei.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/mount.h>
-#include <linux/mempool.h>
-#include <linux/writeback.h>
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-#include <linux/parser.h>
-
-static const struct super_operations xfs_super_operations;
-static kmem_zone_t *xfs_ioend_zone;
-mempool_t *xfs_ioend_pool;
-
-#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
-#define MNTOPT_LOGDEV "logdev" /* log device */
-#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */
-#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */
-#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */
-#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */
-#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */
-#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */
-#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */
-#define MNTOPT_MTPT "mtpt" /* filesystem mount point */
-#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */
-#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */
-#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */
-#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
-#define MNTOPT_BARRIER "barrier" /* use write barriers for log write and
- * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
-#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
-#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
-#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes
- * in stat(). */
-#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */
-#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA "quota" /* disk quotas (user) */
-#define MNTOPT_NOQUOTA "noquota" /* no quotas */
-#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */
-#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */
-#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */
-#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota accounting, no enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota accounting, no enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota accounting, no enforcement */
-#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
-#define MNTOPT_DELAYLOG "delaylog" /* Delayed logging enabled */
-#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed logging disabled */
-#define MNTOPT_DISCARD "discard" /* Discard unused blocks */
-#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
-
-/*
- * Table driven mount option parser.
- *
- * Currently only used for remount, but it will be used for mount
- * in the future, too.
- */
-enum {
- Opt_barrier, Opt_nobarrier, Opt_err
-};
-
-static const match_table_t tokens = {
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_err, NULL}
-};
-
-
-STATIC unsigned long
-suffix_strtoul(char *s, char **endp, unsigned int base)
-{
- int last, shift_left_factor = 0;
- char *value = s;
-
- last = strlen(value) - 1;
- if (value[last] == 'K' || value[last] == 'k') {
- shift_left_factor = 10;
- value[last] = '\0';
- }
- if (value[last] == 'M' || value[last] == 'm') {
- shift_left_factor = 20;
- value[last] = '\0';
- }
- if (value[last] == 'G' || value[last] == 'g') {
- shift_left_factor = 30;
- value[last] = '\0';
- }
-
- return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
-}
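
suffix_strtoul() truncates a trailing K/M/G in place and applies a left shift of 10, 20, or 30 to the parsed value. An equivalent userspace sketch built on strtoul():

    #include <assert.h>
    #include <stdlib.h>
    #include <string.h>

    /* Strip a trailing K/M/G and shift the parsed value accordingly.
     * Modifies the buffer in place, like the original. */
    static unsigned long suffix_strtoul(char *s, char **endp, unsigned int base)
    {
            int last = strlen(s) - 1;
            int shift = 0;

            switch (s[last]) {
            case 'K': case 'k': shift = 10; s[last] = '\0'; break;
            case 'M': case 'm': shift = 20; s[last] = '\0'; break;
            case 'G': case 'g': shift = 30; s[last] = '\0'; break;
            }
            return strtoul(s, endp, base) << shift;
    }

    int main(void)
    {
            char a[] = "32k", b[] = "2M";

            assert(suffix_strtoul(a, NULL, 10) == 32768);
            assert(suffix_strtoul(b, NULL, 10) == 2097152);
            return 0;
    }
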
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- *
- * Note that this function does not free the various device name
- * allocations on failure; the caller is responsible for them.
- */
-STATIC int
-xfs_parseargs(
- struct xfs_mount *mp,
- char *options)
-{
- struct super_block *sb = mp->m_super;
- char *this_char, *value, *eov;
- int dsunit = 0;
- int dswidth = 0;
- int iosize = 0;
- __uint8_t iosizelog = 0;
-
- /*
- * set up the mount name first so all the errors will refer to the
- * correct device.
- */
- mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
- if (!mp->m_fsname)
- return ENOMEM;
- mp->m_fsname_len = strlen(mp->m_fsname) + 1;
-
- /*
- * Copy binary VFS mount flags we are interested in.
- */
- if (sb->s_flags & MS_RDONLY)
- mp->m_flags |= XFS_MOUNT_RDONLY;
- if (sb->s_flags & MS_DIRSYNC)
- mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (sb->s_flags & MS_SYNCHRONOUS)
- mp->m_flags |= XFS_MOUNT_WSYNC;
-
- /*
- * Set some default flags that could be cleared by the mount option
- * parsing.
- */
- mp->m_flags |= XFS_MOUNT_BARRIER;
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- mp->m_flags |= XFS_MOUNT_DELAYLOG;
-
- /*
- * These can be overridden by the mount option parsing.
- */
- mp->m_logbufs = -1;
- mp->m_logbsize = -1;
-
- if (!options)
- goto done;
-
- while ((this_char = strsep(&options, ",")) != NULL) {
- if (!*this_char)
- continue;
- if ((value = strchr(this_char, '=')) != NULL)
- *value++ = 0;
-
- if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return EINVAL;
- }
- mp->m_logbufs = simple_strtoul(value, &eov, 10);
- } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return EINVAL;
- }
- mp->m_logbsize = suffix_strtoul(value, &eov, 10);
- } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return EINVAL;
- }
- mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
- if (!mp->m_logname)
- return ENOMEM;
- } else if (!strcmp(this_char, MNTOPT_MTPT)) {
- xfs_warn(mp, "%s option not allowed on this system",
- this_char);
- return EINVAL;
- } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return EINVAL;
- }
- mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
- if (!mp->m_rtname)
- return ENOMEM;
- } else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return EINVAL;
- }
- iosize = simple_strtoul(value, &eov, 10);
- iosizelog = ffs(iosize) - 1;
- } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return EINVAL;
- }
- iosize = suffix_strtoul(value, &eov, 10);
- iosizelog = ffs(iosize) - 1;
- } else if (!strcmp(this_char, MNTOPT_GRPID) ||
- !strcmp(this_char, MNTOPT_BSDGROUPS)) {
- mp->m_flags |= XFS_MOUNT_GRPID;
- } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
- !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
- mp->m_flags &= ~XFS_MOUNT_GRPID;
- } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
- mp->m_flags |= XFS_MOUNT_WSYNC;
- } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
- mp->m_flags |= XFS_MOUNT_NORECOVERY;
- } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
- mp->m_flags |= XFS_MOUNT_NOALIGN;
- } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
- mp->m_flags |= XFS_MOUNT_SWALLOC;
- } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return EINVAL;
- }
- dsunit = simple_strtoul(value, &eov, 10);
- } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return EINVAL;
- }
- dswidth = simple_strtoul(value, &eov, 10);
- } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
- mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
-#if !XFS_BIG_INUMS
- xfs_warn(mp, "%s option not allowed on this system",
- this_char);
- return EINVAL;
-#endif
- } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
- mp->m_flags |= XFS_MOUNT_NOUUID;
- } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
- mp->m_flags |= XFS_MOUNT_BARRIER;
- } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
- mp->m_flags |= XFS_MOUNT_IKEEP;
- } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
- mp->m_flags &= ~XFS_MOUNT_IKEEP;
- } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
- mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
- } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
- } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
- mp->m_flags |= XFS_MOUNT_ATTR2;
- } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
- mp->m_flags &= ~XFS_MOUNT_ATTR2;
- mp->m_flags |= XFS_MOUNT_NOATTR2;
- } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
- mp->m_flags |= XFS_MOUNT_FILESTREAMS;
- } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
- mp->m_qflags &= ~(XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
- XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
- XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
- XFS_UQUOTA_ENFD | XFS_OQUOTA_ENFD);
- } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
- !strcmp(this_char, MNTOPT_UQUOTA) ||
- !strcmp(this_char, MNTOPT_USRQUOTA)) {
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
- XFS_UQUOTA_ENFD);
- } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
- !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_UQUOTA_ENFD;
- } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
- !strcmp(this_char, MNTOPT_PRJQUOTA)) {
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
- XFS_OQUOTA_ENFD);
- } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_OQUOTA_ENFD;
- } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
- !strcmp(this_char, MNTOPT_GRPQUOTA)) {
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
- XFS_OQUOTA_ENFD);
- } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_OQUOTA_ENFD;
- } else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
- mp->m_flags |= XFS_MOUNT_DELAYLOG;
- } else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
- mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
- } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
- mp->m_flags |= XFS_MOUNT_DISCARD;
- } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
- mp->m_flags &= ~XFS_MOUNT_DISCARD;
- } else if (!strcmp(this_char, "ihashsize")) {
- xfs_warn(mp,
- "ihashsize no longer used, option is deprecated.");
- } else if (!strcmp(this_char, "osyncisdsync")) {
- xfs_warn(mp,
- "osyncisdsync has no effect, option is deprecated.");
- } else if (!strcmp(this_char, "osyncisosync")) {
- xfs_warn(mp,
- "osyncisosync has no effect, option is deprecated.");
- } else if (!strcmp(this_char, "irixsgid")) {
- xfs_warn(mp,
- "irixsgid is now a sysctl(2) variable, option is deprecated.");
- } else {
- xfs_warn(mp, "unknown mount option [%s].", this_char);
- return EINVAL;
- }
- }
-
- /*
- * no recovery flag requires a read-only mount
- */
- if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
- !(mp->m_flags & XFS_MOUNT_RDONLY)) {
- xfs_warn(mp, "no-recovery mounts must be read-only.");
- return EINVAL;
- }
-
- if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
- xfs_warn(mp,
- "sunit and swidth options incompatible with the noalign option");
- return EINVAL;
- }
-
- if ((mp->m_flags & XFS_MOUNT_DISCARD) &&
- !(mp->m_flags & XFS_MOUNT_DELAYLOG)) {
- xfs_warn(mp,
- "the discard option is incompatible with the nodelaylog option");
- return EINVAL;
- }
-
-#ifndef CONFIG_XFS_QUOTA
- if (XFS_IS_QUOTA_RUNNING(mp)) {
- xfs_warn(mp, "quota support not available in this kernel.");
- return EINVAL;
- }
-#endif
-
- if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
- (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
- xfs_warn(mp, "cannot mount with both project and group quota");
- return EINVAL;
- }
-
- if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
- xfs_warn(mp, "sunit and swidth must be specified together");
- return EINVAL;
- }
-
- if (dsunit && (dswidth % dsunit != 0)) {
- xfs_warn(mp,
- "stripe width (%d) must be a multiple of the stripe unit (%d)",
- dswidth, dsunit);
- return EINVAL;
- }
-
-done:
- if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
- /*
- * At this point the superblock has not been read
- * in, therefore we do not know the block size.
- * Before the mount call ends we will convert
- * these to FSBs.
- */
- if (dsunit) {
- mp->m_dalign = dsunit;
- mp->m_flags |= XFS_MOUNT_RETERR;
- }
-
- if (dswidth)
- mp->m_swidth = dswidth;
- }
-
- if (mp->m_logbufs != -1 &&
- mp->m_logbufs != 0 &&
- (mp->m_logbufs < XLOG_MIN_ICLOGS ||
- mp->m_logbufs > XLOG_MAX_ICLOGS)) {
- xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
- mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return XFS_ERROR(EINVAL);
- }
- if (mp->m_logbsize != -1 &&
- mp->m_logbsize != 0 &&
- (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
- mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
- !is_power_of_2(mp->m_logbsize))) {
- xfs_warn(mp,
- "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
- mp->m_logbsize);
- return XFS_ERROR(EINVAL);
- }
-
- if (iosizelog) {
- if (iosizelog > XFS_MAX_IO_LOG ||
- iosizelog < XFS_MIN_IO_LOG) {
- xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
- iosizelog, XFS_MIN_IO_LOG,
- XFS_MAX_IO_LOG);
- return XFS_ERROR(EINVAL);
- }
-
- mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
- mp->m_readio_log = iosizelog;
- mp->m_writeio_log = iosizelog;
- }
-
- return 0;
-}
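/*
 * A few worked examples of the validation rules above, for hypothetical
 * option strings (the values are assumptions, not taken from this code):
 *
 *	-o noalign,sunit=64,swidth=512	EINVAL: noalign excludes sunit/swidth
 *	-o sunit=64			EINVAL: swidth must accompany sunit
 *	-o sunit=64,swidth=100		EINVAL: 100 % 64 != 0
 *	-o sunit=64,swidth=512		OK: stripe width is 8 stripe units
 *	-o norecovery			EINVAL unless ro is also given
 */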
-
-struct proc_xfs_info {
- int flag;
- char *str;
-};
-
-STATIC int
-xfs_showargs(
- struct xfs_mount *mp,
- struct seq_file *m)
-{
- static struct proc_xfs_info xfs_info_set[] = {
- /* the few simple ones we can get from the mount struct */
- { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP },
- { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC },
- { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN },
- { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC },
- { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
- { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
- { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
- { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
- { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
- { XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
- { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
- { 0, NULL }
- };
- static struct proc_xfs_info xfs_info_unset[] = {
- /* the few simple ones we can get from the mount struct */
- { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO },
- { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER },
- { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE },
- { 0, NULL }
- };
- struct proc_xfs_info *xfs_infop;
-
- for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
- if (mp->m_flags & xfs_infop->flag)
- seq_puts(m, xfs_infop->str);
- }
- for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
- if (!(mp->m_flags & xfs_infop->flag))
- seq_puts(m, xfs_infop->str);
- }
-
- if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
- seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
- (int)(1 << mp->m_writeio_log) >> 10);
-
- if (mp->m_logbufs > 0)
- seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
- if (mp->m_logbsize > 0)
- seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
-
- if (mp->m_logname)
- seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
- if (mp->m_rtname)
- seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
-
- if (mp->m_dalign > 0)
- seq_printf(m, "," MNTOPT_SUNIT "=%d",
- (int)XFS_FSB_TO_BB(mp, mp->m_dalign));
- if (mp->m_swidth > 0)
- seq_printf(m, "," MNTOPT_SWIDTH "=%d",
- (int)XFS_FSB_TO_BB(mp, mp->m_swidth));
-
- if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
- seq_puts(m, "," MNTOPT_USRQUOTA);
- else if (mp->m_qflags & XFS_UQUOTA_ACCT)
- seq_puts(m, "," MNTOPT_UQUOTANOENF);
-
- /* Either project or group quotas can be active, not both */
-
- if (mp->m_qflags & XFS_PQUOTA_ACCT) {
- if (mp->m_qflags & XFS_OQUOTA_ENFD)
- seq_puts(m, "," MNTOPT_PRJQUOTA);
- else
- seq_puts(m, "," MNTOPT_PQUOTANOENF);
- } else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
- if (mp->m_qflags & XFS_OQUOTA_ENFD)
- seq_puts(m, "," MNTOPT_GRPQUOTA);
- else
- seq_puts(m, "," MNTOPT_GQUOTANOENF);
- }
-
- if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
- seq_puts(m, "," MNTOPT_NOQUOTA);
-
- return 0;
-}
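/*
 * A sketch of the output this produces, for a hypothetical mount state:
 * with delaylog enabled, logbufs=8 given at mount time and enforced user
 * quota, the option string shown in /proc/mounts would contain
 *
 *	,delaylog,logbufs=8,usrquota
 *
 * Entries in xfs_info_unset are inverted: the option is printed when the
 * corresponding flag is clear, e.g. ",nobarrier" when XFS_MOUNT_BARRIER
 * is unset.
 */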
-__uint64_t
-xfs_max_file_offset(
- unsigned int blockshift)
-{
- unsigned int pagefactor = 1;
- unsigned int bitshift = BITS_PER_LONG - 1;
-
-	/* Figure out the maximum file size. On Linux this can depend
-	 * on the filesystem blocksize (on 32 bit platforms).
-	 * __block_write_begin does this in an [unsigned] long...
-	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
-	 * So, for page sized blocks (4K on 32 bit platforms),
-	 * this wraps at around 8TB (hence MAX_LFS_FILESIZE which is
-	 *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
-	 * but for smaller blocksizes it is less (bbits = log2 bsize).
- * Note1: get_block_t takes a long (implicit cast from above)
- * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
- * can optionally convert the [unsigned] long from above into
- * an [unsigned] long long.
- */
-
-#if BITS_PER_LONG == 32
-# if defined(CONFIG_LBDAF)
- ASSERT(sizeof(sector_t) == 8);
- pagefactor = PAGE_CACHE_SIZE;
- bitshift = BITS_PER_LONG;
-# else
- pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
-# endif
-#endif
-
- return (((__uint64_t)pagefactor) << bitshift) - 1;
-}
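/*
 * Worked numbers for the limit computed above, assuming 4K pages
 * (PAGE_CACHE_SHIFT == 12):
 *
 *	64-bit:			(1ULL << 63) - 1	= 8 EiB - 1
 *	32-bit, CONFIG_LBDAF:	(4096ULL << 32) - 1	= 16 TiB - 1
 *	32-bit, 4K blocks:	((4096 >> 0) << 31) - 1	= 8 TiB - 1
 *	32-bit, 1K blocks:	((4096 >> 2) << 31) - 1	= 2 TiB - 1
 *
 * which matches the "wraps at around 8TB" note above for page sized
 * blocks on 32 bit platforms.
 */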
-
-STATIC int
-xfs_blkdev_get(
- xfs_mount_t *mp,
- const char *name,
- struct block_device **bdevp)
-{
- int error = 0;
-
- *bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
- mp);
- if (IS_ERR(*bdevp)) {
- error = PTR_ERR(*bdevp);
-		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
- }
-
- return -error;
-}
-
-STATIC void
-xfs_blkdev_put(
- struct block_device *bdev)
-{
- if (bdev)
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-}
-
-void
-xfs_blkdev_issue_flush(
- xfs_buftarg_t *buftarg)
-{
- blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
-}
-
-STATIC void
-xfs_close_devices(
- struct xfs_mount *mp)
-{
- if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
- struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
- xfs_free_buftarg(mp, mp->m_logdev_targp);
- xfs_blkdev_put(logdev);
- }
- if (mp->m_rtdev_targp) {
- struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
- xfs_free_buftarg(mp, mp->m_rtdev_targp);
- xfs_blkdev_put(rtdev);
- }
- xfs_free_buftarg(mp, mp->m_ddev_targp);
-}
-
-/*
- * The file system configurations are:
- * (1) device (partition) with data and internal log
- * (2) logical volume with data and log subvolumes.
- * (3) logical volume with data, log, and realtime subvolumes.
- *
- * We only have to handle opening the log and realtime volumes here if
- * they are present. The data subvolume has already been opened by
- * get_sb_bdev() and is stored in sb->s_bdev.
- */
-STATIC int
-xfs_open_devices(
- struct xfs_mount *mp)
-{
- struct block_device *ddev = mp->m_super->s_bdev;
- struct block_device *logdev = NULL, *rtdev = NULL;
- int error;
-
- /*
- * Open real time and log devices - order is important.
- */
- if (mp->m_logname) {
- error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
- if (error)
- goto out;
- }
-
- if (mp->m_rtname) {
- error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
- if (error)
- goto out_close_logdev;
-
- if (rtdev == ddev || rtdev == logdev) {
- xfs_warn(mp,
- "Cannot mount filesystem with identical rtdev and ddev/logdev.");
- error = EINVAL;
- goto out_close_rtdev;
- }
- }
-
- /*
- * Setup xfs_mount buffer target pointers
- */
- error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
- if (!mp->m_ddev_targp)
- goto out_close_rtdev;
-
- if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
- mp->m_fsname);
- if (!mp->m_rtdev_targp)
- goto out_free_ddev_targ;
- }
-
- if (logdev && logdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
- mp->m_fsname);
- if (!mp->m_logdev_targp)
- goto out_free_rtdev_targ;
- } else {
- mp->m_logdev_targp = mp->m_ddev_targp;
- }
-
- return 0;
-
- out_free_rtdev_targ:
- if (mp->m_rtdev_targp)
- xfs_free_buftarg(mp, mp->m_rtdev_targp);
- out_free_ddev_targ:
- xfs_free_buftarg(mp, mp->m_ddev_targp);
- out_close_rtdev:
- if (rtdev)
- xfs_blkdev_put(rtdev);
- out_close_logdev:
- if (logdev && logdev != ddev)
- xfs_blkdev_put(logdev);
- out:
- return error;
-}
-
-/*
- * Setup xfs_mount buffer target pointers based on superblock
- */
-STATIC int
-xfs_setup_devices(
- struct xfs_mount *mp)
-{
- int error;
-
- error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
- mp->m_sb.sb_sectsize);
- if (error)
- return error;
-
- if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
- unsigned int log_sector_size = BBSIZE;
-
- if (xfs_sb_version_hassector(&mp->m_sb))
- log_sector_size = mp->m_sb.sb_logsectsize;
- error = xfs_setsize_buftarg(mp->m_logdev_targp,
- mp->m_sb.sb_blocksize,
- log_sector_size);
- if (error)
- return error;
- }
- if (mp->m_rtdev_targp) {
- error = xfs_setsize_buftarg(mp->m_rtdev_targp,
- mp->m_sb.sb_blocksize,
- mp->m_sb.sb_sectsize);
- if (error)
- return error;
- }
-
- return 0;
-}
-
-/* Catch misguided souls that try to use this interface on XFS */
-STATIC struct inode *
-xfs_fs_alloc_inode(
- struct super_block *sb)
-{
- BUG();
- return NULL;
-}
-
-/*
- * Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
- */
-STATIC void
-xfs_fs_destroy_inode(
- struct inode *inode)
-{
- struct xfs_inode *ip = XFS_I(inode);
-
- trace_xfs_destroy_inode(ip);
-
- XFS_STATS_INC(vn_reclaim);
-
- /* bad inode, get out here ASAP */
- if (is_bad_inode(inode))
- goto out_reclaim;
-
- xfs_ioend_wait(ip);
-
- ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
-
- /*
- * We should never get here with one of the reclaim flags already set.
- */
- ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
- ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
-
- /*
- * We always use background reclaim here because even if the
- * inode is clean, it still may be under IO and hence we have
- * to take the flush lock. The background reclaim path handles
- * this more efficiently than we can here, so simply let background
- * reclaim tear down all inodes.
- */
-out_reclaim:
- xfs_inode_set_reclaim_tag(ip);
-}
-
-/*
- * Slab object creation initialisation for the XFS inode.
- * This covers only the idempotent fields in the XFS inode;
- * all other fields need to be initialised on allocation
- * from the slab. This avoids the need to repeatedly initialise
- * fields in the xfs inode that are left in their initial state
- * when the inode is freed.
- */
-STATIC void
-xfs_fs_inode_init_once(
- void *inode)
-{
- struct xfs_inode *ip = inode;
-
- memset(ip, 0, sizeof(struct xfs_inode));
-
- /* vfs inode */
- inode_init_once(VFS_I(ip));
-
- /* xfs inode */
- atomic_set(&ip->i_iocount, 0);
- atomic_set(&ip->i_pincount, 0);
- spin_lock_init(&ip->i_flags_lock);
- init_waitqueue_head(&ip->i_ipin_wait);
- /*
- * Because we want to use a counting completion, complete
- * the flush completion once to allow a single access to
- * the flush completion without blocking.
- */
- init_completion(&ip->i_flush);
- complete(&ip->i_flush);
-
- mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
- "xfsino", ip->i_ino);
-}
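/*
 * A minimal sketch of the counting-completion idiom set up above. The
 * helper names here are hypothetical; in XFS the real lock and unlock
 * paths are xfs_iflock()/xfs_ifunlock().
 */
static inline void example_iflock(struct xfs_inode *ip)
{
	/* takes the single pre-completed token; blocks while a flush holds it */
	wait_for_completion(&ip->i_flush);
}

static inline void example_ifunlock(struct xfs_inode *ip)
{
	/* returns the token so the next flusher can proceed */
	complete(&ip->i_flush);
}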
-
-/*
- * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode.
- *
- * We need the barrier() to maintain correct ordering between unlogged
- * updates and the transaction commit code that clears the i_update_core
- * field. This requires all updates to be completed before marking the
- * inode dirty.
- */
-STATIC void
-xfs_fs_dirty_inode(
- struct inode *inode,
- int flags)
-{
- barrier();
- XFS_I(inode)->i_update_core = 1;
-}
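/*
 * For reference, barrier() is the compiler barrier from
 * <linux/compiler.h>, conventionally defined as
 *
 *	#define barrier() __asm__ __volatile__("": : :"memory")
 *
 * It prevents the compiler from reordering the i_update_core store
 * above across the unlogged field updates that precede it.
 */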
-
-STATIC int
-xfs_fs_write_inode(
- struct inode *inode,
- struct writeback_control *wbc)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- int error = EAGAIN;
-
- trace_xfs_write_inode(ip);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -XFS_ERROR(EIO);
-
- if (wbc->sync_mode == WB_SYNC_ALL || wbc->for_kupdate) {
- /*
-		 * Make sure the inode has made it into the log. Instead
-		 * of forcing it all the way to stable storage using a
-		 * synchronous transaction we let the log force inside the
-		 * ->sync_fs call do that for us, which reduces the number
-		 * of synchronous log forces dramatically.
- */
- xfs_ioend_wait(ip);
- error = xfs_log_dirty_inode(ip, NULL, 0);
- if (error)
- goto out;
- return 0;
- } else {
- if (!ip->i_update_core)
- return 0;
-
- /*
-		 * We make this non-blocking: if the inode is contended we return
-		 * EAGAIN to indicate to the caller that the flush did not succeed.
-		 * This prevents the flush path from blocking on inodes inside
-		 * another operation right now; they get caught later by
-		 * xfs_sync.
- */
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
- goto out;
-
- if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
- goto out_unlock;
-
- /*
-		 * Now that we have the flush lock and the inode is not pinned, we
- * can check if the inode is really clean as we know that
- * there are no pending transaction completions, it is not
- * waiting on the delayed write queue and there is no IO in
- * progress.
- */
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- error = 0;
- goto out_unlock;
- }
- error = xfs_iflush(ip, SYNC_TRYLOCK);
- }
-
- out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- out:
- /*
- * if we failed to write out the inode then mark
- * it dirty again so we'll try again later.
- */
- if (error)
- xfs_mark_inode_dirty_sync(ip);
- return -error;
-}
-
-STATIC void
-xfs_fs_evict_inode(
- struct inode *inode)
-{
- xfs_inode_t *ip = XFS_I(inode);
-
- trace_xfs_evict_inode(ip);
-
- truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
- XFS_STATS_INC(vn_rele);
- XFS_STATS_INC(vn_remove);
- XFS_STATS_DEC(vn_active);
-
- /*
- * The iolock is used by the file system to coordinate reads,
- * writes, and block truncates. Up to this point the lock
- * protected concurrent accesses by users of the inode. But
- * from here forward we're doing some final processing of the
- * inode because we're done with it, and although we reuse the
- * iolock for protection it is really a distinct lock class
- * (in the lockdep sense) from before. To keep lockdep happy
- * (and basically indicate what we are doing), we explicitly
- * re-init the iolock here.
- */
- ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
- mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
- lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
- &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
-
- xfs_inactive(ip);
-}
-
-STATIC void
-xfs_free_fsname(
- struct xfs_mount *mp)
-{
- kfree(mp->m_fsname);
- kfree(mp->m_rtname);
- kfree(mp->m_logname);
-}
-
-STATIC void
-xfs_fs_put_super(
- struct super_block *sb)
-{
- struct xfs_mount *mp = XFS_M(sb);
-
- /*
- * Unregister the memory shrinker before we tear down the mount
- * structure so we don't have memory reclaim racing with us here.
- */
- xfs_inode_shrinker_unregister(mp);
- xfs_syncd_stop(mp);
-
- /*
- * Blow away any referenced inode in the filestreams cache.
- * This can and will cause log traffic as inodes go inactive
- * here.
- */
- xfs_filestream_unmount(mp);
-
- XFS_bflush(mp->m_ddev_targp);
-
- xfs_unmountfs(mp);
- xfs_freesb(mp);
- xfs_icsb_destroy_counters(mp);
- xfs_close_devices(mp);
- xfs_free_fsname(mp);
- kfree(mp);
-}
-
-STATIC int
-xfs_fs_sync_fs(
- struct super_block *sb,
- int wait)
-{
- struct xfs_mount *mp = XFS_M(sb);
- int error;
-
- /*
- * Not much we can do for the first async pass. Writing out the
-	 * superblock would be counter-productive as we are going to redirty it
-	 * when writing out other data and metadata (and writing out a single
- * block is quite fast anyway).
- *
- * Try to asynchronously kick off quota syncing at least.
- */
- if (!wait) {
- xfs_qm_sync(mp, SYNC_TRYLOCK);
- return 0;
- }
-
- error = xfs_quiesce_data(mp);
- if (error)
- return -error;
-
- if (laptop_mode) {
- /*
- * The disk must be active because we're syncing.
- * We schedule xfssyncd now (now that the disk is
- * active) instead of later (when it might not be).
- */
- flush_delayed_work_sync(&mp->m_sync_work);
- }
-
- return 0;
-}
-
-STATIC int
-xfs_fs_statfs(
- struct dentry *dentry,
- struct kstatfs *statp)
-{
- struct xfs_mount *mp = XFS_M(dentry->d_sb);
- xfs_sb_t *sbp = &mp->m_sb;
- struct xfs_inode *ip = XFS_I(dentry->d_inode);
- __uint64_t fakeinos, id;
- xfs_extlen_t lsize;
- __int64_t ffree;
-
- statp->f_type = XFS_SB_MAGIC;
- statp->f_namelen = MAXNAMELEN - 1;
-
- id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
- statp->f_fsid.val[0] = (u32)id;
- statp->f_fsid.val[1] = (u32)(id >> 32);
-
- xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);
-
- spin_lock(&mp->m_sb_lock);
- statp->f_bsize = sbp->sb_blocksize;
- lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
- statp->f_blocks = sbp->sb_dblocks - lsize;
- statp->f_bfree = statp->f_bavail =
- sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
- fakeinos = statp->f_bfree << sbp->sb_inopblog;
- statp->f_files =
- MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
- if (mp->m_maxicount)
- statp->f_files = min_t(typeof(statp->f_files),
- statp->f_files,
- mp->m_maxicount);
-
- /* make sure statp->f_ffree does not underflow */
- ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
- statp->f_ffree = max_t(__int64_t, ffree, 0);
-
- spin_unlock(&mp->m_sb_lock);
-
- if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
- ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
- (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
- xfs_qm_statvfs(ip, statp);
- return 0;
-}
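/*
 * Worked example of the f_files computation above (all numbers are
 * assumptions): 256-byte inodes in 4K blocks give sb_inopblog = 4,
 * i.e. 16 inodes per block. With sb_icount = 1000 and f_bfree = 5000:
 *
 *	fakeinos = 5000 << 4 = 80000
 *	f_files  = min(1000 + 80000, XFS_MAXINUMBER) = 81000
 *
 * so f_files counts existing inodes plus those that could still be
 * allocated from free space, clamped by m_maxicount when set.
 */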
-
-STATIC void
-xfs_save_resvblks(struct xfs_mount *mp)
-{
- __uint64_t resblks = 0;
-
- mp->m_resblks_save = mp->m_resblks;
- xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC void
-xfs_restore_resvblks(struct xfs_mount *mp)
-{
- __uint64_t resblks;
-
- if (mp->m_resblks_save) {
- resblks = mp->m_resblks_save;
- mp->m_resblks_save = 0;
- } else
- resblks = xfs_default_resblks(mp);
-
- xfs_reserve_blocks(mp, &resblks, NULL);
-}
-
-STATIC int
-xfs_fs_remount(
- struct super_block *sb,
- int *flags,
- char *options)
-{
- struct xfs_mount *mp = XFS_M(sb);
- substring_t args[MAX_OPT_ARGS];
- char *p;
- int error;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
-
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_barrier:
- mp->m_flags |= XFS_MOUNT_BARRIER;
- break;
- case Opt_nobarrier:
- mp->m_flags &= ~XFS_MOUNT_BARRIER;
- break;
- default:
- /*
- * Logically we would return an error here to prevent
- * users from believing they might have changed
- * mount options using remount which can't be changed.
- *
- * But unfortunately mount(8) adds all options from
- * mtab and fstab to the mount arguments in some cases
- * so we can't blindly reject options, but have to
- * check for each specified option if it actually
- * differs from the currently set option and only
- * reject it if that's the case.
- *
- * Until that is implemented we return success for
- * every remount request, and silently ignore all
- * options that we can't actually change.
- */
-#if 0
- xfs_info(mp,
- "mount option \"%s\" not supported for remount\n", p);
- return -EINVAL;
-#else
- break;
-#endif
- }
- }
-
- /* ro -> rw */
- if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
- mp->m_flags &= ~XFS_MOUNT_RDONLY;
-
- /*
- * If this is the first remount to writeable state we
- * might have some superblock changes to update.
- */
- if (mp->m_update_flags) {
- error = xfs_mount_log_sb(mp, mp->m_update_flags);
- if (error) {
- xfs_warn(mp, "failed to write sb changes");
- return error;
- }
- mp->m_update_flags = 0;
- }
-
- /*
- * Fill out the reserve pool if it is empty. Use the stashed
- * value if it is non-zero, otherwise go with the default.
- */
- xfs_restore_resvblks(mp);
- }
-
- /* rw -> ro */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
- /*
- * After we have synced the data but before we sync the
- * metadata, we need to free up the reserve block pool so that
- * the used block count in the superblock on disk is correct at
- * the end of the remount. Stash the current reserve pool size
- * so that if we get remounted rw, we can return it to the same
- * size.
- */
-
- xfs_quiesce_data(mp);
- xfs_save_resvblks(mp);
- xfs_quiesce_attr(mp);
- mp->m_flags |= XFS_MOUNT_RDONLY;
- }
-
- return 0;
-}
-
-/*
- * Second stage of a freeze. The data is already frozen so we only
- * need to take care of the metadata. Once that's done write a dummy
- * record to dirty the log in case of a crash while frozen.
- */
-STATIC int
-xfs_fs_freeze(
- struct super_block *sb)
-{
- struct xfs_mount *mp = XFS_M(sb);
-
- xfs_save_resvblks(mp);
- xfs_quiesce_attr(mp);
- return -xfs_fs_log_dummy(mp);
-}
-
-STATIC int
-xfs_fs_unfreeze(
- struct super_block *sb)
-{
- struct xfs_mount *mp = XFS_M(sb);
-
- xfs_restore_resvblks(mp);
- return 0;
-}
-
-STATIC int
-xfs_fs_show_options(
- struct seq_file *m,
- struct vfsmount *mnt)
-{
- return -xfs_showargs(XFS_M(mnt->mnt_sb), m);
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock _has_ now been read in.
- */
-STATIC int
-xfs_finish_flags(
- struct xfs_mount *mp)
-{
- int ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
-
- /* Fail a mount where the logbuf is smaller than the log stripe */
- if (xfs_sb_version_haslogv2(&mp->m_sb)) {
- if (mp->m_logbsize <= 0 &&
- mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
- mp->m_logbsize = mp->m_sb.sb_logsunit;
- } else if (mp->m_logbsize > 0 &&
- mp->m_logbsize < mp->m_sb.sb_logsunit) {
- xfs_warn(mp,
- "logbuf size must be greater than or equal to log stripe size");
- return XFS_ERROR(EINVAL);
- }
- } else {
- /* Fail a mount if the logbuf is larger than 32K */
- if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
- xfs_warn(mp,
- "logbuf size for version 1 logs must be 16K or 32K");
- return XFS_ERROR(EINVAL);
- }
- }
-
- /*
- * mkfs'ed attr2 will turn on attr2 mount unless explicitly
- * told by noattr2 to turn it off
- */
- if (xfs_sb_version_hasattr2(&mp->m_sb) &&
- !(mp->m_flags & XFS_MOUNT_NOATTR2))
- mp->m_flags |= XFS_MOUNT_ATTR2;
-
- /*
- * prohibit r/w mounts of read-only filesystems
- */
- if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
- xfs_warn(mp,
- "cannot mount a read-only filesystem as read-write");
- return XFS_ERROR(EROFS);
- }
-
- return 0;
-}
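/*
 * Example of the v2 log rule above (the values are assumptions): with a
 * log stripe unit sb_logsunit of 256K and no logbsize= mount option,
 * m_logbsize is silently raised to 256K; an explicit logbsize=32k on
 * the same filesystem fails the mount with EINVAL.
 */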
-
-STATIC int
-xfs_fs_fill_super(
- struct super_block *sb,
- void *data,
- int silent)
-{
- struct inode *root;
- struct xfs_mount *mp = NULL;
- int flags = 0, error = ENOMEM;
-
- mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
- if (!mp)
- goto out;
-
- spin_lock_init(&mp->m_sb_lock);
- mutex_init(&mp->m_growlock);
- atomic_set(&mp->m_active_trans, 0);
-
- mp->m_super = sb;
- sb->s_fs_info = mp;
-
- error = xfs_parseargs(mp, (char *)data);
- if (error)
- goto out_free_fsname;
-
- sb_min_blocksize(sb, BBSIZE);
- sb->s_xattr = xfs_xattr_handlers;
- sb->s_export_op = &xfs_export_operations;
-#ifdef CONFIG_XFS_QUOTA
- sb->s_qcop = &xfs_quotactl_operations;
-#endif
- sb->s_op = &xfs_super_operations;
-
- if (silent)
- flags |= XFS_MFSI_QUIET;
-
- error = xfs_open_devices(mp);
- if (error)
- goto out_free_fsname;
-
- error = xfs_icsb_init_counters(mp);
- if (error)
- goto out_close_devices;
-
- error = xfs_readsb(mp, flags);
- if (error)
- goto out_destroy_counters;
-
- error = xfs_finish_flags(mp);
- if (error)
- goto out_free_sb;
-
- error = xfs_setup_devices(mp);
- if (error)
- goto out_free_sb;
-
- error = xfs_filestream_mount(mp);
- if (error)
- goto out_free_sb;
-
- /*
- * we must configure the block size in the superblock before we run the
- * full mount process as the mount process can lookup and cache inodes.
- * For the same reason we must also initialise the syncd and register
- * the inode cache shrinker so that inodes can be reclaimed during
- * operations like a quotacheck that iterate all inodes in the
- * filesystem.
- */
- sb->s_magic = XFS_SB_MAGIC;
- sb->s_blocksize = mp->m_sb.sb_blocksize;
- sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
- sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
- sb->s_time_gran = 1;
- set_posix_acl_flag(sb);
-
- xfs_inode_shrinker_register(mp);
-
- error = xfs_mountfs(mp);
- if (error)
- goto out_filestream_unmount;
-
- error = xfs_syncd_init(mp);
- if (error)
- goto out_unmount;
-
- root = igrab(VFS_I(mp->m_rootip));
- if (!root) {
- error = ENOENT;
- goto out_syncd_stop;
- }
- if (is_bad_inode(root)) {
- error = EINVAL;
- goto out_syncd_stop;
- }
- sb->s_root = d_alloc_root(root);
- if (!sb->s_root) {
- error = ENOMEM;
- goto out_iput;
- }
-
- return 0;
-
- out_filestream_unmount:
- xfs_inode_shrinker_unregister(mp);
- xfs_filestream_unmount(mp);
- out_free_sb:
- xfs_freesb(mp);
- out_destroy_counters:
- xfs_icsb_destroy_counters(mp);
- out_close_devices:
- xfs_close_devices(mp);
- out_free_fsname:
- xfs_free_fsname(mp);
- kfree(mp);
- out:
- return -error;
-
- out_iput:
- iput(root);
- out_syncd_stop:
- xfs_syncd_stop(mp);
- out_unmount:
- xfs_inode_shrinker_unregister(mp);
-
- /*
- * Blow away any referenced inode in the filestreams cache.
- * This can and will cause log traffic as inodes go inactive
- * here.
- */
- xfs_filestream_unmount(mp);
-
- XFS_bflush(mp->m_ddev_targp);
-
- xfs_unmountfs(mp);
- goto out_free_sb;
-}
-
-STATIC struct dentry *
-xfs_fs_mount(
- struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *data)
-{
- return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
-}
-
-static const struct super_operations xfs_super_operations = {
- .alloc_inode = xfs_fs_alloc_inode,
- .destroy_inode = xfs_fs_destroy_inode,
- .dirty_inode = xfs_fs_dirty_inode,
- .write_inode = xfs_fs_write_inode,
- .evict_inode = xfs_fs_evict_inode,
- .put_super = xfs_fs_put_super,
- .sync_fs = xfs_fs_sync_fs,
- .freeze_fs = xfs_fs_freeze,
- .unfreeze_fs = xfs_fs_unfreeze,
- .statfs = xfs_fs_statfs,
- .remount_fs = xfs_fs_remount,
- .show_options = xfs_fs_show_options,
-};
-
-static struct file_system_type xfs_fs_type = {
- .owner = THIS_MODULE,
- .name = "xfs",
- .mount = xfs_fs_mount,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-
-STATIC int __init
-xfs_init_zones(void)
-{
- xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
- if (!xfs_ioend_zone)
- goto out;
-
- xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
- xfs_ioend_zone);
- if (!xfs_ioend_pool)
- goto out_destroy_ioend_zone;
-
- xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
- "xfs_log_ticket");
- if (!xfs_log_ticket_zone)
- goto out_destroy_ioend_pool;
-
- xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
- "xfs_bmap_free_item");
- if (!xfs_bmap_free_item_zone)
- goto out_destroy_log_ticket_zone;
-
- xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
- "xfs_btree_cur");
- if (!xfs_btree_cur_zone)
- goto out_destroy_bmap_free_item_zone;
-
- xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
- "xfs_da_state");
- if (!xfs_da_state_zone)
- goto out_destroy_btree_cur_zone;
-
- xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
- if (!xfs_dabuf_zone)
- goto out_destroy_da_state_zone;
-
- xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
- if (!xfs_ifork_zone)
- goto out_destroy_dabuf_zone;
-
- xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
- if (!xfs_trans_zone)
- goto out_destroy_ifork_zone;
-
- xfs_log_item_desc_zone =
- kmem_zone_init(sizeof(struct xfs_log_item_desc),
- "xfs_log_item_desc");
- if (!xfs_log_item_desc_zone)
- goto out_destroy_trans_zone;
-
- /*
- * The size of the zone allocated buf log item is the maximum
- * size possible under XFS. This wastes a little bit of memory,
- * but it is much faster.
- */
- xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
- (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
- NBWORD) * sizeof(int))), "xfs_buf_item");
- if (!xfs_buf_item_zone)
- goto out_destroy_log_item_desc_zone;
-
- xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
- ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))), "xfs_efd_item");
- if (!xfs_efd_zone)
- goto out_destroy_buf_item_zone;
-
- xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
- ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))), "xfs_efi_item");
- if (!xfs_efi_zone)
- goto out_destroy_efd_zone;
-
- xfs_inode_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
- xfs_fs_inode_init_once);
- if (!xfs_inode_zone)
- goto out_destroy_efi_zone;
-
- xfs_ili_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
- KM_ZONE_SPREAD, NULL);
- if (!xfs_ili_zone)
- goto out_destroy_inode_zone;
-
- return 0;
-
- out_destroy_inode_zone:
- kmem_zone_destroy(xfs_inode_zone);
- out_destroy_efi_zone:
- kmem_zone_destroy(xfs_efi_zone);
- out_destroy_efd_zone:
- kmem_zone_destroy(xfs_efd_zone);
- out_destroy_buf_item_zone:
- kmem_zone_destroy(xfs_buf_item_zone);
- out_destroy_log_item_desc_zone:
- kmem_zone_destroy(xfs_log_item_desc_zone);
- out_destroy_trans_zone:
- kmem_zone_destroy(xfs_trans_zone);
- out_destroy_ifork_zone:
- kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_dabuf_zone:
- kmem_zone_destroy(xfs_dabuf_zone);
- out_destroy_da_state_zone:
- kmem_zone_destroy(xfs_da_state_zone);
- out_destroy_btree_cur_zone:
- kmem_zone_destroy(xfs_btree_cur_zone);
- out_destroy_bmap_free_item_zone:
- kmem_zone_destroy(xfs_bmap_free_item_zone);
- out_destroy_log_ticket_zone:
- kmem_zone_destroy(xfs_log_ticket_zone);
- out_destroy_ioend_pool:
- mempool_destroy(xfs_ioend_pool);
- out_destroy_ioend_zone:
- kmem_zone_destroy(xfs_ioend_zone);
- out:
- return -ENOMEM;
-}
-
-STATIC void
-xfs_destroy_zones(void)
-{
- kmem_zone_destroy(xfs_ili_zone);
- kmem_zone_destroy(xfs_inode_zone);
- kmem_zone_destroy(xfs_efi_zone);
- kmem_zone_destroy(xfs_efd_zone);
- kmem_zone_destroy(xfs_buf_item_zone);
- kmem_zone_destroy(xfs_log_item_desc_zone);
- kmem_zone_destroy(xfs_trans_zone);
- kmem_zone_destroy(xfs_ifork_zone);
- kmem_zone_destroy(xfs_dabuf_zone);
- kmem_zone_destroy(xfs_da_state_zone);
- kmem_zone_destroy(xfs_btree_cur_zone);
- kmem_zone_destroy(xfs_bmap_free_item_zone);
- kmem_zone_destroy(xfs_log_ticket_zone);
- mempool_destroy(xfs_ioend_pool);
- kmem_zone_destroy(xfs_ioend_zone);
-}
-
-STATIC int __init
-xfs_init_workqueues(void)
-{
- /*
-	 * max_active is set to 8 to give enough concurrency to allow
- * multiple work operations on each CPU to run. This allows multiple
- * filesystems to be running sync work concurrently, and scales with
- * the number of CPUs in the system.
- */
- xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_CPU_INTENSIVE, 8);
- if (!xfs_syncd_wq)
- return -ENOMEM;
- return 0;
-}
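/*
 * Note on the parameters (general workqueue semantics, not specific to
 * this code): WQ_CPU_INTENSIVE exempts the work items from the
 * scheduler's concurrency management, and max_active = 8 caps the
 * number of work items of this queue executing concurrently per CPU.
 */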
-
-STATIC void
-xfs_destroy_workqueues(void)
-{
- destroy_workqueue(xfs_syncd_wq);
-}
-
-STATIC int __init
-init_xfs_fs(void)
-{
- int error;
-
- printk(KERN_INFO XFS_VERSION_STRING " with "
- XFS_BUILD_OPTIONS " enabled\n");
-
- xfs_ioend_init();
- xfs_dir_startup();
-
- error = xfs_init_zones();
- if (error)
- goto out;
-
- error = xfs_init_workqueues();
- if (error)
- goto out_destroy_zones;
-
- error = xfs_mru_cache_init();
- if (error)
- goto out_destroy_wq;
-
- error = xfs_filestream_init();
- if (error)
- goto out_mru_cache_uninit;
-
- error = xfs_buf_init();
- if (error)
- goto out_filestream_uninit;
-
- error = xfs_init_procfs();
- if (error)
- goto out_buf_terminate;
-
- error = xfs_sysctl_register();
- if (error)
- goto out_cleanup_procfs;
-
- vfs_initquota();
-
- error = register_filesystem(&xfs_fs_type);
- if (error)
- goto out_sysctl_unregister;
- return 0;
-
- out_sysctl_unregister:
- xfs_sysctl_unregister();
- out_cleanup_procfs:
- xfs_cleanup_procfs();
- out_buf_terminate:
- xfs_buf_terminate();
- out_filestream_uninit:
- xfs_filestream_uninit();
- out_mru_cache_uninit:
- xfs_mru_cache_uninit();
- out_destroy_wq:
- xfs_destroy_workqueues();
- out_destroy_zones:
- xfs_destroy_zones();
- out:
- return error;
-}
-
-STATIC void __exit
-exit_xfs_fs(void)
-{
- vfs_exitquota();
- unregister_filesystem(&xfs_fs_type);
- xfs_sysctl_unregister();
- xfs_cleanup_procfs();
- xfs_buf_terminate();
- xfs_filestream_uninit();
- xfs_mru_cache_uninit();
- xfs_destroy_workqueues();
- xfs_destroy_zones();
-}
-
-module_init(init_xfs_fs);
-module_exit(exit_xfs_fs);
-
-MODULE_AUTHOR("Silicon Graphics, Inc.");
-MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
-MODULE_LICENSE("GPL");
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
deleted file mode 100644
index 50a3266..0000000
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPER_H__
-#define __XFS_SUPER_H__
-
-#include <linux/exportfs.h>
-
-#ifdef CONFIG_XFS_QUOTA
-extern void xfs_qm_init(void);
-extern void xfs_qm_exit(void);
-# define vfs_initquota() xfs_qm_init()
-# define vfs_exitquota() xfs_qm_exit()
-#else
-# define vfs_initquota() do { } while (0)
-# define vfs_exitquota() do { } while (0)
-#endif
-
-#ifdef CONFIG_XFS_POSIX_ACL
-# define XFS_ACL_STRING "ACLs, "
-# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL)
-#else
-# define XFS_ACL_STRING
-# define set_posix_acl_flag(sb) do { } while (0)
-#endif
-
-#define XFS_SECURITY_STRING "security attributes, "
-
-#ifdef CONFIG_XFS_RT
-# define XFS_REALTIME_STRING "realtime, "
-#else
-# define XFS_REALTIME_STRING
-#endif
-
-#if XFS_BIG_BLKNOS
-# if XFS_BIG_INUMS
-# define XFS_BIGFS_STRING "large block/inode numbers, "
-# else
-# define XFS_BIGFS_STRING "large block numbers, "
-# endif
-#else
-# define XFS_BIGFS_STRING
-#endif
-
-#ifdef DEBUG
-# define XFS_DBG_STRING "debug"
-#else
-# define XFS_DBG_STRING "no debug"
-#endif
-
-#define XFS_VERSION_STRING "SGI XFS"
-#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
- XFS_SECURITY_STRING \
- XFS_REALTIME_STRING \
- XFS_BIGFS_STRING \
- XFS_DBG_STRING /* DBG must be last */
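/*
 * Example expansion of the macro above for an assumed configuration
 * with CONFIG_XFS_POSIX_ACL and CONFIG_XFS_RT set and DEBUG unset: the
 * string literals concatenate to
 *
 *	"ACLs, security attributes, realtime, no debug"
 *
 * which init_xfs_fs() then prints as
 *	"SGI XFS with ACLs, security attributes, realtime, no debug enabled"
 */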
-
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_buftarg;
-struct block_device;
-
-extern __uint64_t xfs_max_file_offset(unsigned int);
-
-extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-
-extern const struct export_operations xfs_export_operations;
-extern const struct xattr_handler *xfs_xattr_handlers[];
-extern const struct quotactl_ops xfs_quotactl_operations;
-
-#define XFS_M(sb) ((struct xfs_mount *)((sb)->s_fs_info))
-
-#endif /* __XFS_SUPER_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
deleted file mode 100644
index 2f277a0..0000000
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ /dev/null
@@ -1,1132 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_dinode.h"
-#include "xfs_error.h"
-#include "xfs_filestream.h"
-#include "xfs_vnodeops.h"
-#include "xfs_inode_item.h"
-#include "xfs_quota.h"
-#include "xfs_trace.h"
-#include "xfs_fsops.h"
-
-#include <linux/kthread.h>
-#include <linux/freezer.h>
-
-struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
-
-/*
- * The inode lookup is done in batches to keep the amount of lock traffic and
- * radix tree lookups to a minimum. The batch size is a trade off between
- * lookup reduction and stack usage. This is in the reclaim path, so we can't
- * be too greedy.
- */
-#define XFS_LOOKUP_BATCH 32
-
-STATIC int
-xfs_inode_ag_walk_grab(
- struct xfs_inode *ip)
-{
- struct inode *inode = VFS_I(ip);
-
- ASSERT(rcu_read_lock_held());
-
- /*
- * check for stale RCU freed inode
- *
- * If the inode has been reallocated, it doesn't matter if it's not in
- * the AG we are walking - we are walking for writeback, so if it
- * passes all the "valid inode" checks and is dirty, then we'll write
-	 * it back anyway. If it has been reallocated and is still being
-	 * initialised, the XFS_INEW check below will catch it.
- */
- spin_lock(&ip->i_flags_lock);
- if (!ip->i_ino)
- goto out_unlock_noent;
-
- /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
- if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
- goto out_unlock_noent;
- spin_unlock(&ip->i_flags_lock);
-
- /* nothing to sync during shutdown */
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return EFSCORRUPTED;
-
-	/* If we can't grab the inode, it must be on its way to reclaim. */
- if (!igrab(inode))
- return ENOENT;
-
- if (is_bad_inode(inode)) {
- IRELE(ip);
- return ENOENT;
- }
-
- /* inode is valid */
- return 0;
-
-out_unlock_noent:
- spin_unlock(&ip->i_flags_lock);
- return ENOENT;
-}
-
-STATIC int
-xfs_inode_ag_walk(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
- int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags),
- int flags)
-{
- uint32_t first_index;
- int last_error = 0;
- int skipped;
- int done;
- int nr_found;
-
-restart:
- done = 0;
- skipped = 0;
- first_index = 0;
- nr_found = 0;
- do {
- struct xfs_inode *batch[XFS_LOOKUP_BATCH];
- int error = 0;
- int i;
-
- rcu_read_lock();
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void **)batch, first_index,
- XFS_LOOKUP_BATCH);
- if (!nr_found) {
- rcu_read_unlock();
- break;
- }
-
- /*
-		 * Grab the inodes before we drop the lock. If we found
-		 * nothing, nr_found == 0 and the loop will be skipped.
- */
- for (i = 0; i < nr_found; i++) {
- struct xfs_inode *ip = batch[i];
-
- if (done || xfs_inode_ag_walk_grab(ip))
- batch[i] = NULL;
-
- /*
- * Update the index for the next lookup. Catch
- * overflows into the next AG range which can occur if
- * we have inodes in the last block of the AG and we
- * are currently pointing to the last inode.
- *
- * Because we may see inodes that are from the wrong AG
- * due to RCU freeing and reallocation, only update the
-			 * index if it lies in this AG. It was a race that led
- * us to see this inode, so another lookup from the
- * same index will not find it again.
- */
- if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
- continue;
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- done = 1;
- }
-
-		/* unlock now that we've grabbed the inodes. */
- rcu_read_unlock();
-
- for (i = 0; i < nr_found; i++) {
- if (!batch[i])
- continue;
- error = execute(batch[i], pag, flags);
- IRELE(batch[i]);
- if (error == EAGAIN) {
- skipped++;
- continue;
- }
- if (error && last_error != EFSCORRUPTED)
- last_error = error;
- }
-
- /* bail out if the filesystem is corrupted. */
- if (error == EFSCORRUPTED)
- break;
-
- } while (nr_found && !done);
-
- if (skipped) {
- delay(1);
- goto restart;
- }
- return last_error;
-}
-
-int
-xfs_inode_ag_iterator(
- struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip,
- struct xfs_perag *pag, int flags),
- int flags)
-{
- struct xfs_perag *pag;
- int error = 0;
- int last_error = 0;
- xfs_agnumber_t ag;
-
- ag = 0;
- while ((pag = xfs_perag_get(mp, ag))) {
- ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags);
- xfs_perag_put(pag);
- if (error) {
- last_error = error;
- if (error == EFSCORRUPTED)
- break;
- }
- }
- return XFS_ERROR(last_error);
-}
-
-STATIC int
-xfs_sync_inode_data(
- struct xfs_inode *ip,
- struct xfs_perag *pag,
- int flags)
-{
- struct inode *inode = VFS_I(ip);
- struct address_space *mapping = inode->i_mapping;
- int error = 0;
-
- if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- goto out_wait;
-
- if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) {
- if (flags & SYNC_TRYLOCK)
- goto out_wait;
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- }
-
- error = xfs_flush_pages(ip, 0, -1, (flags & SYNC_WAIT) ?
- 0 : XBF_ASYNC, FI_NONE);
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
-
- out_wait:
- if (flags & SYNC_WAIT)
- xfs_ioend_wait(ip);
- return error;
-}
-
-STATIC int
-xfs_sync_inode_attr(
- struct xfs_inode *ip,
- struct xfs_perag *pag,
- int flags)
-{
- int error = 0;
-
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- if (xfs_inode_clean(ip))
- goto out_unlock;
- if (!xfs_iflock_nowait(ip)) {
- if (!(flags & SYNC_WAIT))
- goto out_unlock;
- xfs_iflock(ip);
- }
-
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- goto out_unlock;
- }
-
- error = xfs_iflush(ip, flags);
-
- /*
- * We don't want to try again on non-blocking flushes that can't run
- * again immediately. If an inode really must be written, then that's
- * what the SYNC_WAIT flag is for.
- */
- if (error == EAGAIN) {
- ASSERT(!(flags & SYNC_WAIT));
- error = 0;
- }
-
- out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- return error;
-}
-
-/*
- * Write out pagecache data for the whole filesystem.
- */
-STATIC int
-xfs_sync_data(
- struct xfs_mount *mp,
- int flags)
-{
- int error;
-
- ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
-
- error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
- if (error)
- return XFS_ERROR(error);
-
- xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
- return 0;
-}
-
-/*
- * Write out inode metadata (attributes) for the whole filesystem.
- */
-STATIC int
-xfs_sync_attr(
- struct xfs_mount *mp,
- int flags)
-{
- ASSERT((flags & ~SYNC_WAIT) == 0);
-
- return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
-}
-
-STATIC int
-xfs_sync_fsdata(
- struct xfs_mount *mp)
-{
- struct xfs_buf *bp;
-
- /*
- * If the buffer is pinned then push on the log so we won't get stuck
- * waiting in the write for someone, maybe ourselves, to flush the log.
- *
-	 * Even if the log has been pushed recently, we did not have the
-	 * superblock buffer locked at that point, so it can become pinned
-	 * between then and here.
- */
- bp = xfs_getsb(mp, 0);
- if (XFS_BUF_ISPINNED(bp))
- xfs_log_force(mp, 0);
-
- return xfs_bwrite(mp, bp);
-}
-
-int
-xfs_log_dirty_inode(
- struct xfs_inode *ip,
- struct xfs_perag *pag,
- int flags)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- if (!ip->i_update_core)
- return 0;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- return xfs_trans_commit(tp, 0);
-}
-
-/*
- * When remounting a filesystem read-only or freezing the filesystem, we have
- * two phases to execute. This first phase is syncing the data before we
- * quiesce the filesystem, and the second is flushing all the inodes out after
- * we've waited for all the transactions created by the first phase to
- * complete. The second phase ensures that the inodes are written to their
- * location on disk rather than just existing in transactions in the log. This
- * means after a quiesce there is no log replay required to write the inodes to
- * disk (this is the main difference between a sync and a quiesce).
- */
-/*
- * First stage of freeze - no writers will make progress now we are here,
- * so we flush delwri and delalloc buffers here, then wait for all I/O to
- * complete. Data is frozen at that point. Metadata is not frozen,
- * transactions can still occur here so don't bother flushing the buftarg
- * because it'll just get dirty again.
- */
-int
-xfs_quiesce_data(
- struct xfs_mount *mp)
-{
- int error, error2 = 0;
-
- /* push non-blocking */
- xfs_sync_data(mp, 0);
- xfs_qm_sync(mp, SYNC_TRYLOCK);
-
- /* push and block till complete */
- xfs_sync_data(mp, SYNC_WAIT);
-
- /*
- * Log all pending size and timestamp updates. The vfs writeback
-	 * code is supposed to do this, but due to its overaggressive
- * livelock detection it will skip inodes where appending writes
- * were written out in the first non-blocking sync phase if their
- * completion took long enough that it happened after taking the
- * timestamp for the cut-off in the blocking phase.
- */
- xfs_inode_ag_iterator(mp, xfs_log_dirty_inode, 0);
-
- xfs_qm_sync(mp, SYNC_WAIT);
-
- /* write superblock and hoover up shutdown errors */
- error = xfs_sync_fsdata(mp);
-
- /* make sure all delwri buffers are written out */
- xfs_flush_buftarg(mp->m_ddev_targp, 1);
-
- /* mark the log as covered if needed */
- if (xfs_log_need_covered(mp))
- error2 = xfs_fs_log_dummy(mp);
-
- /* flush data-only devices */
- if (mp->m_rtdev_targp)
- XFS_bflush(mp->m_rtdev_targp);
-
- return error ? error : error2;
-}
-
-STATIC void
-xfs_quiesce_fs(
- struct xfs_mount *mp)
-{
- int count = 0, pincount;
-
- xfs_reclaim_inodes(mp, 0);
- xfs_flush_buftarg(mp->m_ddev_targp, 0);
-
- /*
-	 * This loop must run at least twice. The first pass flushes most
-	 * metadata, but that will generate more metadata (typically
-	 * directory updates), which then must be flushed and logged before
-	 * we can write the unmount record. We also do a sync reclaim of
-	 * inodes to catch any that the above delwri flush skipped.
- */
- do {
- xfs_reclaim_inodes(mp, SYNC_WAIT);
- xfs_sync_attr(mp, SYNC_WAIT);
- pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1);
- if (!pincount) {
- delay(50);
- count++;
- }
- } while (count < 2);
-}
-
-/*
- * Second stage of a quiesce. The data is already synced, now we have to take
- * care of the metadata. New transactions are already blocked, so we need to
- * wait for any remaining transactions to drain out before proceeding.
- */
-void
-xfs_quiesce_attr(
- struct xfs_mount *mp)
-{
- int error = 0;
-
- /* wait for all modifications to complete */
- while (atomic_read(&mp->m_active_trans) > 0)
- delay(100);
-
- /* flush inodes and push all remaining buffers out to disk */
- xfs_quiesce_fs(mp);
-
- /*
- * Just warn here till VFS can correctly support
- * read-only remount without racing.
- */
- WARN_ON(atomic_read(&mp->m_active_trans) != 0);
-
- /* Push the superblock and write an unmount record */
- error = xfs_log_sbcount(mp, 1);
- if (error)
-		xfs_warn(mp, "xfs_quiesce_attr: failed to log sb changes. "
-				"Frozen image may not be consistent.");
- xfs_log_unmount_write(mp);
- xfs_unmountfs_writesb(mp);
-}
-
-static void
-xfs_syncd_queue_sync(
- struct xfs_mount *mp)
-{
- queue_delayed_work(xfs_syncd_wq, &mp->m_sync_work,
- msecs_to_jiffies(xfs_syncd_centisecs * 10));
-}
-
-/*
- * Every sync period we need to unpin all items, reclaim inodes and sync
- * disk quotas. We might need to cover the log to indicate that the
- * filesystem is idle and not frozen.
- */
-STATIC void
-xfs_sync_worker(
- struct work_struct *work)
-{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_sync_work);
- int error;
-
- if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
- /* dgc: errors ignored here */
- if (mp->m_super->s_frozen == SB_UNFROZEN &&
- xfs_log_need_covered(mp))
- error = xfs_fs_log_dummy(mp);
- else
- xfs_log_force(mp, 0);
- error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-
- /* start pushing all the metadata that is currently dirty */
- xfs_ail_push_all(mp->m_ail);
- }
-
- /* queue us up again */
- xfs_syncd_queue_sync(mp);
-}
-
-/*
- * Queue a new inode reclaim pass if there are reclaimable inodes and there
- * isn't a reclaim pass already in progress. By default it runs every 5s based
- * on the xfs syncd work default of 30s. Perhaps this should have its own
- * tunable, but that can be done if this method proves to be ineffective or too
- * aggressive.
- */
-static void
-xfs_syncd_queue_reclaim(
- struct xfs_mount *mp)
-{
-
- /*
- * We can have inodes enter reclaim after we've shut down the syncd
- * workqueue during unmount, so don't allow reclaim work to be queued
- * during unmount.
- */
- if (!(mp->m_super->s_flags & MS_ACTIVE))
- return;
-
- rcu_read_lock();
- if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) {
- queue_delayed_work(xfs_syncd_wq, &mp->m_reclaim_work,
- msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
- }
- rcu_read_unlock();
-}
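/*
 * Worked numbers for the two queueing intervals (assumes the usual
 * xfssyncd_centisecs sysctl default of 3000, i.e. 30 seconds):
 *
 *	sync:    msecs_to_jiffies(3000 * 10)	  -> 30000 ms = 30 s
 *	reclaim: msecs_to_jiffies(3000 / 6 * 10)  ->  5000 ms =  5 s
 *
 * matching the "every 5s based on ... 30s" note in the comment above.
 */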
-
-/*
- * This is a fast pass over the inode cache to try to get reclaim moving on as
- * many inodes as possible in a short period of time. It kicks itself every few
- * seconds, as well as being kicked by the inode cache shrinker when memory
- * goes low. It scans as quickly as possible avoiding locked inodes or those
- * already being flushed, and once done schedules a future pass.
- */
-STATIC void
-xfs_reclaim_worker(
- struct work_struct *work)
-{
- struct xfs_mount *mp = container_of(to_delayed_work(work),
- struct xfs_mount, m_reclaim_work);
-
- xfs_reclaim_inodes(mp, SYNC_TRYLOCK);
- xfs_syncd_queue_reclaim(mp);
-}
-
-/*
- * Flush delayed allocate data, attempting to free up reserved space
- * from existing allocations. At this point a new allocation attempt
- * has failed with ENOSPC and we are in the process of scratching our
- * heads, looking about for more room.
- *
- * Queue a new data flush if there isn't one already in progress and
- * wait for completion of the flush. This means that we only ever have one
 - * inode flush in progress no matter how many ENOSPC events occur, and
- * so prevents the system from bogging down due to every concurrent
- * ENOSPC event scanning all the active inodes in the system for writeback.
- */
-void
-xfs_flush_inodes(
- struct xfs_inode *ip)
-{
- struct xfs_mount *mp = ip->i_mount;
-
- queue_work(xfs_syncd_wq, &mp->m_flush_work);
- flush_work_sync(&mp->m_flush_work);
-}
-
-STATIC void
-xfs_flush_worker(
- struct work_struct *work)
-{
- struct xfs_mount *mp = container_of(work,
- struct xfs_mount, m_flush_work);
-
- xfs_sync_data(mp, SYNC_TRYLOCK);
- xfs_sync_data(mp, SYNC_TRYLOCK | SYNC_WAIT);
-}
-
-int
-xfs_syncd_init(
- struct xfs_mount *mp)
-{
- INIT_WORK(&mp->m_flush_work, xfs_flush_worker);
- INIT_DELAYED_WORK(&mp->m_sync_work, xfs_sync_worker);
- INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-
- xfs_syncd_queue_sync(mp);
- xfs_syncd_queue_reclaim(mp);
-
- return 0;
-}
-
-void
-xfs_syncd_stop(
- struct xfs_mount *mp)
-{
- cancel_delayed_work_sync(&mp->m_sync_work);
- cancel_delayed_work_sync(&mp->m_reclaim_work);
- cancel_work_sync(&mp->m_flush_work);
-}
-
-void
-__xfs_inode_set_reclaim_tag(
- struct xfs_perag *pag,
- struct xfs_inode *ip)
-{
- radix_tree_tag_set(&pag->pag_ici_root,
- XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino),
- XFS_ICI_RECLAIM_TAG);
-
- if (!pag->pag_ici_reclaimable) {
- /* propagate the reclaim tag up into the perag radix tree */
- spin_lock(&ip->i_mount->m_perag_lock);
- radix_tree_tag_set(&ip->i_mount->m_perag_tree,
- XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
- XFS_ICI_RECLAIM_TAG);
- spin_unlock(&ip->i_mount->m_perag_lock);
-
- /* schedule periodic background inode reclaim */
- xfs_syncd_queue_reclaim(ip->i_mount);
-
- trace_xfs_perag_set_reclaim(ip->i_mount, pag->pag_agno,
- -1, _RET_IP_);
- }
- pag->pag_ici_reclaimable++;
-}
-
-/*
- * We set the inode flag atomically with the radix tree tag.
- * Once we get tag lookups on the radix tree, this inode flag
- * can go away.
- */
-void
-xfs_inode_set_reclaim_tag(
- xfs_inode_t *ip)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_perag *pag;
-
- pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
- spin_lock(&pag->pag_ici_lock);
- spin_lock(&ip->i_flags_lock);
- __xfs_inode_set_reclaim_tag(pag, ip);
- __xfs_iflags_set(ip, XFS_IRECLAIMABLE);
- spin_unlock(&ip->i_flags_lock);
- spin_unlock(&pag->pag_ici_lock);
- xfs_perag_put(pag);
-}
-
-STATIC void
-__xfs_inode_clear_reclaim(
- xfs_perag_t *pag,
- xfs_inode_t *ip)
-{
- pag->pag_ici_reclaimable--;
- if (!pag->pag_ici_reclaimable) {
- /* clear the reclaim tag from the perag radix tree */
- spin_lock(&ip->i_mount->m_perag_lock);
- radix_tree_tag_clear(&ip->i_mount->m_perag_tree,
- XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino),
- XFS_ICI_RECLAIM_TAG);
- spin_unlock(&ip->i_mount->m_perag_lock);
- trace_xfs_perag_clear_reclaim(ip->i_mount, pag->pag_agno,
- -1, _RET_IP_);
- }
-}
-
-void
-__xfs_inode_clear_reclaim_tag(
- xfs_mount_t *mp,
- xfs_perag_t *pag,
- xfs_inode_t *ip)
-{
- radix_tree_tag_clear(&pag->pag_ici_root,
- XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
- __xfs_inode_clear_reclaim(pag, ip);
-}
-
-/*
- * Grab the inode for reclaim exclusively.
- * Return 0 if we grabbed it, non-zero otherwise.
- */
-STATIC int
-xfs_reclaim_inode_grab(
- struct xfs_inode *ip,
- int flags)
-{
- ASSERT(rcu_read_lock_held());
-
- /* quick check for stale RCU freed inode */
- if (!ip->i_ino)
- return 1;
-
- /*
- * do some unlocked checks first to avoid unnecessary lock traffic.
-	 * The first is a flush lock check, the second an already-in-reclaim
- * check. Only do these checks if we are not going to block on locks.
- */
- if ((flags & SYNC_TRYLOCK) &&
- (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
- return 1;
- }
-
- /*
- * The radix tree lock here protects a thread in xfs_iget from racing
- * with us starting reclaim on the inode. Once we have the
- * XFS_IRECLAIM flag set it will not touch us.
- *
- * Due to RCU lookup, we may find inodes that have been freed and only
- * have XFS_IRECLAIM set. Indeed, we may see reallocated inodes that
- * aren't candidates for reclaim at all, so we must check the
- * XFS_IRECLAIMABLE is set first before proceeding to reclaim.
- */
- spin_lock(&ip->i_flags_lock);
- if (!__xfs_iflags_test(ip, XFS_IRECLAIMABLE) ||
- __xfs_iflags_test(ip, XFS_IRECLAIM)) {
- /* not a reclaim candidate. */
- spin_unlock(&ip->i_flags_lock);
- return 1;
- }
- __xfs_iflags_set(ip, XFS_IRECLAIM);
- spin_unlock(&ip->i_flags_lock);
- return 0;
-}
-
-/*
- * Inodes in different states need to be treated differently, and the return
- * value of xfs_iflush is not sufficient to get this right. The following table
- * lists the inode states and the reclaim actions necessary for non-blocking
- * reclaim:
- *
- *
- * inode state iflush ret required action
- * --------------- ---------- ---------------
- * bad - reclaim
- * shutdown EIO unpin and reclaim
- * clean, unpinned 0 reclaim
- * stale, unpinned 0 reclaim
- * clean, pinned(*) 0 requeue
- * stale, pinned EAGAIN requeue
- * dirty, delwri ok 0 requeue
- * dirty, delwri blocked EAGAIN requeue
- * dirty, sync flush 0 reclaim
- *
- * (*) dgc: I don't think the clean, pinned state is possible but it gets
- * handled anyway given the order of checks implemented.
- *
- * As can be seen from the table, the return value of xfs_iflush() is not
- * sufficient to correctly decide the reclaim action here. The checks in
- * xfs_iflush() might look like duplicates, but they are not.
- *
- * Also, because we get the flush lock first, we know that any inode that has
- * been flushed delwri has had the flush completed by the time we check that
- * the inode is clean. The clean inode check needs to be done before flushing
- * the inode delwri otherwise we would loop forever requeuing clean inodes as
- * we cannot tell apart a successful delwri flush and a clean inode from the
- * return value of xfs_iflush().
- *
- * Note that because the inode is flushed delayed write by background
- * writeback, the flush lock may already be held here and waiting on it can
- * result in very long latencies. Hence for sync reclaims, where we wait on the
- * flush lock, the caller should push out delayed write inodes before
- * trying to reclaim them, to minimise the amount of time spent waiting. For
- * background reclaim, we just requeue the inode for the next pass.
- *
- * Hence the order of actions after gaining the locks should be:
- * bad => reclaim
- * shutdown => unpin and reclaim
- * pinned, delwri => requeue
- * pinned, sync => unpin
- * stale => reclaim
- * clean => reclaim
- * dirty, delwri => flush and requeue
- * dirty, sync => flush, wait and reclaim
- */
-STATIC int
-xfs_reclaim_inode(
- struct xfs_inode *ip,
- struct xfs_perag *pag,
- int sync_mode)
-{
- int error;
-
-restart:
- error = 0;
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (!xfs_iflock_nowait(ip)) {
- if (!(sync_mode & SYNC_WAIT))
- goto out;
-
- /*
- * If we only have a single dirty inode in a cluster there is
- * a fair chance that the AIL push may have pushed it into
- * the buffer, but xfsbufd won't touch it until 30 seconds
- * from now, and thus we will lock up here.
- *
- * Promote the inode buffer to the front of the delwri list
- * and wake up xfsbufd now.
- */
- xfs_promote_inode(ip);
- xfs_iflock(ip);
- }
-
- if (is_bad_inode(VFS_I(ip)))
- goto reclaim;
- if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_iunpin_wait(ip);
- goto reclaim;
- }
- if (xfs_ipincount(ip)) {
- if (!(sync_mode & SYNC_WAIT)) {
- xfs_ifunlock(ip);
- goto out;
- }
- xfs_iunpin_wait(ip);
- }
- if (xfs_iflags_test(ip, XFS_ISTALE))
- goto reclaim;
- if (xfs_inode_clean(ip))
- goto reclaim;
-
- /*
- * Now we have an inode that needs flushing.
- *
- * We do a nonblocking flush here even if we are doing a SYNC_WAIT
- * reclaim as we can deadlock with inode cluster removal.
- * xfs_ifree_cluster() can lock the inode buffer before it locks the
- * ip->i_lock, and we are doing the exact opposite here. As a result,
- * doing a blocking xfs_itobp() to get the cluster buffer will result
- * in an ABBA deadlock with xfs_ifree_cluster().
- *
- * As xfs_ifree_cluster() must gather all inodes that are active in the
- * cache to mark them stale, if we hit this case we don't actually want
- * to do IO here - we want the inode marked stale so we can simply
- * reclaim it. Hence if we get an EAGAIN error on a SYNC_WAIT flush,
- * just unlock the inode, back off and try again. Hopefully the next
- * pass through will see the stale flag set on the inode.
- */
- error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
- if (sync_mode & SYNC_WAIT) {
- if (error == EAGAIN) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /* backoff longer than in xfs_ifree_cluster */
- delay(2);
- goto restart;
- }
- xfs_iflock(ip);
- goto reclaim;
- }
-
- /*
- * When we have to flush an inode but don't have SYNC_WAIT set, we
- * flush the inode out using a delwri buffer and wait for the next
- * call into reclaim to find it in a clean state instead of waiting for
- * it now. We also don't return errors here - if the error is transient
- * then the next reclaim pass will flush the inode, and if the error
- * is permanent then the next sync reclaim will reclaim the inode and
- * pass on the error.
- */
- if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_warn(ip->i_mount,
- "inode 0x%llx background reclaim flush failed with %d",
- (long long)ip->i_ino, error);
- }
-out:
- xfs_iflags_clear(ip, XFS_IRECLAIM);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /*
- * We could return EAGAIN here to make reclaim rescan the inode tree in
- * a short while. However, this just burns CPU time scanning the tree
- * waiting for IO to complete and xfssyncd never goes back to the idle
- * state. Instead, return 0 to let the next scheduled background reclaim
- * attempt to reclaim the inode again.
- */
- return 0;
-
-reclaim:
- xfs_ifunlock(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- XFS_STATS_INC(xs_ig_reclaims);
- /*
- * Remove the inode from the per-AG radix tree.
- *
- * Because radix_tree_delete won't complain even if the item was never
- * added to the tree, assert that it has been there before to catch
- * problems with the inode lifetime early on.
- */
- spin_lock(&pag->pag_ici_lock);
- if (!radix_tree_delete(&pag->pag_ici_root,
- XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
- ASSERT(0);
- __xfs_inode_clear_reclaim(pag, ip);
- spin_unlock(&pag->pag_ici_lock);
-
- /*
- * Here we do an (almost) spurious inode lock in order to coordinate
- * with inode cache radix tree lookups. This is because the lookup
- * can reference the inodes in the cache without taking references.
- *
- * We make that OK here by ensuring that we wait until the inode is
- * unlocked after the lookup before we go ahead and free it. We get
- * both the ilock and the iolock because the code may need to drop the
- * ilock but will still hold the iolock.
- */
- xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_qm_dqdetach(ip);
- xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
- xfs_inode_free(ip);
- return error;
-
-}
-
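[Editorial aside: the decision ladder implemented above is easier to see with the
locking stripped away. Below is a minimal, self-contained sketch of just the
ordering from the state table; every name in it is hypothetical and nothing here
is kernel API.]

#include <stdbool.h>

enum reclaim_action_example {
	ACT_RECLAIM,		/* free the inode now */
	ACT_REQUEUE,		/* leave it for the next pass */
	ACT_FLUSH_REQUEUE,	/* start a delwri flush, then requeue */
	ACT_FLUSH_WAIT_RECLAIM	/* flush, wait for IO, then reclaim */
};

/* Mirrors the ordered checks in xfs_reclaim_inode() above. */
static enum reclaim_action_example
reclaim_decision_example(bool bad, bool shutdown, bool pinned,
			 bool stale, bool clean, bool sync_wait)
{
	if (bad)
		return ACT_RECLAIM;
	if (shutdown)
		return ACT_RECLAIM;	/* after xfs_iunpin_wait() */
	if (pinned && !sync_wait)
		return ACT_REQUEUE;	/* pinned, delwri => requeue */
	/* pinned + sync_wait: unpin, then fall through to the checks below */
	if (stale || clean)
		return ACT_RECLAIM;
	return sync_wait ? ACT_FLUSH_WAIT_RECLAIM : ACT_FLUSH_REQUEUE;
}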
-/*
- * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
- * corrupted, we still want to try to reclaim all the inodes. If we don't,
- * then a shutdown during the filesystem unmount reclaim walk would leak
- * all the unreclaimed inodes.
- */
-int
-xfs_reclaim_inodes_ag(
- struct xfs_mount *mp,
- int flags,
- int *nr_to_scan)
-{
- struct xfs_perag *pag;
- int error = 0;
- int last_error = 0;
- xfs_agnumber_t ag;
- int trylock = flags & SYNC_TRYLOCK;
- int skipped;
-
-restart:
- ag = 0;
- skipped = 0;
- while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
- unsigned long first_index = 0;
- int done = 0;
- int nr_found = 0;
-
- ag = pag->pag_agno + 1;
-
- if (trylock) {
- if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
- skipped++;
- xfs_perag_put(pag);
- continue;
- }
- first_index = pag->pag_ici_reclaim_cursor;
- } else
- mutex_lock(&pag->pag_ici_reclaim_lock);
-
- do {
- struct xfs_inode *batch[XFS_LOOKUP_BATCH];
- int i;
-
- rcu_read_lock();
- nr_found = radix_tree_gang_lookup_tag(
- &pag->pag_ici_root,
- (void **)batch, first_index,
- XFS_LOOKUP_BATCH,
- XFS_ICI_RECLAIM_TAG);
- if (!nr_found) {
- done = 1;
- rcu_read_unlock();
- break;
- }
-
- /*
- * Grab the inodes before we drop the lock. If we found
- * nothing, nr_found == 0 and the loop will be skipped.
- */
- for (i = 0; i < nr_found; i++) {
- struct xfs_inode *ip = batch[i];
-
- if (done || xfs_reclaim_inode_grab(ip, flags))
- batch[i] = NULL;
-
- /*
- * Update the index for the next lookup. Catch
- * overflows into the next AG range which can
- * occur if we have inodes in the last block of
- * the AG and we are currently pointing to the
- * last inode.
- *
- * Because we may see inodes that are from the
- * wrong AG due to RCU freeing and
- * reallocation, only update the index if it
- * lies in this AG. It was a race that led us
- * to see this inode, so another lookup from
- * the same index will not find it again.
- */
- if (XFS_INO_TO_AGNO(mp, ip->i_ino) !=
- pag->pag_agno)
- continue;
- first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- done = 1;
- }
-
- /* unlock now we've grabbed the inodes. */
- rcu_read_unlock();
-
- for (i = 0; i < nr_found; i++) {
- if (!batch[i])
- continue;
- error = xfs_reclaim_inode(batch[i], pag, flags);
- if (error && last_error != EFSCORRUPTED)
- last_error = error;
- }
-
- *nr_to_scan -= XFS_LOOKUP_BATCH;
-
- } while (nr_found && !done && *nr_to_scan > 0);
-
- if (trylock && !done)
- pag->pag_ici_reclaim_cursor = first_index;
- else
- pag->pag_ici_reclaim_cursor = 0;
- mutex_unlock(&pag->pag_ici_reclaim_lock);
- xfs_perag_put(pag);
- }
-
- /*
- * If we skipped any AG, and we still have scan count remaining, do
- * another pass, this time using blocking reclaim semantics (i.e.
- * waiting on the reclaim locks and ignoring the reclaim cursors). This
- * ensures that when we get more reclaimers than AGs we block rather
- * than spin trying to execute reclaim.
- */
- if (trylock && skipped && *nr_to_scan > 0) {
- trylock = 0;
- goto restart;
- }
- return XFS_ERROR(last_error);
-}
-
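[One subtlety in the batch loop above deserves isolation: the cursor update must
detect wrap-around past the last inode of the AG. A standalone sketch of just
that arithmetic, in plain C with hypothetical names:]

#include <stdbool.h>
#include <stdint.h>

/*
 * Advance the per-AG lookup cursor past the inode we just visited.
 * If the increment wraps below the old value, we were at the last
 * possible inode of the AG, so the caller should set done = 1.
 */
static bool cursor_wrapped_example(uint32_t *cursor, uint32_t agino)
{
	*cursor = agino + 1;
	return *cursor < agino;
}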
-int
-xfs_reclaim_inodes(
- xfs_mount_t *mp,
- int mode)
-{
- int nr_to_scan = INT_MAX;
-
- return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
-}
-
-/*
- * Inode cache shrinker.
- *
- * When called we make sure that there is a background (fast) inode reclaim in
- * progress, while we throttle the speed of reclaim by doing synchronous
- * reclaim of inodes. That means if we come across dirty inodes, we wait for
- * them to be cleaned, which we hope will not be very long due to the
- * background walker having already kicked the IO off on those dirty inodes.
- */
-static int
-xfs_reclaim_inode_shrink(
- struct shrinker *shrink,
- struct shrink_control *sc)
-{
- struct xfs_mount *mp;
- struct xfs_perag *pag;
- xfs_agnumber_t ag;
- int reclaimable;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
-
- mp = container_of(shrink, struct xfs_mount, m_inode_shrink);
- if (nr_to_scan) {
- /* kick background reclaimer and push the AIL */
- xfs_syncd_queue_reclaim(mp);
- xfs_ail_push_all(mp->m_ail);
-
- if (!(gfp_mask & __GFP_FS))
- return -1;
-
- xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT,
- &nr_to_scan);
- /* terminate if we don't exhaust the scan */
- if (nr_to_scan > 0)
- return -1;
- }
-
- reclaimable = 0;
- ag = 0;
- while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
- ag = pag->pag_agno + 1;
- reclaimable += pag->pag_ici_reclaimable;
- xfs_perag_put(pag);
- }
- return reclaimable;
-}
-
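[For readers unfamiliar with the shrinker API of this era: a call with
sc->nr_to_scan == 0 only asks for a count of reclaimable objects, and returning
-1 tells the VM to back off, which is why the function above bails out when
__GFP_FS is clear. A hedged sketch of that two-phase shape; the names are
illustrative, not a drop-in implementation:]

static int shrink_cache_example(struct shrinker *shrink,
				struct shrink_control *sc)
{
	if (sc->nr_to_scan) {
		/* we cannot recurse into filesystem code under this mask */
		if (!(sc->gfp_mask & __GFP_FS))
			return -1;
		/* ... reclaim up to sc->nr_to_scan objects here ... */
	}
	/* count pass (nr_to_scan == 0) or after scanning: report what is left */
	return count_reclaimable_example();	/* hypothetical helper */
}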
-void
-xfs_inode_shrinker_register(
- struct xfs_mount *mp)
-{
- mp->m_inode_shrink.shrink = xfs_reclaim_inode_shrink;
- mp->m_inode_shrink.seeks = DEFAULT_SEEKS;
- register_shrinker(&mp->m_inode_shrink);
-}
-
-void
-xfs_inode_shrinker_unregister(
- struct xfs_mount *mp)
-{
- unregister_shrinker(&mp->m_inode_shrink);
-}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
deleted file mode 100644
index ef5b2ce..0000000
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef XFS_SYNC_H
-#define XFS_SYNC_H 1
-
-struct xfs_mount;
-struct xfs_perag;
-
-typedef struct xfs_sync_work {
- struct list_head w_list;
- struct xfs_mount *w_mount;
- void *w_data; /* syncer routine argument */
- void (*w_syncer)(struct xfs_mount *, void *);
- struct completion *w_completion;
-} xfs_sync_work_t;
-
-#define SYNC_WAIT 0x0001 /* wait for i/o to complete */
-#define SYNC_TRYLOCK 0x0002 /* only try to lock inodes */
-
-extern struct workqueue_struct *xfs_syncd_wq; /* sync workqueue */
-
-int xfs_syncd_init(struct xfs_mount *mp);
-void xfs_syncd_stop(struct xfs_mount *mp);
-
-int xfs_quiesce_data(struct xfs_mount *mp);
-void xfs_quiesce_attr(struct xfs_mount *mp);
-
-void xfs_flush_inodes(struct xfs_inode *ip);
-
-int xfs_log_dirty_inode(struct xfs_inode *ip, struct xfs_perag *pag, int flags);
-
-int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
-
-void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
-void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
-void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
- struct xfs_inode *ip);
-
-int xfs_sync_inode_grab(struct xfs_inode *ip);
-int xfs_inode_ag_iterator(struct xfs_mount *mp,
- int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags);
-
-void xfs_inode_shrinker_register(struct xfs_mount *mp);
-void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
-
-#endif
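[The two flags above compose: SYNC_TRYLOCK alone gives non-blocking background
reclaim, SYNC_WAIT alone gives fully blocking reclaim, and the inode-cache
shrinker passes both so that per-AG locks are only tried while dirty inodes are
still waited on. Illustrative call sites, matching the unmount path and the
shrinker seen earlier:]

	/* unmount: reclaim everything, blocking as needed */
	xfs_reclaim_inodes(mp, SYNC_WAIT);

	/* memory pressure: try locks, but throttle on dirty inodes */
	xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);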
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.c b/fs/xfs/linux-2.6/xfs_sysctl.c
deleted file mode 100644
index ee2d2ad..0000000
--- a/fs/xfs/linux-2.6/xfs_sysctl.c
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include "xfs_error.h"
-
-static struct ctl_table_header *xfs_table_header;
-
-#ifdef CONFIG_PROC_FS
-STATIC int
-xfs_stats_clear_proc_handler(
- ctl_table *ctl,
- int write,
- void __user *buffer,
- size_t *lenp,
- loff_t *ppos)
-{
- int c, ret, *valp = ctl->data;
- __uint32_t vn_active;
-
- ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-
- if (!ret && write && *valp) {
- xfs_notice(NULL, "Clearing xfsstats");
- for_each_possible_cpu(c) {
- preempt_disable();
- /* save vn_active, it's a universal truth! */
- vn_active = per_cpu(xfsstats, c).vn_active;
- memset(&per_cpu(xfsstats, c), 0,
- sizeof(struct xfsstats));
- per_cpu(xfsstats, c).vn_active = vn_active;
- preempt_enable();
- }
- xfs_stats_clear = 0;
- }
-
- return ret;
-}
-
-STATIC int
-xfs_panic_mask_proc_handler(
- ctl_table *ctl,
- int write,
- void __user *buffer,
- size_t *lenp,
- loff_t *ppos)
-{
- int ret, *valp = ctl->data;
-
- ret = proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
- if (!ret && write) {
- xfs_panic_mask = *valp;
-#ifdef DEBUG
- xfs_panic_mask |= (XFS_PTAG_SHUTDOWN_CORRUPT | XFS_PTAG_LOGRES);
-#endif
- }
- return ret;
-}
-#endif /* CONFIG_PROC_FS */
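[The stats-clear handler above uses a small pattern worth naming: zero a whole
per-cpu struct while keeping one live field by saving and restoring it around
the memset. A generic userspace sketch; the struct and names are hypothetical:]

#include <string.h>

struct stats_example {
	unsigned long ops;		/* resettable counter */
	unsigned long vn_active;	/* live refcount, must survive */
};

static void clear_stats_example(struct stats_example *s)
{
	unsigned long saved = s->vn_active;

	memset(s, 0, sizeof(*s));
	s->vn_active = saved;
}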
-
-static ctl_table xfs_table[] = {
- {
- .procname = "irix_sgid_inherit",
- .data = &xfs_params.sgid_inherit.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.sgid_inherit.min,
- .extra2 = &xfs_params.sgid_inherit.max
- },
- {
- .procname = "irix_symlink_mode",
- .data = &xfs_params.symlink_mode.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.symlink_mode.min,
- .extra2 = &xfs_params.symlink_mode.max
- },
- {
- .procname = "panic_mask",
- .data = &xfs_params.panic_mask.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = xfs_panic_mask_proc_handler,
- .extra1 = &xfs_params.panic_mask.min,
- .extra2 = &xfs_params.panic_mask.max
- },
-
- {
- .procname = "error_level",
- .data = &xfs_params.error_level.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.error_level.min,
- .extra2 = &xfs_params.error_level.max
- },
- {
- .procname = "xfssyncd_centisecs",
- .data = &xfs_params.syncd_timer.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.syncd_timer.min,
- .extra2 = &xfs_params.syncd_timer.max
- },
- {
- .procname = "inherit_sync",
- .data = &xfs_params.inherit_sync.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.inherit_sync.min,
- .extra2 = &xfs_params.inherit_sync.max
- },
- {
- .procname = "inherit_nodump",
- .data = &xfs_params.inherit_nodump.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.inherit_nodump.min,
- .extra2 = &xfs_params.inherit_nodump.max
- },
- {
- .procname = "inherit_noatime",
- .data = &xfs_params.inherit_noatim.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.inherit_noatim.min,
- .extra2 = &xfs_params.inherit_noatim.max
- },
- {
- .procname = "xfsbufd_centisecs",
- .data = &xfs_params.xfs_buf_timer.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.xfs_buf_timer.min,
- .extra2 = &xfs_params.xfs_buf_timer.max
- },
- {
- .procname = "age_buffer_centisecs",
- .data = &xfs_params.xfs_buf_age.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.xfs_buf_age.min,
- .extra2 = &xfs_params.xfs_buf_age.max
- },
- {
- .procname = "inherit_nosymlinks",
- .data = &xfs_params.inherit_nosym.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.inherit_nosym.min,
- .extra2 = &xfs_params.inherit_nosym.max
- },
- {
- .procname = "rotorstep",
- .data = &xfs_params.rotorstep.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.rotorstep.min,
- .extra2 = &xfs_params.rotorstep.max
- },
- {
- .procname = "inherit_nodefrag",
- .data = &xfs_params.inherit_nodfrg.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.inherit_nodfrg.min,
- .extra2 = &xfs_params.inherit_nodfrg.max
- },
- {
- .procname = "filestream_centisecs",
- .data = &xfs_params.fstrm_timer.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &xfs_params.fstrm_timer.min,
- .extra2 = &xfs_params.fstrm_timer.max,
- },
- /* please keep this the last entry */
-#ifdef CONFIG_PROC_FS
- {
- .procname = "stats_clear",
- .data = &xfs_params.stats_clear.val,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = xfs_stats_clear_proc_handler,
- .extra1 = &xfs_params.stats_clear.min,
- .extra2 = &xfs_params.stats_clear.max
- },
-#endif /* CONFIG_PROC_FS */
-
- {}
-};
-
-static ctl_table xfs_dir_table[] = {
- {
- .procname = "xfs",
- .mode = 0555,
- .child = xfs_table
- },
- {}
-};
-
-static ctl_table xfs_root_table[] = {
- {
- .procname = "fs",
- .mode = 0555,
- .child = xfs_dir_table
- },
- {}
-};
-
-int
-xfs_sysctl_register(void)
-{
- xfs_table_header = register_sysctl_table(xfs_root_table);
- if (!xfs_table_header)
- return -ENOMEM;
- return 0;
-}
-
-void
-xfs_sysctl_unregister(void)
-{
- unregister_sysctl_table(xfs_table_header);
-}
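[The three nested ctl_table arrays above ("fs" -> "xfs" -> leaf entries) expose
each leaf as /proc/sys/fs/xfs/<procname>. A short userspace sketch of reading
one of them, assuming CONFIG_SYSCTL and procfs are enabled; error handling is
trimmed:]

#include <stdio.h>

int main(void)
{
	char buf[32];
	FILE *f = fopen("/proc/sys/fs/xfs/error_level", "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("error_level = %s", buf);
	if (f)
		fclose(f);
	return 0;
}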
diff --git a/fs/xfs/linux-2.6/xfs_sysctl.h b/fs/xfs/linux-2.6/xfs_sysctl.h
deleted file mode 100644
index b9937d4..0000000
--- a/fs/xfs/linux-2.6/xfs_sysctl.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2001-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SYSCTL_H__
-#define __XFS_SYSCTL_H__
-
-#include <linux/sysctl.h>
-
-/*
- * Tunable xfs parameters
- */
-
-typedef struct xfs_sysctl_val {
- int min;
- int val;
- int max;
-} xfs_sysctl_val_t;
-
-typedef struct xfs_param {
- xfs_sysctl_val_t sgid_inherit; /* Inherit S_ISGID if process' GID is
- * not a member of parent dir GID. */
- xfs_sysctl_val_t symlink_mode; /* Link creat mode affected by umask */
- xfs_sysctl_val_t panic_mask; /* bitmask to cause panic on errors. */
- xfs_sysctl_val_t error_level; /* Degree of reporting for problems */
- xfs_sysctl_val_t syncd_timer; /* Interval between xfssyncd wakeups */
- xfs_sysctl_val_t stats_clear; /* Reset all XFS statistics to zero. */
- xfs_sysctl_val_t inherit_sync; /* Inherit the "sync" inode flag. */
- xfs_sysctl_val_t inherit_nodump;/* Inherit the "nodump" inode flag. */
- xfs_sysctl_val_t inherit_noatim;/* Inherit the "noatime" inode flag. */
- xfs_sysctl_val_t xfs_buf_timer; /* Interval between xfsbufd wakeups. */
- xfs_sysctl_val_t xfs_buf_age; /* Metadata buffer age before flush. */
- xfs_sysctl_val_t inherit_nosym; /* Inherit the "nosymlinks" flag. */
- xfs_sysctl_val_t rotorstep; /* inode32 AG rotoring control knob */
- xfs_sysctl_val_t inherit_nodfrg;/* Inherit the "nodefrag" inode flag. */
- xfs_sysctl_val_t fstrm_timer; /* Filestream dir-AG assoc'n timeout. */
-} xfs_param_t;
-
-/*
- * xfs_error_level:
- *
- * How much error reporting will be done when internal problems are
- * encountered. These problems normally return an EFSCORRUPTED to their
- * caller, with no other information reported.
- *
- * 0 No error reports
- * 1 Report EFSCORRUPTED errors that will cause a filesystem shutdown
- * 5 Report all EFSCORRUPTED errors (all of the above errors, plus any
- * additional errors that are known to not cause shutdowns)
- *
- * xfs_panic_mask bit 0x8 turns the error reports into panics
- */
-
-enum {
- /* XFS_REFCACHE_SIZE = 1 */
- /* XFS_REFCACHE_PURGE = 2 */
- /* XFS_RESTRICT_CHOWN = 3 */
- XFS_SGID_INHERIT = 4,
- XFS_SYMLINK_MODE = 5,
- XFS_PANIC_MASK = 6,
- XFS_ERRLEVEL = 7,
- XFS_SYNCD_TIMER = 8,
- /* XFS_PROBE_DMAPI = 9 */
- /* XFS_PROBE_IOOPS = 10 */
- /* XFS_PROBE_QUOTA = 11 */
- XFS_STATS_CLEAR = 12,
- XFS_INHERIT_SYNC = 13,
- XFS_INHERIT_NODUMP = 14,
- XFS_INHERIT_NOATIME = 15,
- XFS_BUF_TIMER = 16,
- XFS_BUF_AGE = 17,
- /* XFS_IO_BYPASS = 18 */
- XFS_INHERIT_NOSYM = 19,
- XFS_ROTORSTEP = 20,
- XFS_INHERIT_NODFRG = 21,
- XFS_FILESTREAM_TIMER = 22,
-};
-
-extern xfs_param_t xfs_params;
-
-#ifdef CONFIG_SYSCTL
-extern int xfs_sysctl_register(void);
-extern void xfs_sysctl_unregister(void);
-#else
-# define xfs_sysctl_register() (0)
-# define xfs_sysctl_unregister() do { } while (0)
-#endif /* CONFIG_SYSCTL */
-
-#endif /* __XFS_SYSCTL_H__ */
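[Each xfs_sysctl_val_t triplet wires straight into proc_dointvec_minmax through
.data/.extra1/.extra2, and writes outside [min, max] are rejected rather than
clamped. A simplified analogue of that range check, with hypothetical names:]

#include <errno.h>

struct sysctl_val_example {
	int min;
	int val;
	int max;
};

static int set_val_example(struct sysctl_val_example *v, int new_val)
{
	/* out-of-range writes fail; the old value is kept */
	if (new_val < v->min || new_val > v->max)
		return -EINVAL;
	v->val = new_val;
	return 0;
}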
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
deleted file mode 100644
index 88d25d4..0000000
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_mount.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_alloc.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_log_priv.h"
-#include "xfs_buf_item.h"
-#include "xfs_quota.h"
-#include "xfs_iomap.h"
-#include "xfs_aops.h"
-#include "quota/xfs_dquot_item.h"
-#include "quota/xfs_dquot.h"
-#include "xfs_log_recover.h"
-#include "xfs_inode_item.h"
-
-/*
- * We include this last to have the helpers above available for the trace
- * event implementations.
- */
-#define CREATE_TRACE_POINTS
-#include "xfs_trace.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
deleted file mode 100644
index d48b7a5..0000000
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ /dev/null
@@ -1,1776 +0,0 @@
-/*
- * Copyright (c) 2009, Christoph Hellwig
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#undef TRACE_SYSTEM
-#define TRACE_SYSTEM xfs
-
-#if !defined(_TRACE_XFS_H) || defined(TRACE_HEADER_MULTI_READ)
-#define _TRACE_XFS_H
-
-#include <linux/tracepoint.h>
-
-struct xfs_agf;
-struct xfs_alloc_arg;
-struct xfs_attr_list_context;
-struct xfs_buf_log_item;
-struct xfs_da_args;
-struct xfs_da_node_entry;
-struct xfs_dquot;
-struct xlog_ticket;
-struct log;
-struct xlog_recover;
-struct xlog_recover_item;
-struct xfs_buf_log_format;
-struct xfs_inode_log_format;
-
-DECLARE_EVENT_CLASS(xfs_attr_list_class,
- TP_PROTO(struct xfs_attr_list_context *ctx),
- TP_ARGS(ctx),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(u32, hashval)
- __field(u32, blkno)
- __field(u32, offset)
- __field(void *, alist)
- __field(int, bufsize)
- __field(int, count)
- __field(int, firstu)
- __field(int, dupcnt)
- __field(int, flags)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
- __entry->ino = ctx->dp->i_ino;
- __entry->hashval = ctx->cursor->hashval;
- __entry->blkno = ctx->cursor->blkno;
- __entry->offset = ctx->cursor->offset;
- __entry->alist = ctx->alist;
- __entry->bufsize = ctx->bufsize;
- __entry->count = ctx->count;
- __entry->firstu = ctx->firstu;
- __entry->flags = ctx->flags;
- ),
- TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
- "alist 0x%p size %u count %u firstu %u flags %d %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->hashval,
- __entry->blkno,
- __entry->offset,
- __entry->dupcnt,
- __entry->alist,
- __entry->bufsize,
- __entry->count,
- __entry->firstu,
- __entry->flags,
- __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
- )
-)
-
-#define DEFINE_ATTR_LIST_EVENT(name) \
-DEFINE_EVENT(xfs_attr_list_class, name, \
- TP_PROTO(struct xfs_attr_list_context *ctx), \
- TP_ARGS(ctx))
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf_end);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_full);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_add);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_wrong_blk);
-DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
-
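[A note on the class/instance split used throughout this header:
DECLARE_EVENT_CLASS emits the comparatively large assign/print machinery once,
and each DEFINE_EVENT stamps out a cheap named tracepoint that shares it. A
minimal sketch with hypothetical names:]

DECLARE_EVENT_CLASS(foo_class,
	TP_PROTO(int v),
	TP_ARGS(v),
	TP_STRUCT__entry(__field(int, v)),
	TP_fast_assign(__entry->v = v;),
	TP_printk("v %d", __entry->v)
);

#define DEFINE_FOO_EVENT(name) \
DEFINE_EVENT(foo_class, name, \
	TP_PROTO(int v), \
	TP_ARGS(v))
DEFINE_FOO_EVENT(foo_start);
DEFINE_FOO_EVENT(foo_done);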
-DECLARE_EVENT_CLASS(xfs_perag_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
- unsigned long caller_ip),
- TP_ARGS(mp, agno, refcount, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(int, refcount)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->refcount = refcount;
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d agno %u refcount %d caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
- __entry->refcount,
- (char *)__entry->caller_ip)
-);
-
-#define DEFINE_PERAG_REF_EVENT(name) \
-DEFINE_EVENT(xfs_perag_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \
- unsigned long caller_ip), \
- TP_ARGS(mp, agno, refcount, caller_ip))
-DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
-DEFINE_PERAG_REF_EVENT(xfs_perag_put);
-DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
-DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
-
-TRACE_EVENT(xfs_attr_list_node_descend,
- TP_PROTO(struct xfs_attr_list_context *ctx,
- struct xfs_da_node_entry *btree),
- TP_ARGS(ctx, btree),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(u32, hashval)
- __field(u32, blkno)
- __field(u32, offset)
- __field(void *, alist)
- __field(int, bufsize)
- __field(int, count)
- __field(int, firstu)
- __field(int, dupcnt)
- __field(int, flags)
- __field(u32, bt_hashval)
- __field(u32, bt_before)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
- __entry->ino = ctx->dp->i_ino;
- __entry->hashval = ctx->cursor->hashval;
- __entry->blkno = ctx->cursor->blkno;
- __entry->offset = ctx->cursor->offset;
- __entry->alist = ctx->alist;
- __entry->bufsize = ctx->bufsize;
- __entry->count = ctx->count;
- __entry->firstu = ctx->firstu;
- __entry->flags = ctx->flags;
- __entry->bt_hashval = be32_to_cpu(btree->hashval);
- __entry->bt_before = be32_to_cpu(btree->before);
- ),
- TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
- "alist 0x%p size %u count %u firstu %u flags %d %s "
- "node hashval %u, node before %u",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->hashval,
- __entry->blkno,
- __entry->offset,
- __entry->dupcnt,
- __entry->alist,
- __entry->bufsize,
- __entry->count,
- __entry->firstu,
- __entry->flags,
- __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS),
- __entry->bt_hashval,
- __entry->bt_before)
-);
-
-TRACE_EVENT(xfs_iext_insert,
- TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx,
- struct xfs_bmbt_irec *r, int state, unsigned long caller_ip),
- TP_ARGS(ip, idx, r, state, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_extnum_t, idx)
- __field(xfs_fileoff_t, startoff)
- __field(xfs_fsblock_t, startblock)
- __field(xfs_filblks_t, blockcount)
- __field(xfs_exntst_t, state)
- __field(int, bmap_state)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->idx = idx;
- __entry->startoff = r->br_startoff;
- __entry->startblock = r->br_startblock;
- __entry->blockcount = r->br_blockcount;
- __entry->state = r->br_state;
- __entry->bmap_state = state;
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
- "offset %lld block %lld count %lld flag %d caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
- (long)__entry->idx,
- __entry->startoff,
- (__int64_t)__entry->startblock,
- __entry->blockcount,
- __entry->state,
- (char *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_bmap_class,
- TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
- unsigned long caller_ip),
- TP_ARGS(ip, idx, state, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_extnum_t, idx)
- __field(xfs_fileoff_t, startoff)
- __field(xfs_fsblock_t, startblock)
- __field(xfs_filblks_t, blockcount)
- __field(xfs_exntst_t, state)
- __field(int, bmap_state)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
- ip->i_afp : &ip->i_df;
- struct xfs_bmbt_irec r;
-
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->idx = idx;
- __entry->startoff = r.br_startoff;
- __entry->startblock = r.br_startblock;
- __entry->blockcount = r.br_blockcount;
- __entry->state = r.br_state;
- __entry->bmap_state = state;
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
- "offset %lld block %lld count %lld flag %d caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
- (long)__entry->idx,
- __entry->startoff,
- (__int64_t)__entry->startblock,
- __entry->blockcount,
- __entry->state,
- (char *)__entry->caller_ip)
-)
-
-#define DEFINE_BMAP_EVENT(name) \
-DEFINE_EVENT(xfs_bmap_class, name, \
- TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
- unsigned long caller_ip), \
- TP_ARGS(ip, idx, state, caller_ip))
-DEFINE_BMAP_EVENT(xfs_iext_remove);
-DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
-DEFINE_BMAP_EVENT(xfs_bmap_post_update);
-DEFINE_BMAP_EVENT(xfs_extlist);
-
-DECLARE_EVENT_CLASS(xfs_buf_class,
- TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
- TP_ARGS(bp, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_daddr_t, bno)
- __field(size_t, buffer_length)
- __field(int, hold)
- __field(int, pincount)
- __field(unsigned, lockval)
- __field(unsigned, flags)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- __entry->dev = bp->b_target->bt_dev;
- __entry->bno = bp->b_bn;
- __entry->buffer_length = bp->b_buffer_length;
- __entry->hold = atomic_read(&bp->b_hold);
- __entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
- __entry->flags = bp->b_flags;
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
- "lock %d flags %s caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- (unsigned long long)__entry->bno,
- __entry->buffer_length,
- __entry->hold,
- __entry->pincount,
- __entry->lockval,
- __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
- (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_EVENT(name) \
-DEFINE_EVENT(xfs_buf_class, name, \
- TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
- TP_ARGS(bp, caller_ip))
-DEFINE_BUF_EVENT(xfs_buf_init);
-DEFINE_BUF_EVENT(xfs_buf_free);
-DEFINE_BUF_EVENT(xfs_buf_hold);
-DEFINE_BUF_EVENT(xfs_buf_rele);
-DEFINE_BUF_EVENT(xfs_buf_iodone);
-DEFINE_BUF_EVENT(xfs_buf_iorequest);
-DEFINE_BUF_EVENT(xfs_buf_bawrite);
-DEFINE_BUF_EVENT(xfs_buf_bdwrite);
-DEFINE_BUF_EVENT(xfs_buf_lock);
-DEFINE_BUF_EVENT(xfs_buf_lock_done);
-DEFINE_BUF_EVENT(xfs_buf_cond_lock);
-DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_iowait);
-DEFINE_BUF_EVENT(xfs_buf_iowait_done);
-DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
-DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_uncached);
-DEFINE_BUF_EVENT(xfs_bdstrat_shut);
-DEFINE_BUF_EVENT(xfs_buf_item_relse);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone);
-DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
-DEFINE_BUF_EVENT(xfs_buf_error_relse);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
-DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
-
-/* not really buffer traces, but the buf provides useful information */
-DEFINE_BUF_EVENT(xfs_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_da_btree_corrupt);
-DEFINE_BUF_EVENT(xfs_reset_dqcounts);
-DEFINE_BUF_EVENT(xfs_inode_item_push);
-
-/* pass flags explicitly */
-DECLARE_EVENT_CLASS(xfs_buf_flags_class,
- TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
- TP_ARGS(bp, flags, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_daddr_t, bno)
- __field(size_t, buffer_length)
- __field(int, hold)
- __field(int, pincount)
- __field(unsigned, lockval)
- __field(unsigned, flags)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- __entry->dev = bp->b_target->bt_dev;
- __entry->bno = bp->b_bn;
- __entry->buffer_length = bp->b_buffer_length;
- __entry->flags = flags;
- __entry->hold = atomic_read(&bp->b_hold);
- __entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
- "lock %d flags %s caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- (unsigned long long)__entry->bno,
- __entry->buffer_length,
- __entry->hold,
- __entry->pincount,
- __entry->lockval,
- __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
- (void *)__entry->caller_ip)
-)
-
-#define DEFINE_BUF_FLAGS_EVENT(name) \
-DEFINE_EVENT(xfs_buf_flags_class, name, \
- TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
- TP_ARGS(bp, flags, caller_ip))
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
-DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
-
-TRACE_EVENT(xfs_buf_ioerror,
- TP_PROTO(struct xfs_buf *bp, int error, unsigned long caller_ip),
- TP_ARGS(bp, error, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_daddr_t, bno)
- __field(size_t, buffer_length)
- __field(unsigned, flags)
- __field(int, hold)
- __field(int, pincount)
- __field(unsigned, lockval)
- __field(int, error)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- __entry->dev = bp->b_target->bt_dev;
- __entry->bno = bp->b_bn;
- __entry->buffer_length = bp->b_buffer_length;
- __entry->hold = atomic_read(&bp->b_hold);
- __entry->pincount = atomic_read(&bp->b_pin_count);
- __entry->lockval = xfs_buf_lock_value(bp);
- __entry->error = error;
- __entry->flags = bp->b_flags;
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
- "lock %d error %d flags %s caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- (unsigned long long)__entry->bno,
- __entry->buffer_length,
- __entry->hold,
- __entry->pincount,
- __entry->lockval,
- __entry->error,
- __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
- (void *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_buf_item_class,
- TP_PROTO(struct xfs_buf_log_item *bip),
- TP_ARGS(bip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_daddr_t, buf_bno)
- __field(size_t, buf_len)
- __field(int, buf_hold)
- __field(int, buf_pincount)
- __field(int, buf_lockval)
- __field(unsigned, buf_flags)
- __field(unsigned, bli_recur)
- __field(int, bli_refcount)
- __field(unsigned, bli_flags)
- __field(void *, li_desc)
- __field(unsigned, li_flags)
- ),
- TP_fast_assign(
- __entry->dev = bip->bli_buf->b_target->bt_dev;
- __entry->bli_flags = bip->bli_flags;
- __entry->bli_recur = bip->bli_recur;
- __entry->bli_refcount = atomic_read(&bip->bli_refcount);
- __entry->buf_bno = bip->bli_buf->b_bn;
- __entry->buf_len = bip->bli_buf->b_buffer_length;
- __entry->buf_flags = bip->bli_buf->b_flags;
- __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
- __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
- __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
- __entry->li_desc = bip->bli_item.li_desc;
- __entry->li_flags = bip->bli_item.li_flags;
- ),
- TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
- "lock %d flags %s recur %d refcount %d bliflags %s "
- "lidesc 0x%p liflags %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- (unsigned long long)__entry->buf_bno,
- __entry->buf_len,
- __entry->buf_hold,
- __entry->buf_pincount,
- __entry->buf_lockval,
- __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
- __entry->bli_recur,
- __entry->bli_refcount,
- __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
- __entry->li_desc,
- __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
-)
-
-#define DEFINE_BUF_ITEM_EVENT(name) \
-DEFINE_EVENT(xfs_buf_item_class, name, \
- TP_PROTO(struct xfs_buf_log_item *bip), \
- TP_ARGS(bip))
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unpin_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_trylock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_unlock_stale);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_committed);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_push);
-DEFINE_BUF_ITEM_EVENT(xfs_buf_item_pushbuf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_get_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_getsb_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_read_buf_recur);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_log_buf);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_brelse);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bjoin);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
-DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
-
-DECLARE_EVENT_CLASS(xfs_lock_class,
- TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
- unsigned long caller_ip),
- TP_ARGS(ip, lock_flags, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(int, lock_flags)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->lock_flags = lock_flags;
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
- (void *)__entry->caller_ip)
-)
-
-#define DEFINE_LOCK_EVENT(name) \
-DEFINE_EVENT(xfs_lock_class, name, \
- TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
- unsigned long caller_ip), \
- TP_ARGS(ip, lock_flags, caller_ip))
-DEFINE_LOCK_EVENT(xfs_ilock);
-DEFINE_LOCK_EVENT(xfs_ilock_nowait);
-DEFINE_LOCK_EVENT(xfs_ilock_demote);
-DEFINE_LOCK_EVENT(xfs_iunlock);
-
-DECLARE_EVENT_CLASS(xfs_inode_class,
- TP_PROTO(struct xfs_inode *ip),
- TP_ARGS(ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- ),
- TP_printk("dev %d:%d ino 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino)
-)
-
-#define DEFINE_INODE_EVENT(name) \
-DEFINE_EVENT(xfs_inode_class, name, \
- TP_PROTO(struct xfs_inode *ip), \
- TP_ARGS(ip))
-DEFINE_INODE_EVENT(xfs_iget_skip);
-DEFINE_INODE_EVENT(xfs_iget_reclaim);
-DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
-DEFINE_INODE_EVENT(xfs_iget_hit);
-DEFINE_INODE_EVENT(xfs_iget_miss);
-
-DEFINE_INODE_EVENT(xfs_getattr);
-DEFINE_INODE_EVENT(xfs_setattr);
-DEFINE_INODE_EVENT(xfs_readlink);
-DEFINE_INODE_EVENT(xfs_alloc_file_space);
-DEFINE_INODE_EVENT(xfs_free_file_space);
-DEFINE_INODE_EVENT(xfs_readdir);
-#ifdef CONFIG_XFS_POSIX_ACL
-DEFINE_INODE_EVENT(xfs_check_acl);
-#endif
-DEFINE_INODE_EVENT(xfs_vm_bmap);
-DEFINE_INODE_EVENT(xfs_file_ioctl);
-DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
-DEFINE_INODE_EVENT(xfs_ioctl_setattr);
-DEFINE_INODE_EVENT(xfs_file_fsync);
-DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_write_inode);
-DEFINE_INODE_EVENT(xfs_evict_inode);
-
-DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
-DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
-
-DECLARE_EVENT_CLASS(xfs_iref_class,
- TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
- TP_ARGS(ip, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(int, count)
- __field(int, pincount)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->count = atomic_read(&VFS_I(ip)->i_count);
- __entry->pincount = atomic_read(&ip->i_pincount);
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d ino 0x%llx count %d pincount %d caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->count,
- __entry->pincount,
- (char *)__entry->caller_ip)
-)
-
-#define DEFINE_IREF_EVENT(name) \
-DEFINE_EVENT(xfs_iref_class, name, \
- TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
- TP_ARGS(ip, caller_ip))
-DEFINE_IREF_EVENT(xfs_ihold);
-DEFINE_IREF_EVENT(xfs_irele);
-DEFINE_IREF_EVENT(xfs_inode_pin);
-DEFINE_IREF_EVENT(xfs_inode_unpin);
-DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
-
-DECLARE_EVENT_CLASS(xfs_namespace_class,
- TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
- TP_ARGS(dp, name),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, dp_ino)
- __dynamic_array(char, name, name->len)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(dp)->i_sb->s_dev;
- __entry->dp_ino = dp->i_ino;
- memcpy(__get_str(name), name->name, name->len);
- ),
- TP_printk("dev %d:%d dp ino 0x%llx name %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->dp_ino,
- __get_str(name))
-)
-
-#define DEFINE_NAMESPACE_EVENT(name) \
-DEFINE_EVENT(xfs_namespace_class, name, \
- TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
- TP_ARGS(dp, name))
-DEFINE_NAMESPACE_EVENT(xfs_remove);
-DEFINE_NAMESPACE_EVENT(xfs_link);
-DEFINE_NAMESPACE_EVENT(xfs_lookup);
-DEFINE_NAMESPACE_EVENT(xfs_create);
-DEFINE_NAMESPACE_EVENT(xfs_symlink);
-
-TRACE_EVENT(xfs_rename,
- TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
- struct xfs_name *src_name, struct xfs_name *target_name),
- TP_ARGS(src_dp, target_dp, src_name, target_name),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, src_dp_ino)
- __field(xfs_ino_t, target_dp_ino)
- __dynamic_array(char, src_name, src_name->len)
- __dynamic_array(char, target_name, target_name->len)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
- __entry->src_dp_ino = src_dp->i_ino;
- __entry->target_dp_ino = target_dp->i_ino;
- memcpy(__get_str(src_name), src_name->name, src_name->len);
- memcpy(__get_str(target_name), target_name->name, target_name->len);
- ),
- TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
- " src name %s target name %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->src_dp_ino,
- __entry->target_dp_ino,
- __get_str(src_name),
- __get_str(target_name))
-)
-
-DECLARE_EVENT_CLASS(xfs_dquot_class,
- TP_PROTO(struct xfs_dquot *dqp),
- TP_ARGS(dqp),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(u32, id)
- __field(unsigned, flags)
- __field(unsigned, nrefs)
- __field(unsigned long long, res_bcount)
- __field(unsigned long long, bcount)
- __field(unsigned long long, icount)
- __field(unsigned long long, blk_hardlimit)
- __field(unsigned long long, blk_softlimit)
- __field(unsigned long long, ino_hardlimit)
- __field(unsigned long long, ino_softlimit)
- ),
- TP_fast_assign(
- __entry->dev = dqp->q_mount->m_super->s_dev;
- __entry->id = be32_to_cpu(dqp->q_core.d_id);
- __entry->flags = dqp->dq_flags;
- __entry->nrefs = dqp->q_nrefs;
- __entry->res_bcount = dqp->q_res_bcount;
- __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
- __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
- __entry->blk_hardlimit =
- be64_to_cpu(dqp->q_core.d_blk_hardlimit);
- __entry->blk_softlimit =
- be64_to_cpu(dqp->q_core.d_blk_softlimit);
- __entry->ino_hardlimit =
- be64_to_cpu(dqp->q_core.d_ino_hardlimit);
- __entry->ino_softlimit =
- be64_to_cpu(dqp->q_core.d_ino_softlimit);
- ),
- TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
- "bcnt 0x%llx bhardlimit 0x%llx bsoftlimit 0x%llx "
- "icnt 0x%llx ihardlimit 0x%llx isoftlimit 0x%llx]",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->id,
- __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
- __entry->nrefs,
- __entry->res_bcount,
- __entry->bcount,
- __entry->blk_hardlimit,
- __entry->blk_softlimit,
- __entry->icount,
- __entry->ino_hardlimit,
- __entry->ino_softlimit)
-)
-
-#define DEFINE_DQUOT_EVENT(name) \
-DEFINE_EVENT(xfs_dquot_class, name, \
- TP_PROTO(struct xfs_dquot *dqp), \
- TP_ARGS(dqp))
-DEFINE_DQUOT_EVENT(xfs_dqadjust);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_want);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_dirty);
-DEFINE_DQUOT_EVENT(xfs_dqreclaim_unlink);
-DEFINE_DQUOT_EVENT(xfs_dqattach_found);
-DEFINE_DQUOT_EVENT(xfs_dqattach_get);
-DEFINE_DQUOT_EVENT(xfs_dqinit);
-DEFINE_DQUOT_EVENT(xfs_dqreuse);
-DEFINE_DQUOT_EVENT(xfs_dqalloc);
-DEFINE_DQUOT_EVENT(xfs_dqtobp_read);
-DEFINE_DQUOT_EVENT(xfs_dqread);
-DEFINE_DQUOT_EVENT(xfs_dqread_fail);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_found);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_want);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_freelist);
-DEFINE_DQUOT_EVENT(xfs_dqlookup_done);
-DEFINE_DQUOT_EVENT(xfs_dqget_hit);
-DEFINE_DQUOT_EVENT(xfs_dqget_miss);
-DEFINE_DQUOT_EVENT(xfs_dqput);
-DEFINE_DQUOT_EVENT(xfs_dqput_wait);
-DEFINE_DQUOT_EVENT(xfs_dqput_free);
-DEFINE_DQUOT_EVENT(xfs_dqrele);
-DEFINE_DQUOT_EVENT(xfs_dqflush);
-DEFINE_DQUOT_EVENT(xfs_dqflush_force);
-DEFINE_DQUOT_EVENT(xfs_dqflush_done);
-
-DECLARE_EVENT_CLASS(xfs_loggrant_class,
- TP_PROTO(struct log *log, struct xlog_ticket *tic),
- TP_ARGS(log, tic),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(unsigned, trans_type)
- __field(char, ocnt)
- __field(char, cnt)
- __field(int, curr_res)
- __field(int, unit_res)
- __field(unsigned int, flags)
- __field(int, reserveq)
- __field(int, writeq)
- __field(int, grant_reserve_cycle)
- __field(int, grant_reserve_bytes)
- __field(int, grant_write_cycle)
- __field(int, grant_write_bytes)
- __field(int, curr_cycle)
- __field(int, curr_block)
- __field(xfs_lsn_t, tail_lsn)
- ),
- TP_fast_assign(
- __entry->dev = log->l_mp->m_super->s_dev;
- __entry->trans_type = tic->t_trans_type;
- __entry->ocnt = tic->t_ocnt;
- __entry->cnt = tic->t_cnt;
- __entry->curr_res = tic->t_curr_res;
- __entry->unit_res = tic->t_unit_res;
- __entry->flags = tic->t_flags;
- __entry->reserveq = list_empty(&log->l_reserveq);
- __entry->writeq = list_empty(&log->l_writeq);
- xlog_crack_grant_head(&log->l_grant_reserve_head,
- &__entry->grant_reserve_cycle,
- &__entry->grant_reserve_bytes);
- xlog_crack_grant_head(&log->l_grant_write_head,
- &__entry->grant_write_cycle,
- &__entry->grant_write_bytes);
- __entry->curr_cycle = log->l_curr_cycle;
- __entry->curr_block = log->l_curr_block;
- __entry->tail_lsn = atomic64_read(&log->l_tail_lsn);
- ),
- TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
- "t_unit_res %u t_flags %s reserveq %s "
- "writeq %s grant_reserve_cycle %d "
- "grant_reserve_bytes %d grant_write_cycle %d "
- "grant_write_bytes %d curr_cycle %d curr_block %d "
- "tail_cycle %d tail_block %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
- __entry->ocnt,
- __entry->cnt,
- __entry->curr_res,
- __entry->unit_res,
- __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
- __entry->reserveq ? "empty" : "active",
- __entry->writeq ? "empty" : "active",
- __entry->grant_reserve_cycle,
- __entry->grant_reserve_bytes,
- __entry->grant_write_cycle,
- __entry->grant_write_bytes,
- __entry->curr_cycle,
- __entry->curr_block,
- CYCLE_LSN(__entry->tail_lsn),
- BLOCK_LSN(__entry->tail_lsn)
- )
-)
-
-#define DEFINE_LOGGRANT_EVENT(name) \
-DEFINE_EVENT(xfs_loggrant_class, name, \
- TP_PROTO(struct log *log, struct xlog_ticket *tic), \
- TP_ARGS(log, tic))
-DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
-DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
-DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
-DEFINE_LOGGRANT_EVENT(xfs_log_umount_write);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_grant_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_error);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake1);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_sleep2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake2);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_write_wake_up);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_regrant_reserve_sub);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_enter);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_exit);
-DEFINE_LOGGRANT_EVENT(xfs_log_ungrant_sub);
-
-DECLARE_EVENT_CLASS(xfs_file_class,
- TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags),
- TP_ARGS(ip, count, offset, flags),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsize_t, size)
- __field(xfs_fsize_t, new_size)
- __field(loff_t, offset)
- __field(size_t, count)
- __field(int, flags)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->new_size = ip->i_new_size;
- __entry->offset = offset;
- __entry->count = count;
- __entry->flags = flags;
- ),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
- "offset 0x%llx count 0x%zx ioflags %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->size,
- __entry->new_size,
- __entry->offset,
- __entry->count,
- __print_flags(__entry->flags, "|", XFS_IO_FLAGS))
-)
-
-#define DEFINE_RW_EVENT(name) \
-DEFINE_EVENT(xfs_file_class, name, \
- TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), \
- TP_ARGS(ip, count, offset, flags))
-DEFINE_RW_EVENT(xfs_file_read);
-DEFINE_RW_EVENT(xfs_file_buffered_write);
-DEFINE_RW_EVENT(xfs_file_direct_write);
-DEFINE_RW_EVENT(xfs_file_splice_read);
-DEFINE_RW_EVENT(xfs_file_splice_write);
-
-DECLARE_EVENT_CLASS(xfs_page_class,
- TP_PROTO(struct inode *inode, struct page *page, unsigned long off),
- TP_ARGS(inode, page, off),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(pgoff_t, pgoff)
- __field(loff_t, size)
- __field(unsigned long, offset)
- __field(int, delalloc)
- __field(int, unwritten)
- ),
- TP_fast_assign(
- int delalloc = -1, unwritten = -1;
-
- if (page_has_buffers(page))
- xfs_count_page_state(page, &delalloc, &unwritten);
- __entry->dev = inode->i_sb->s_dev;
- __entry->ino = XFS_I(inode)->i_ino;
- __entry->pgoff = page_offset(page);
- __entry->size = i_size_read(inode);
- __entry->offset = off;
- __entry->delalloc = delalloc;
- __entry->unwritten = unwritten;
- ),
- TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
- "delalloc %d unwritten %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->pgoff,
- __entry->size,
- __entry->offset,
- __entry->delalloc,
- __entry->unwritten)
-)
-
-#define DEFINE_PAGE_EVENT(name) \
-DEFINE_EVENT(xfs_page_class, name, \
- TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \
- TP_ARGS(inode, page, off))
-DEFINE_PAGE_EVENT(xfs_writepage);
-DEFINE_PAGE_EVENT(xfs_releasepage);
-DEFINE_PAGE_EVENT(xfs_invalidatepage);
-
-DECLARE_EVENT_CLASS(xfs_imap_class,
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
- int type, struct xfs_bmbt_irec *irec),
- TP_ARGS(ip, offset, count, type, irec),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(loff_t, size)
- __field(loff_t, new_size)
- __field(loff_t, offset)
- __field(size_t, count)
- __field(int, type)
- __field(xfs_fileoff_t, startoff)
- __field(xfs_fsblock_t, startblock)
- __field(xfs_filblks_t, blockcount)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->new_size = ip->i_new_size;
- __entry->offset = offset;
- __entry->count = count;
- __entry->type = type;
- __entry->startoff = irec ? irec->br_startoff : 0;
- __entry->startblock = irec ? irec->br_startblock : 0;
- __entry->blockcount = irec ? irec->br_blockcount : 0;
- ),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
- "offset 0x%llx count %zd type %s "
- "startoff 0x%llx startblock %lld blockcount 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->size,
- __entry->new_size,
- __entry->offset,
- __entry->count,
- __print_symbolic(__entry->type, XFS_IO_TYPES),
- __entry->startoff,
- (__int64_t)__entry->startblock,
- __entry->blockcount)
-)
-
-#define DEFINE_IOMAP_EVENT(name) \
-DEFINE_EVENT(xfs_imap_class, name, \
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count, \
- int type, struct xfs_bmbt_irec *irec), \
- TP_ARGS(ip, offset, count, type, irec))
-DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
-DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-
-DECLARE_EVENT_CLASS(xfs_simple_io_class,
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
- TP_ARGS(ip, offset, count),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(loff_t, size)
- __field(loff_t, new_size)
- __field(loff_t, offset)
- __field(size_t, count)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->new_size = ip->i_new_size;
- __entry->offset = offset;
- __entry->count = count;
- ),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx "
- "offset 0x%llx count %zd",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->size,
- __entry->new_size,
- __entry->offset,
- __entry->count)
-);
-
-#define DEFINE_SIMPLE_IO_EVENT(name) \
-DEFINE_EVENT(xfs_simple_io_class, name, \
- TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), \
- TP_ARGS(ip, offset, count))
-DEFINE_SIMPLE_IO_EVENT(xfs_delalloc_enospc);
-DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
-DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
-
-TRACE_EVENT(xfs_itruncate_start,
- TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size, int flag,
- xfs_off_t toss_start, xfs_off_t toss_finish),
- TP_ARGS(ip, new_size, flag, toss_start, toss_finish),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsize_t, size)
- __field(xfs_fsize_t, new_size)
- __field(xfs_off_t, toss_start)
- __field(xfs_off_t, toss_finish)
- __field(int, flag)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->new_size = new_size;
- __entry->toss_start = toss_start;
- __entry->toss_finish = toss_finish;
- __entry->flag = flag;
- ),
- TP_printk("dev %d:%d ino 0x%llx %s size 0x%llx new_size 0x%llx "
- "toss start 0x%llx toss finish 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_flags(__entry->flag, "|", XFS_ITRUNC_FLAGS),
- __entry->size,
- __entry->new_size,
- __entry->toss_start,
- __entry->toss_finish)
-);
-
-DECLARE_EVENT_CLASS(xfs_itrunc_class,
- TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
- TP_ARGS(ip, new_size),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsize_t, size)
- __field(xfs_fsize_t, new_size)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->new_size = new_size;
- ),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->size,
- __entry->new_size)
-)
-
-#define DEFINE_ITRUNC_EVENT(name) \
-DEFINE_EVENT(xfs_itrunc_class, name, \
- TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
- TP_ARGS(ip, new_size))
-DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
-DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
-
-TRACE_EVENT(xfs_pagecache_inval,
- TP_PROTO(struct xfs_inode *ip, xfs_off_t start, xfs_off_t finish),
- TP_ARGS(ip, start, finish),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsize_t, size)
- __field(xfs_off_t, start)
- __field(xfs_off_t, finish)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->start = start;
- __entry->finish = finish;
- ),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx start 0x%llx finish 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->size,
- __entry->start,
- __entry->finish)
-);
-
-TRACE_EVENT(xfs_bunmap,
- TP_PROTO(struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len,
- int flags, unsigned long caller_ip),
- TP_ARGS(ip, bno, len, flags, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(xfs_fsize_t, size)
- __field(xfs_fileoff_t, bno)
- __field(xfs_filblks_t, len)
- __field(unsigned long, caller_ip)
- __field(int, flags)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->ino = ip->i_ino;
- __entry->size = ip->i_d.di_size;
- __entry->bno = bno;
- __entry->len = len;
- __entry->caller_ip = caller_ip;
- __entry->flags = flags;
- ),
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx bno 0x%llx len 0x%llx"
- "flags %s caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->size,
- __entry->bno,
- __entry->len,
- __print_flags(__entry->flags, "|", XFS_BMAPI_FLAGS),
- (void *)__entry->caller_ip)
-);
-
-DECLARE_EVENT_CLASS(xfs_busy_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len),
- TP_ARGS(mp, agno, agbno, len),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
- __field(xfs_extlen_t, len)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agbno = agbno;
- __entry->len = len;
- ),
- TP_printk("dev %d:%d agno %u agbno %u len %u",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
- __entry->agbno,
- __entry->len)
-);
-#define DEFINE_BUSY_EVENT(name) \
-DEFINE_EVENT(xfs_busy_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- xfs_agblock_t agbno, xfs_extlen_t len), \
- TP_ARGS(mp, agno, agbno, len))
-DEFINE_BUSY_EVENT(xfs_alloc_busy);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
-DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
-
-TRACE_EVENT(xfs_alloc_busy_trim,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len,
- xfs_agblock_t tbno, xfs_extlen_t tlen),
- TP_ARGS(mp, agno, agbno, len, tbno, tlen),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
- __field(xfs_extlen_t, len)
- __field(xfs_agblock_t, tbno)
- __field(xfs_extlen_t, tlen)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agbno = agbno;
- __entry->len = len;
- __entry->tbno = tbno;
- __entry->tlen = tlen;
- ),
- TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
- __entry->agbno,
- __entry->len,
- __entry->tbno,
- __entry->tlen)
-);
-
-TRACE_EVENT(xfs_trans_commit_lsn,
- TP_PROTO(struct xfs_trans *trans),
- TP_ARGS(trans),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(struct xfs_trans *, tp)
- __field(xfs_lsn_t, lsn)
- ),
- TP_fast_assign(
- __entry->dev = trans->t_mountp->m_super->s_dev;
- __entry->tp = trans;
- __entry->lsn = trans->t_commit_lsn;
- ),
- TP_printk("dev %d:%d trans 0x%p commit_lsn 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->tp,
- __entry->lsn)
-);
-
-TRACE_EVENT(xfs_agf,
- TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
- unsigned long caller_ip),
- TP_ARGS(mp, agf, flags, caller_ip),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(int, flags)
- __field(__u32, length)
- __field(__u32, bno_root)
- __field(__u32, cnt_root)
- __field(__u32, bno_level)
- __field(__u32, cnt_level)
- __field(__u32, flfirst)
- __field(__u32, fllast)
- __field(__u32, flcount)
- __field(__u32, freeblks)
- __field(__u32, longest)
- __field(unsigned long, caller_ip)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = be32_to_cpu(agf->agf_seqno);
- __entry->flags = flags;
- __entry->length = be32_to_cpu(agf->agf_length);
- __entry->bno_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]);
- __entry->cnt_root = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]);
- __entry->bno_level =
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
- __entry->cnt_level =
- be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
- __entry->flfirst = be32_to_cpu(agf->agf_flfirst);
- __entry->fllast = be32_to_cpu(agf->agf_fllast);
- __entry->flcount = be32_to_cpu(agf->agf_flcount);
- __entry->freeblks = be32_to_cpu(agf->agf_freeblks);
- __entry->longest = be32_to_cpu(agf->agf_longest);
- __entry->caller_ip = caller_ip;
- ),
- TP_printk("dev %d:%d agno %u flags %s length %u roots b %u c %u "
- "levels b %u c %u flfirst %u fllast %u flcount %u "
- "freeblks %u longest %u caller %pf",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
- __print_flags(__entry->flags, "|", XFS_AGF_FLAGS),
- __entry->length,
- __entry->bno_root,
- __entry->cnt_root,
- __entry->bno_level,
- __entry->cnt_level,
- __entry->flfirst,
- __entry->fllast,
- __entry->flcount,
- __entry->freeblks,
- __entry->longest,
- (void *)__entry->caller_ip)
-);
-
-TRACE_EVENT(xfs_free_extent,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
- xfs_extlen_t len, bool isfl, int haveleft, int haveright),
- TP_ARGS(mp, agno, agbno, len, isfl, haveleft, haveright),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
- __field(xfs_extlen_t, len)
- __field(int, isfl)
- __field(int, haveleft)
- __field(int, haveright)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agbno = agbno;
- __entry->len = len;
- __entry->isfl = isfl;
- __entry->haveleft = haveleft;
- __entry->haveright = haveright;
- ),
- TP_printk("dev %d:%d agno %u agbno %u len %u isfl %d %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
- __entry->agbno,
- __entry->len,
- __entry->isfl,
- __entry->haveleft ?
- (__entry->haveright ? "both" : "left") :
- (__entry->haveright ? "right" : "none"))
-);
-
-DECLARE_EVENT_CLASS(xfs_alloc_class,
- TP_PROTO(struct xfs_alloc_arg *args),
- TP_ARGS(args),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
- __field(xfs_extlen_t, minlen)
- __field(xfs_extlen_t, maxlen)
- __field(xfs_extlen_t, mod)
- __field(xfs_extlen_t, prod)
- __field(xfs_extlen_t, minleft)
- __field(xfs_extlen_t, total)
- __field(xfs_extlen_t, alignment)
- __field(xfs_extlen_t, minalignslop)
- __field(xfs_extlen_t, len)
- __field(short, type)
- __field(short, otype)
- __field(char, wasdel)
- __field(char, wasfromfl)
- __field(char, isfl)
- __field(char, userdata)
- __field(xfs_fsblock_t, firstblock)
- ),
- TP_fast_assign(
- __entry->dev = args->mp->m_super->s_dev;
- __entry->agno = args->agno;
- __entry->agbno = args->agbno;
- __entry->minlen = args->minlen;
- __entry->maxlen = args->maxlen;
- __entry->mod = args->mod;
- __entry->prod = args->prod;
- __entry->minleft = args->minleft;
- __entry->total = args->total;
- __entry->alignment = args->alignment;
- __entry->minalignslop = args->minalignslop;
- __entry->len = args->len;
- __entry->type = args->type;
- __entry->otype = args->otype;
- __entry->wasdel = args->wasdel;
- __entry->wasfromfl = args->wasfromfl;
- __entry->isfl = args->isfl;
- __entry->userdata = args->userdata;
- __entry->firstblock = args->firstblock;
- ),
- TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
- "prod %u minleft %u total %u alignment %u minalignslop %u "
- "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
- "userdata %d firstblock 0x%llx",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
- __entry->agbno,
- __entry->minlen,
- __entry->maxlen,
- __entry->mod,
- __entry->prod,
- __entry->minleft,
- __entry->total,
- __entry->alignment,
- __entry->minalignslop,
- __entry->len,
- __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
- __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
- __entry->wasdel,
- __entry->wasfromfl,
- __entry->isfl,
- __entry->userdata,
- (unsigned long long)__entry->firstblock)
-)
-
-#define DEFINE_ALLOC_EVENT(name) \
-DEFINE_EVENT(xfs_alloc_class, name, \
- TP_PROTO(struct xfs_alloc_arg *args), \
- TP_ARGS(args))
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
-DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
-DEFINE_ALLOC_EVENT(xfs_alloc_small_error);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
-DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
-
-DECLARE_EVENT_CLASS(xfs_dir2_class,
- TP_PROTO(struct xfs_da_args *args),
- TP_ARGS(args),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __dynamic_array(char, name, args->namelen)
- __field(int, namelen)
- __field(xfs_dahash_t, hashval)
- __field(xfs_ino_t, inumber)
- __field(int, op_flags)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
- __entry->ino = args->dp->i_ino;
- if (args->namelen)
- memcpy(__get_str(name), args->name, args->namelen);
- __entry->namelen = args->namelen;
- __entry->hashval = args->hashval;
- __entry->inumber = args->inumber;
- __entry->op_flags = args->op_flags;
- ),
- TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
- "inumber 0x%llx op_flags %s",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->namelen,
- __entry->namelen ? __get_str(name) : NULL,
- __entry->namelen,
- __entry->hashval,
- __entry->inumber,
- __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
-)
-
-#define DEFINE_DIR2_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_class, name, \
- TP_PROTO(struct xfs_da_args *args), \
- TP_ARGS(args))
-DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
-DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
-DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
-DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
-DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
-DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
-DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
-DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
-DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
-
-DECLARE_EVENT_CLASS(xfs_dir2_space_class,
- TP_PROTO(struct xfs_da_args *args, int idx),
- TP_ARGS(args, idx),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(int, op_flags)
- __field(int, idx)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
- __entry->ino = args->dp->i_ino;
- __entry->op_flags = args->op_flags;
- __entry->idx = idx;
- ),
- TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
- __entry->idx)
-)
-
-#define DEFINE_DIR2_SPACE_EVENT(name) \
-DEFINE_EVENT(xfs_dir2_space_class, name, \
- TP_PROTO(struct xfs_da_args *args, int idx), \
- TP_ARGS(args, idx))
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
-DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
-
-TRACE_EVENT(xfs_dir2_leafn_moveents,
- TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
- TP_ARGS(args, src_idx, dst_idx, count),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(int, op_flags)
- __field(int, src_idx)
- __field(int, dst_idx)
- __field(int, count)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
- __entry->ino = args->dp->i_ino;
- __entry->op_flags = args->op_flags;
- __entry->src_idx = src_idx;
- __entry->dst_idx = dst_idx;
- __entry->count = count;
- ),
- TP_printk("dev %d:%d ino 0x%llx op_flags %s "
- "src_idx %d dst_idx %d count %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
- __entry->src_idx,
- __entry->dst_idx,
- __entry->count)
-);
-
-#define XFS_SWAPEXT_INODES \
- { 0, "target" }, \
- { 1, "temp" }
-
-#define XFS_INODE_FORMAT_STR \
- { 0, "invalid" }, \
- { 1, "local" }, \
- { 2, "extent" }, \
- { 3, "btree" }
-
-DECLARE_EVENT_CLASS(xfs_swap_extent_class,
- TP_PROTO(struct xfs_inode *ip, int which),
- TP_ARGS(ip, which),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(int, which)
- __field(xfs_ino_t, ino)
- __field(int, format)
- __field(int, nex)
- __field(int, max_nex)
- __field(int, broot_size)
- __field(int, fork_off)
- ),
- TP_fast_assign(
- __entry->dev = VFS_I(ip)->i_sb->s_dev;
- __entry->which = which;
- __entry->ino = ip->i_ino;
- __entry->format = ip->i_d.di_format;
- __entry->nex = ip->i_d.di_nextents;
- __entry->max_nex = ip->i_df.if_ext_max;
- __entry->broot_size = ip->i_df.if_broot_bytes;
- __entry->fork_off = XFS_IFORK_BOFF(ip);
- ),
- TP_printk("dev %d:%d ino 0x%llx (%s), %s format, num_extents %d, "
- "Max in-fork extents %d, broot size %d, fork offset %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __print_symbolic(__entry->which, XFS_SWAPEXT_INODES),
- __print_symbolic(__entry->format, XFS_INODE_FORMAT_STR),
- __entry->nex,
- __entry->max_nex,
- __entry->broot_size,
- __entry->fork_off)
-)
-
-#define DEFINE_SWAPEXT_EVENT(name) \
-DEFINE_EVENT(xfs_swap_extent_class, name, \
- TP_PROTO(struct xfs_inode *ip, int which), \
- TP_ARGS(ip, which))
-
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_before);
-DEFINE_SWAPEXT_EVENT(xfs_swap_extent_after);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_item_class,
- TP_PROTO(struct log *log, struct xlog_recover *trans,
- struct xlog_recover_item *item, int pass),
- TP_ARGS(log, trans, item, pass),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(unsigned long, item)
- __field(xlog_tid_t, tid)
- __field(int, type)
- __field(int, pass)
- __field(int, count)
- __field(int, total)
- ),
- TP_fast_assign(
- __entry->dev = log->l_mp->m_super->s_dev;
- __entry->item = (unsigned long)item;
- __entry->tid = trans->r_log_tid;
- __entry->type = ITEM_TYPE(item);
- __entry->pass = pass;
- __entry->count = item->ri_cnt;
- __entry->total = item->ri_total;
- ),
- TP_printk("dev %d:%d trans 0x%x, pass %d, item 0x%p, item type %s "
- "item region count/total %d/%d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->tid,
- __entry->pass,
- (void *)__entry->item,
- __print_symbolic(__entry->type, XFS_LI_TYPE_DESC),
- __entry->count,
- __entry->total)
-)
-
-#define DEFINE_LOG_RECOVER_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_item_class, name, \
- TP_PROTO(struct log *log, struct xlog_recover *trans, \
- struct xlog_recover_item *item, int pass), \
- TP_ARGS(log, trans, item, pass))
-
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_add_cont);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_head);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_reorder_tail);
-DEFINE_LOG_RECOVER_ITEM(xfs_log_recover_item_recover);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_buf_item_class,
- TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f),
- TP_ARGS(log, buf_f),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(__int64_t, blkno)
- __field(unsigned short, len)
- __field(unsigned short, flags)
- __field(unsigned short, size)
- __field(unsigned int, map_size)
- ),
- TP_fast_assign(
- __entry->dev = log->l_mp->m_super->s_dev;
- __entry->blkno = buf_f->blf_blkno;
- __entry->len = buf_f->blf_len;
- __entry->flags = buf_f->blf_flags;
- __entry->size = buf_f->blf_size;
- __entry->map_size = buf_f->blf_map_size;
- ),
- TP_printk("dev %d:%d blkno 0x%llx, len %u, flags 0x%x, size %d, "
- "map_size %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->blkno,
- __entry->len,
- __entry->flags,
- __entry->size,
- __entry->map_size)
-)
-
-#define DEFINE_LOG_RECOVER_BUF_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_buf_item_class, name, \
- TP_PROTO(struct log *log, struct xfs_buf_log_format *buf_f), \
- TP_ARGS(log, buf_f))
-
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_not_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_add);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_cancel_ref_inc);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_recover);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_inode_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_reg_buf);
-DEFINE_LOG_RECOVER_BUF_ITEM(xfs_log_recover_buf_dquot_buf);
-
-DECLARE_EVENT_CLASS(xfs_log_recover_ino_item_class,
- TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f),
- TP_ARGS(log, in_f),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(unsigned short, size)
- __field(int, fields)
- __field(unsigned short, asize)
- __field(unsigned short, dsize)
- __field(__int64_t, blkno)
- __field(int, len)
- __field(int, boffset)
- ),
- TP_fast_assign(
- __entry->dev = log->l_mp->m_super->s_dev;
- __entry->ino = in_f->ilf_ino;
- __entry->size = in_f->ilf_size;
- __entry->fields = in_f->ilf_fields;
- __entry->asize = in_f->ilf_asize;
- __entry->dsize = in_f->ilf_dsize;
- __entry->blkno = in_f->ilf_blkno;
- __entry->len = in_f->ilf_len;
- __entry->boffset = in_f->ilf_boffset;
- ),
- TP_printk("dev %d:%d ino 0x%llx, size %u, fields 0x%x, asize %d, "
- "dsize %d, blkno 0x%llx, len %d, boffset %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->size,
- __entry->fields,
- __entry->asize,
- __entry->dsize,
- __entry->blkno,
- __entry->len,
- __entry->boffset)
-)
-#define DEFINE_LOG_RECOVER_INO_ITEM(name) \
-DEFINE_EVENT(xfs_log_recover_ino_item_class, name, \
- TP_PROTO(struct log *log, struct xfs_inode_log_format *in_f), \
- TP_ARGS(log, in_f))
-
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_recover);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_cancel);
-DEFINE_LOG_RECOVER_INO_ITEM(xfs_log_recover_inode_skip);
-
-DECLARE_EVENT_CLASS(xfs_discard_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len),
- TP_ARGS(mp, agno, agbno, len),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
- __field(xfs_extlen_t, len)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agbno = agbno;
- __entry->len = len;
- ),
- TP_printk("dev %d:%d agno %u agbno %u len %u\n",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
- __entry->agbno,
- __entry->len)
-)
-
-#define DEFINE_DISCARD_EVENT(name) \
-DEFINE_EVENT(xfs_discard_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- xfs_agblock_t agbno, xfs_extlen_t len), \
- TP_ARGS(mp, agno, agbno, len))
-DEFINE_DISCARD_EVENT(xfs_discard_extent);
-DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
-DEFINE_DISCARD_EVENT(xfs_discard_exclude);
-DEFINE_DISCARD_EVENT(xfs_discard_busy);
-
-#endif /* _TRACE_XFS_H */
-
-#undef TRACE_INCLUDE_PATH
-#define TRACE_INCLUDE_PATH .
-#define TRACE_INCLUDE_FILE xfs_trace
-#include <trace/define_trace.h>
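
The deleted trace header is built almost entirely from DECLARE_EVENT_CLASS definitions plus DEFINE_EVENT stampings, so dozens of tracepoints share one field layout, assignment block, and format string. The sketch below shows that deduplication idea in plain user-space C; it is only an illustrative analogue, not the kernel's TRACE_EVENT machinery, and the busy_event/trace_* names are invented for the example.

/* One "class" owns the record layout and formatter; a macro stamps
 * out named events that reuse it, mirroring DEFINE_BUSY_EVENT above. */
#include <stdio.h>

struct busy_event {			/* analogue of TP_STRUCT__entry */
	unsigned int agno;
	unsigned int agbno;
	unsigned int len;
};

static void busy_class_print(const char *name, const struct busy_event *e)
{
	printf("%s: agno %u agbno %u len %u\n", name, e->agno, e->agbno, e->len);
}

#define DEFINE_BUSY_EVENT(name)						\
static void trace_##name(unsigned int agno, unsigned int agbno,		\
			 unsigned int len)				\
{									\
	struct busy_event e = { agno, agbno, len };			\
	busy_class_print(#name, &e);					\
}

DEFINE_BUSY_EVENT(xfs_alloc_busy)
DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse)

int main(void)
{
	trace_xfs_alloc_busy(1, 100, 8);
	trace_xfs_alloc_busy_reuse(1, 100, 8);
	return 0;
}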
diff --git a/fs/xfs/linux-2.6/xfs_vnode.h b/fs/xfs/linux-2.6/xfs_vnode.h
deleted file mode 100644
index 7c220b4..0000000
--- a/fs/xfs/linux-2.6/xfs_vnode.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_VNODE_H__
-#define __XFS_VNODE_H__
-
-#include "xfs_fs.h"
-
-struct file;
-struct xfs_inode;
-struct xfs_iomap;
-struct attrlist_cursor_kern;
-
-/*
- * Return values for xfs_inactive. A return value of
- * VN_INACTIVE_NOCACHE implies that the file system behavior
- * has disassociated its state and bhv_desc_t from the vnode.
- */
-#define VN_INACTIVE_CACHE 0
-#define VN_INACTIVE_NOCACHE 1
-
-/*
- * Flags for read/write calls - same values as IRIX
- */
-#define IO_ISDIRECT 0x00004 /* bypass page cache */
-#define IO_INVIS 0x00020 /* don't update inode timestamps */
-
-#define XFS_IO_FLAGS \
- { IO_ISDIRECT, "DIRECT" }, \
- { IO_INVIS, "INVIS"}
-
-/*
- * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
- */
-#define FI_NONE 0 /* none */
-#define FI_REMAPF 1 /* Do a remapf prior to the operation */
-#define FI_REMAPF_LOCKED 2 /* Do a remapf prior to the operation.
- Prevent VM access to the pages until
- the operation completes. */
-
-/*
- * Some useful predicates.
- */
-#define VN_MAPPED(vp) mapping_mapped(vp->i_mapping)
-#define VN_CACHED(vp) (vp->i_mapping->nrpages)
-#define VN_DIRTY(vp) mapping_tagged(vp->i_mapping, \
- PAGECACHE_TAG_DIRTY)
-
-
-#endif /* __XFS_VNODE_H__ */
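
The XFS_IO_FLAGS table above pairs each flag bit with a printable name so the trace headers can render a flags word symbolically; __print_flags() walks such a { mask, "NAME" } table at output time. A minimal user-space sketch of that consumption, assuming nothing beyond the two flag values defined above:

#include <stdio.h>

struct flag_name {
	unsigned int mask;
	const char *name;
};

/* Same shape as the XFS_IO_FLAGS initializer in the deleted header. */
static const struct flag_name io_flags[] = {
	{ 0x00004, "DIRECT" },		/* IO_ISDIRECT */
	{ 0x00020, "INVIS" },		/* IO_INVIS */
};

/* Join the names of all set bits with '|', as __print_flags() does. */
static void print_io_flags(unsigned int flags)
{
	const char *sep = "";
	size_t i;

	for (i = 0; i < sizeof(io_flags) / sizeof(io_flags[0]); i++) {
		if (flags & io_flags[i].mask) {
			printf("%s%s", sep, io_flags[i].name);
			sep = "|";
		}
	}
	printf("\n");
}

int main(void)
{
	print_io_flags(0x00004 | 0x00020);	/* prints DIRECT|INVIS */
	return 0;
}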
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
deleted file mode 100644
index 87d3e03..0000000
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (C) 2008 Christoph Hellwig.
- * Portions Copyright (C) 2000-2008 Silicon Graphics, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "xfs.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_attr.h"
-#include "xfs_attr_leaf.h"
-#include "xfs_acl.h"
-#include "xfs_vnodeops.h"
-
-#include <linux/posix_acl_xattr.h>
-#include <linux/xattr.h>
-
-static int
-xfs_xattr_get(struct dentry *dentry, const char *name,
- void *value, size_t size, int xflags)
-{
- struct xfs_inode *ip = XFS_I(dentry->d_inode);
- int error, asize = size;
-
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
- /* Convert Linux syscall to XFS internal ATTR flags */
- if (!size) {
- xflags |= ATTR_KERNOVAL;
- value = NULL;
- }
-
- error = -xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
- if (error)
- return error;
- return asize;
-}
-
-static int
-xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags, int xflags)
-{
- struct xfs_inode *ip = XFS_I(dentry->d_inode);
-
- if (strcmp(name, "") == 0)
- return -EINVAL;
-
- /* Convert Linux syscall to XFS internal ATTR flags */
- if (flags & XATTR_CREATE)
- xflags |= ATTR_CREATE;
- if (flags & XATTR_REPLACE)
- xflags |= ATTR_REPLACE;
-
- if (!value)
- return -xfs_attr_remove(ip, (unsigned char *)name, xflags);
- return -xfs_attr_set(ip, (unsigned char *)name,
- (void *)value, size, xflags);
-}
-
-static const struct xattr_handler xfs_xattr_user_handler = {
- .prefix = XATTR_USER_PREFIX,
- .flags = 0, /* no flags implies user namespace */
- .get = xfs_xattr_get,
- .set = xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_trusted_handler = {
- .prefix = XATTR_TRUSTED_PREFIX,
- .flags = ATTR_ROOT,
- .get = xfs_xattr_get,
- .set = xfs_xattr_set,
-};
-
-static const struct xattr_handler xfs_xattr_security_handler = {
- .prefix = XATTR_SECURITY_PREFIX,
- .flags = ATTR_SECURE,
- .get = xfs_xattr_get,
- .set = xfs_xattr_set,
-};
-
-const struct xattr_handler *xfs_xattr_handlers[] = {
- &xfs_xattr_user_handler,
- &xfs_xattr_trusted_handler,
- &xfs_xattr_security_handler,
-#ifdef CONFIG_XFS_POSIX_ACL
- &xfs_xattr_acl_access_handler,
- &xfs_xattr_acl_default_handler,
-#endif
- NULL
-};
-
-static unsigned int xfs_xattr_prefix_len(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return sizeof("security");
- else if (flags & XFS_ATTR_ROOT)
- return sizeof("trusted");
- else
- return sizeof("user");
-}
-
-static const char *xfs_xattr_prefix(int flags)
-{
- if (flags & XFS_ATTR_SECURE)
- return xfs_xattr_security_handler.prefix;
- else if (flags & XFS_ATTR_ROOT)
- return xfs_xattr_trusted_handler.prefix;
- else
- return xfs_xattr_user_handler.prefix;
-}
-
-static int
-xfs_xattr_put_listent(
- struct xfs_attr_list_context *context,
- int flags,
- unsigned char *name,
- int namelen,
- int valuelen,
- unsigned char *value)
-{
- unsigned int prefix_len = xfs_xattr_prefix_len(flags);
- char *offset;
- int arraytop;
-
- ASSERT(context->count >= 0);
-
- /*
- * Only show root namespace entries if we are actually allowed to
- * see them.
- */
- if ((flags & XFS_ATTR_ROOT) && !capable(CAP_SYS_ADMIN))
- return 0;
-
- arraytop = context->count + prefix_len + namelen + 1;
- if (arraytop > context->firstu) {
- context->count = -1; /* insufficient space */
- return 1;
- }
- offset = (char *)context->alist + context->count;
- strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
- offset += prefix_len;
- strncpy(offset, (char *)name, namelen); /* real name */
- offset += namelen;
- *offset = '\0';
- context->count += prefix_len + namelen + 1;
- return 0;
-}
-
-static int
-xfs_xattr_put_listent_sizes(
- struct xfs_attr_list_context *context,
- int flags,
- unsigned char *name,
- int namelen,
- int valuelen,
- unsigned char *value)
-{
- context->count += xfs_xattr_prefix_len(flags) + namelen + 1;
- return 0;
-}
-
-static int
-list_one_attr(const char *name, const size_t len, void *data,
- size_t size, ssize_t *result)
-{
- char *p = data + *result;
-
- *result += len;
- if (!size)
- return 0;
- if (*result > size)
- return -ERANGE;
-
- strcpy(p, name);
- return 0;
-}
-
-ssize_t
-xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
-{
- struct xfs_attr_list_context context;
- struct attrlist_cursor_kern cursor = { 0 };
- struct inode *inode = dentry->d_inode;
- int error;
-
- /*
- * First read the regular on-disk attributes.
- */
- memset(&context, 0, sizeof(context));
- context.dp = XFS_I(inode);
- context.cursor = &cursor;
- context.resynch = 1;
- context.alist = data;
- context.bufsize = size;
- context.firstu = context.bufsize;
-
- if (size)
- context.put_listent = xfs_xattr_put_listent;
- else
- context.put_listent = xfs_xattr_put_listent_sizes;
-
- xfs_attr_list_int(&context);
- if (context.count < 0)
- return -ERANGE;
-
- /*
- * Then add the two synthetic ACL attributes.
- */
- if (posix_acl_access_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_ACCESS,
- strlen(POSIX_ACL_XATTR_ACCESS) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
- if (posix_acl_default_exists(inode)) {
- error = list_one_attr(POSIX_ACL_XATTR_DEFAULT,
- strlen(POSIX_ACL_XATTR_DEFAULT) + 1,
- data, size, &context.count);
- if (error)
- return error;
- }
-
- return context.count;
-}
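
The deleted xfs_vn_listxattr() supports two calling modes: with size == 0 it only counts bytes via xfs_xattr_put_listent_sizes(), otherwise it fills the caller's buffer and fails on overflow with -ERANGE. Userspace drives that contract with the usual two-pass idiom; below is a sketch using the real listxattr(2) wrapper (the required size can race upward between the two calls, which a robust caller would handle by retrying on ERANGE):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : ".";
	ssize_t len, off;
	char *buf;

	/* Pass 1: size == 0 asks how many bytes the name list needs. */
	len = listxattr(path, NULL, 0);
	if (len < 0) {
		perror("listxattr");
		return 1;
	}

	/* Pass 2: fill the buffer; names come back NUL-separated. */
	buf = malloc(len ? len : 1);
	if (!buf)
		return 1;
	len = listxattr(path, buf, len);
	for (off = 0; off < len; off += strlen(buf + off) + 1)
		printf("%s\n", buf + off);

	free(buf);
	return 0;
}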
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c
deleted file mode 100644
index 6fa2146..0000000
--- a/fs/xfs/quota/xfs_dquot.c
+++ /dev/null
@@ -1,1496 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-
-/*
- LOCK ORDER
-
- inode lock (ilock)
- dquot hash-chain lock (hashlock)
- xqm dquot freelist lock (freelistlock)
- mount's dquot list lock (mplistlock)
- user dquot lock - lock ordering among dquots is based on the uid or gid
- group dquot lock - similar to udquots. Between the two dquots, the udquot
- has to be locked first.
- pin lock - the dquot lock must be held to take this lock.
- flush lock - ditto.
-*/
-
-#ifdef DEBUG
-xfs_buftarg_t *xfs_dqerror_target;
-int xfs_do_dqerror;
-int xfs_dqreq_num;
-int xfs_dqerror_mod = 33;
-#endif
-
-static struct lock_class_key xfs_dquot_other_class;
-
-/*
- * Allocate and initialize a dquot. We don't always allocate fresh memory;
- * we try to reclaim a free dquot if the number of incore dquots is above
- * a threshold.
- * The only field inside the core that gets initialized at this point
- * is the d_id field. The idea is to fill in the entire q_core
- * when we read in the on disk dquot.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqinit(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type)
-{
- xfs_dquot_t *dqp;
- boolean_t brandnewdquot;
-
- brandnewdquot = xfs_qm_dqalloc_incore(&dqp);
- dqp->dq_flags = type;
- dqp->q_core.d_id = cpu_to_be32(id);
- dqp->q_mount = mp;
-
- /*
- * No need to re-initialize these if this is a reclaimed dquot.
- */
- if (brandnewdquot) {
- INIT_LIST_HEAD(&dqp->q_freelist);
- mutex_init(&dqp->q_qlock);
- init_waitqueue_head(&dqp->q_pinwait);
-
- /*
- * Because we want to use a counting completion, complete
- * the flush completion once to allow a single access to
- * the flush completion without blocking.
- */
- init_completion(&dqp->q_flush);
- complete(&dqp->q_flush);
-
- trace_xfs_dqinit(dqp);
- } else {
- /*
- * Only the q_core portion was zeroed in dqreclaim_one().
- * So, we need to reset others.
- */
- dqp->q_nrefs = 0;
- dqp->q_blkno = 0;
- INIT_LIST_HEAD(&dqp->q_mplist);
- INIT_LIST_HEAD(&dqp->q_hashlist);
- dqp->q_bufoffset = 0;
- dqp->q_fileoffset = 0;
- dqp->q_transp = NULL;
- dqp->q_gdquot = NULL;
- dqp->q_res_bcount = 0;
- dqp->q_res_icount = 0;
- dqp->q_res_rtbcount = 0;
- atomic_set(&dqp->q_pincount, 0);
- dqp->q_hash = NULL;
- ASSERT(list_empty(&dqp->q_freelist));
-
- trace_xfs_dqreuse(dqp);
- }
-
- /*
- * In either case we need to make sure group quotas have a different
- * lock class than user quotas, so that lockdep knows we can hold
- * locks of one of each at the same time.
- */
- if (!(type & XFS_DQ_USER))
- lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class);
-
- /*
- * log item gets initialized later
- */
- return (dqp);
-}
-
-/*
- * This is called to free all the memory associated with a dquot
- */
-void
-xfs_qm_dqdestroy(
- xfs_dquot_t *dqp)
-{
- ASSERT(list_empty(&dqp->q_freelist));
-
- mutex_destroy(&dqp->q_qlock);
- kmem_zone_free(xfs_Gqm->qm_dqzone, dqp);
-
- atomic_dec(&xfs_Gqm->qm_totaldquots);
-}
-
-/*
- * This is what a 'fresh' dquot inside a dquot chunk looks like on disk.
- */
-STATIC void
-xfs_qm_dqinit_core(
- xfs_dqid_t id,
- uint type,
- xfs_dqblk_t *d)
-{
- /*
- * Caller has zero'd the entire dquot 'chunk' already.
- */
- d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
- d->dd_diskdq.d_version = XFS_DQUOT_VERSION;
- d->dd_diskdq.d_id = cpu_to_be32(id);
- d->dd_diskdq.d_flags = type;
-}
-
-/*
- * If default limits are in force, push them into the dquot now.
- * We overwrite the dquot limits only if they are zero and this
- * is not the root dquot.
- */
-void
-xfs_qm_adjust_dqlimits(
- xfs_mount_t *mp,
- xfs_disk_dquot_t *d)
-{
- xfs_quotainfo_t *q = mp->m_quotainfo;
-
- ASSERT(d->d_id);
-
- if (q->qi_bsoftlimit && !d->d_blk_softlimit)
- d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
- if (q->qi_bhardlimit && !d->d_blk_hardlimit)
- d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
- if (q->qi_isoftlimit && !d->d_ino_softlimit)
- d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
- if (q->qi_ihardlimit && !d->d_ino_hardlimit)
- d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
- if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
- d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
- if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
- d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
-}
-
-/*
- * Check the limits and timers of a dquot and start or reset timers
- * if necessary.
- * This gets called even when quota enforcement is OFF, which makes our
- * life a little less complicated. (We just don't reject any quota
- * reservations in that case, when enforcement is off).
- * We also return 0 as the values of the timers in Q_GETQUOTA calls, when
- * enforcement's off.
- * In contrast, warnings are a little different in that they don't
- * 'automatically' get started when limits get exceeded. They do
- * get reset to zero, however, when we find the count to be under
- * the soft limit (they are only ever set non-zero via userspace).
- */
-void
-xfs_qm_adjust_dqtimers(
- xfs_mount_t *mp,
- xfs_disk_dquot_t *d)
-{
- ASSERT(d->d_id);
-
-#ifdef QUOTADEBUG
- if (d->d_blk_hardlimit)
- ASSERT(be64_to_cpu(d->d_blk_softlimit) <=
- be64_to_cpu(d->d_blk_hardlimit));
- if (d->d_ino_hardlimit)
- ASSERT(be64_to_cpu(d->d_ino_softlimit) <=
- be64_to_cpu(d->d_ino_hardlimit));
- if (d->d_rtb_hardlimit)
- ASSERT(be64_to_cpu(d->d_rtb_softlimit) <=
- be64_to_cpu(d->d_rtb_hardlimit));
-#endif
- if (!d->d_btimer) {
- if ((d->d_blk_softlimit &&
- (be64_to_cpu(d->d_bcount) >=
- be64_to_cpu(d->d_blk_softlimit))) ||
- (d->d_blk_hardlimit &&
- (be64_to_cpu(d->d_bcount) >=
- be64_to_cpu(d->d_blk_hardlimit)))) {
- d->d_btimer = cpu_to_be32(get_seconds() +
- mp->m_quotainfo->qi_btimelimit);
- } else {
- d->d_bwarns = 0;
- }
- } else {
- if ((!d->d_blk_softlimit ||
- (be64_to_cpu(d->d_bcount) <
- be64_to_cpu(d->d_blk_softlimit))) &&
- (!d->d_blk_hardlimit ||
- (be64_to_cpu(d->d_bcount) <
- be64_to_cpu(d->d_blk_hardlimit)))) {
- d->d_btimer = 0;
- }
- }
-
- if (!d->d_itimer) {
- if ((d->d_ino_softlimit &&
- (be64_to_cpu(d->d_icount) >=
- be64_to_cpu(d->d_ino_softlimit))) ||
- (d->d_ino_hardlimit &&
- (be64_to_cpu(d->d_icount) >=
- be64_to_cpu(d->d_ino_hardlimit)))) {
- d->d_itimer = cpu_to_be32(get_seconds() +
- mp->m_quotainfo->qi_itimelimit);
- } else {
- d->d_iwarns = 0;
- }
- } else {
- if ((!d->d_ino_softlimit ||
- (be64_to_cpu(d->d_icount) <
- be64_to_cpu(d->d_ino_softlimit))) &&
- (!d->d_ino_hardlimit ||
- (be64_to_cpu(d->d_icount) <
- be64_to_cpu(d->d_ino_hardlimit)))) {
- d->d_itimer = 0;
- }
- }
-
- if (!d->d_rtbtimer) {
- if ((d->d_rtb_softlimit &&
- (be64_to_cpu(d->d_rtbcount) >=
- be64_to_cpu(d->d_rtb_softlimit))) ||
- (d->d_rtb_hardlimit &&
- (be64_to_cpu(d->d_rtbcount) >=
- be64_to_cpu(d->d_rtb_hardlimit)))) {
- d->d_rtbtimer = cpu_to_be32(get_seconds() +
- mp->m_quotainfo->qi_rtbtimelimit);
- } else {
- d->d_rtbwarns = 0;
- }
- } else {
- if ((!d->d_rtb_softlimit ||
- (be64_to_cpu(d->d_rtbcount) <
- be64_to_cpu(d->d_rtb_softlimit))) &&
- (!d->d_rtb_hardlimit ||
- (be64_to_cpu(d->d_rtbcount) <
- be64_to_cpu(d->d_rtb_hardlimit)))) {
- d->d_rtbtimer = 0;
- }
- }
-}
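
/*
 * The three per-resource branches above implement one rule: start the
 * grace timer when usage reaches either the soft or the hard limit,
 * and clear it (plus, on the not-yet-started path, the warning count)
 * once usage is back under both. A condensed, hedged sketch of that
 * rule as a pure function, assuming 0 means "limit not set" and with
 * the warning-count reset elided:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool over_limit(uint64_t count, uint64_t soft, uint64_t hard)
{
	return (soft && count >= soft) || (hard && count >= hard);
}

/* New timer value: now + grace when newly over a limit, 0 when back
 * under both limits, otherwise the running timer is left alone. */
static uint64_t adjust_timer(uint64_t timer, uint64_t count,
			     uint64_t soft, uint64_t hard,
			     uint64_t now, uint64_t grace)
{
	if (!timer)
		return over_limit(count, soft, hard) ? now + grace : 0;
	return over_limit(count, soft, hard) ? timer : 0;
}

int main(void)
{
	/* usage 120 crosses soft limit 100: timer becomes now + grace */
	printf("%llu\n", (unsigned long long)
	       adjust_timer(0, 120, 100, 200, 1000, 604800));
	/* usage back to 80, under both limits: timer clears to 0 */
	printf("%llu\n", (unsigned long long)
	       adjust_timer(1604800, 80, 100, 200, 2000, 604800));
	return 0;
}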
-
-/*
- * initialize a buffer full of dquots and log the whole thing
- */
-STATIC void
-xfs_qm_init_dquot_blk(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- xfs_buf_t *bp)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- xfs_dqblk_t *d;
- int curid, i;
-
- ASSERT(tp);
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
-
- d = (xfs_dqblk_t *)XFS_BUF_PTR(bp);
-
- /*
- * ID of the first dquot in the block - IDs are zero based.
- */
- curid = id - (id % q->qi_dqperchunk);
- ASSERT(curid >= 0);
- memset(d, 0, BBTOB(q->qi_dqchunklen));
- for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++)
- xfs_qm_dqinit_core(curid, type, d);
- xfs_trans_dquot_buf(tp, bp,
- (type & XFS_DQ_USER ? XFS_BLF_UDQUOT_BUF :
- ((type & XFS_DQ_PROJ) ? XFS_BLF_PDQUOT_BUF :
- XFS_BLF_GDQUOT_BUF)));
- xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
-}
-
-
-/*
- * Allocate a block and fill it with dquots.
- * This is called when the bmapi finds a hole.
- */
-STATIC int
-xfs_qm_dqalloc(
- xfs_trans_t **tpp,
- xfs_mount_t *mp,
- xfs_dquot_t *dqp,
- xfs_inode_t *quotip,
- xfs_fileoff_t offset_fsb,
- xfs_buf_t **O_bpp)
-{
- xfs_fsblock_t firstblock;
- xfs_bmap_free_t flist;
- xfs_bmbt_irec_t map;
- int nmaps, error, committed;
- xfs_buf_t *bp;
- xfs_trans_t *tp = *tpp;
-
- ASSERT(tp != NULL);
-
- trace_xfs_dqalloc(dqp);
-
- /*
- * Initialize the bmap freelist prior to calling bmapi code.
- */
- xfs_bmap_init(&flist, &firstblock);
- xfs_ilock(quotip, XFS_ILOCK_EXCL);
- /*
- * Return if this type of quota was turned off while we didn't
- * hold the inode lock.
- */
- if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
- xfs_iunlock(quotip, XFS_ILOCK_EXCL);
- return (ESRCH);
- }
-
- xfs_trans_ijoin_ref(tp, quotip, XFS_ILOCK_EXCL);
- nmaps = 1;
- if ((error = xfs_bmapi(tp, quotip,
- offset_fsb, XFS_DQUOT_CLUSTER_SIZE_FSB,
- XFS_BMAPI_METADATA | XFS_BMAPI_WRITE,
- &firstblock,
- XFS_QM_DQALLOC_SPACE_RES(mp),
- &map, &nmaps, &flist))) {
- goto error0;
- }
- ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
- ASSERT(nmaps == 1);
- ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
- (map.br_startblock != HOLESTARTBLOCK));
-
- /*
- * Keep track of the blkno to save a lookup later
- */
- dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
- /* now we can just get the buffer (there's nothing to read yet) */
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
- dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen,
- 0);
- if (!bp || (error = XFS_BUF_GETERROR(bp)))
- goto error1;
- /*
- * Make a chunk of dquots out of this buffer and log
- * the entire thing.
- */
- xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id),
- dqp->dq_flags & XFS_DQ_ALLTYPES, bp);
-
- /*
- * xfs_bmap_finish() may commit the current transaction and
- * start a second transaction if the freelist is not empty.
- *
- * Since we still want to modify this buffer, we need to
- * ensure that the buffer is not released on commit of
- * the first transaction and ensure the buffer is added to the
- * second transaction.
- *
- * If there is only one transaction then don't stop the buffer
- * from being released when it commits later on.
- */
-
- xfs_trans_bhold(tp, bp);
-
- if ((error = xfs_bmap_finish(tpp, &flist, &committed))) {
- goto error1;
- }
-
- if (committed) {
- tp = *tpp;
- xfs_trans_bjoin(tp, bp);
- } else {
- xfs_trans_bhold_release(tp, bp);
- }
-
- *O_bpp = bp;
- return 0;
-
- error1:
- xfs_bmap_cancel(&flist);
- error0:
- xfs_iunlock(quotip, XFS_ILOCK_EXCL);
-
- return (error);
-}
-
-/*
- * Maps a dquot to the buffer containing its on-disk version.
- * This returns a ptr to the buffer containing the on-disk dquot
- * in the bpp param, and a ptr to the on-disk dquot within that buffer
- */
-STATIC int
-xfs_qm_dqtobp(
- xfs_trans_t **tpp,
- xfs_dquot_t *dqp,
- xfs_disk_dquot_t **O_ddpp,
- xfs_buf_t **O_bpp,
- uint flags)
-{
- xfs_bmbt_irec_t map;
- int nmaps = 1, error;
- xfs_buf_t *bp;
- xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp);
- xfs_mount_t *mp = dqp->q_mount;
- xfs_disk_dquot_t *ddq;
- xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
- xfs_trans_t *tp = (tpp ? *tpp : NULL);
-
- dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
-
- xfs_ilock(quotip, XFS_ILOCK_SHARED);
- if (XFS_IS_THIS_QUOTA_OFF(dqp)) {
- /*
- * Return if this type of quota was turned off while we
- * didn't hold the quota inode lock.
- */
- xfs_iunlock(quotip, XFS_ILOCK_SHARED);
- return ESRCH;
- }
-
- /*
- * Find the block map; no allocations yet
- */
- error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset,
- XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
- NULL, 0, &map, &nmaps, NULL);
-
- xfs_iunlock(quotip, XFS_ILOCK_SHARED);
- if (error)
- return error;
-
- ASSERT(nmaps == 1);
- ASSERT(map.br_blockcount == 1);
-
- /*
- * Offset of dquot in the (fixed sized) dquot chunk.
- */
- dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) *
- sizeof(xfs_dqblk_t);
-
- ASSERT(map.br_startblock != DELAYSTARTBLOCK);
- if (map.br_startblock == HOLESTARTBLOCK) {
- /*
- * We don't allocate unless we're asked to
- */
- if (!(flags & XFS_QMOPT_DQALLOC))
- return ENOENT;
-
- ASSERT(tp);
- error = xfs_qm_dqalloc(tpp, mp, dqp, quotip,
- dqp->q_fileoffset, &bp);
- if (error)
- return error;
- tp = *tpp;
- } else {
- trace_xfs_dqtobp_read(dqp);
-
- /*
- * store the blkno etc so that we don't have to do the
- * mapping all the time
- */
- dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
-
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen,
- 0, &bp);
- if (error || !bp)
- return XFS_ERROR(error);
- }
-
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
-
- /*
- * calculate the location of the dquot inside the buffer.
- */
- ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
-
- /*
- * A simple sanity check in case we got a corrupted dquot...
- */
- error = xfs_qm_dqcheck(mp, ddq, id, dqp->dq_flags & XFS_DQ_ALLTYPES,
- flags & (XFS_QMOPT_DQREPAIR|XFS_QMOPT_DOWARN),
- "dqtobp");
- if (error) {
- if (!(flags & XFS_QMOPT_DQREPAIR)) {
- xfs_trans_brelse(tp, bp);
- return XFS_ERROR(EIO);
- }
- XFS_BUF_BUSY(bp); /* We dirtied this */
- }
-
- *O_bpp = bp;
- *O_ddpp = ddq;
-
- return (0);
-}
-
-
-/*
- * Read in the ondisk dquot using dqtobp() then copy it to an incore version,
- * and release the buffer immediately.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqread(
- xfs_trans_t **tpp,
- xfs_dqid_t id,
- xfs_dquot_t *dqp, /* dquot to get filled in */
- uint flags)
-{
- xfs_disk_dquot_t *ddqp;
- xfs_buf_t *bp;
- int error;
- xfs_trans_t *tp;
-
- ASSERT(tpp);
-
- trace_xfs_dqread(dqp);
-
- /*
- * get a pointer to the on-disk dquot and the buffer containing it
- * dqp already knows its own type (GROUP/USER).
- */
- if ((error = xfs_qm_dqtobp(tpp, dqp, &ddqp, &bp, flags))) {
- return (error);
- }
- tp = *tpp;
-
- /* copy everything from disk dquot to the incore dquot */
- memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
- ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
- xfs_qm_dquot_logitem_init(dqp);
-
- /*
- * Reservation counters are defined as reservation plus current usage
- * to avoid having to add every time.
- */
- dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount);
- dqp->q_res_icount = be64_to_cpu(ddqp->d_icount);
- dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount);
-
- /* Mark the buf so that this will stay incore a little longer */
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_DQUOT, XFS_DQUOT_REF);
-
- /*
- * We got the buffer with an xfs_trans_read_buf() (in dqtobp()).
- * So we need to release with xfs_trans_brelse().
- * The strategy here is identical to that of inodes; we lock
- * the dquot in xfs_qm_dqget() before making it accessible to
- * others. This is because dquots, like inodes, need a good level of
- * concurrency, and we don't want to take locks on entire buffers
- * for dquot accesses.
- * Note also that the dquot buffer may even be dirty at this point, if
- * this particular dquot was repaired. We still aren't afraid to
- * brelse it because we have the changes incore.
- */
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
- xfs_trans_brelse(tp, bp);
-
- return (error);
-}
-
-/*
- * allocate an incore dquot from the kernel heap,
- * and fill its core with quota information kept on disk.
- * If XFS_QMOPT_DQALLOC is set, it'll allocate a dquot on disk
- * if it wasn't already allocated.
- */
-STATIC int
-xfs_qm_idtodq(
- xfs_mount_t *mp,
- xfs_dqid_t id, /* gid or uid, depending on type */
- uint type, /* UDQUOT or GDQUOT */
- uint flags, /* DQALLOC, DQREPAIR */
- xfs_dquot_t **O_dqpp)/* OUT : incore dquot, not locked */
-{
- xfs_dquot_t *dqp;
- int error;
- xfs_trans_t *tp;
- int cancelflags=0;
-
- dqp = xfs_qm_dqinit(mp, id, type);
- tp = NULL;
- if (flags & XFS_QMOPT_DQALLOC) {
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC);
- error = xfs_trans_reserve(tp, XFS_QM_DQALLOC_SPACE_RES(mp),
- XFS_WRITE_LOG_RES(mp) +
- BBTOB(mp->m_quotainfo->qi_dqchunklen) - 1 +
- 128,
- 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_WRITE_LOG_COUNT);
- if (error) {
- cancelflags = 0;
- goto error0;
- }
- cancelflags = XFS_TRANS_RELEASE_LOG_RES;
- }
-
- /*
- * Read it from disk; xfs_qm_dqread() takes care of
- * all the necessary initialization of dquot's fields (locks, etc)
- */
- if ((error = xfs_qm_dqread(&tp, id, dqp, flags))) {
- /*
- * This can happen if quotas got turned off (ESRCH),
- * or if the dquot didn't exist on disk and we ask to
- * allocate (ENOENT).
- */
- trace_xfs_dqread_fail(dqp);
- cancelflags |= XFS_TRANS_ABORT;
- goto error0;
- }
- if (tp) {
- if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES)))
- goto error1;
- }
-
- *O_dqpp = dqp;
- return (0);
-
- error0:
- ASSERT(error);
- if (tp)
- xfs_trans_cancel(tp, cancelflags);
- error1:
- xfs_qm_dqdestroy(dqp);
- *O_dqpp = NULL;
- return (error);
-}
-
-/*
- * Lookup a dquot in the incore dquot hashtable. We keep two separate
- * hashtables for user and group dquots; these are global tables
- * inside the XQM, not per-filesystem tables.
- * The hash chain must be locked by caller, and it is left locked
- * on return. Returning dquot is locked.
- */
-STATIC int
-xfs_qm_dqlookup(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- xfs_dqhash_t *qh,
- xfs_dquot_t **O_dqpp)
-{
- xfs_dquot_t *dqp;
- uint flist_locked;
-
- ASSERT(mutex_is_locked(&qh->qh_lock));
-
- flist_locked = B_FALSE;
-
- /*
- * Traverse the hashchain looking for a match
- */
- list_for_each_entry(dqp, &qh->qh_list, q_hashlist) {
- /*
- * We already have the hashlock. We don't need the
- * dqlock to look at the id field of the dquot, since the
- * id can't be modified without the hashlock anyway.
- */
- if (be32_to_cpu(dqp->q_core.d_id) == id && dqp->q_mount == mp) {
- trace_xfs_dqlookup_found(dqp);
-
- /*
- * All in core dquots must be on the dqlist of mp
- */
- ASSERT(!list_empty(&dqp->q_mplist));
-
- xfs_dqlock(dqp);
- if (dqp->q_nrefs == 0) {
- ASSERT(!list_empty(&dqp->q_freelist));
- if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
- trace_xfs_dqlookup_want(dqp);
-
- /*
- * We may have raced with dqreclaim_one()
- * (and lost). So, flag that we don't
- * want the dquot to be reclaimed.
- */
- dqp->dq_flags |= XFS_DQ_WANT;
- xfs_dqunlock(dqp);
- mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
- xfs_dqlock(dqp);
- dqp->dq_flags &= ~(XFS_DQ_WANT);
- }
- flist_locked = B_TRUE;
- }
-
- /*
- * id couldn't have changed; we had the hashlock all
- * along
- */
- ASSERT(be32_to_cpu(dqp->q_core.d_id) == id);
-
- if (flist_locked) {
- if (dqp->q_nrefs != 0) {
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- flist_locked = B_FALSE;
- } else {
- /* take it off the freelist */
- trace_xfs_dqlookup_freelist(dqp);
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- }
- }
-
- XFS_DQHOLD(dqp);
-
- if (flist_locked)
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- /*
- * move the dquot to the front of the hashchain
- */
- ASSERT(mutex_is_locked(&qh->qh_lock));
- list_move(&dqp->q_hashlist, &qh->qh_list);
- trace_xfs_dqlookup_done(dqp);
- *O_dqpp = dqp;
- return 0;
- }
- }
-
- *O_dqpp = NULL;
- ASSERT(mutex_is_locked(&qh->qh_lock));
- return (1);
-}
-
-/*
- * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return
- * a locked dquot, doing an allocation (if requested) as needed.
- * When both an inode and an id are given, the inode's id takes precedence.
- * That is, if the id changes while we don't hold the ilock inside this
- * function, the new dquot is returned, not necessarily the one requested
- * in the id argument.
- */
-int
-xfs_qm_dqget(
- xfs_mount_t *mp,
- xfs_inode_t *ip, /* locked inode (optional) */
- xfs_dqid_t id, /* uid/projid/gid depending on type */
- uint type, /* XFS_DQ_USER/XFS_DQ_PROJ/XFS_DQ_GROUP */
- uint flags, /* DQALLOC, DQSUSER, DQREPAIR, DOWARN */
- xfs_dquot_t **O_dqpp) /* OUT : locked incore dquot */
-{
- xfs_dquot_t *dqp;
- xfs_dqhash_t *h;
- uint version;
- int error;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
- if ((! XFS_IS_UQUOTA_ON(mp) && type == XFS_DQ_USER) ||
- (! XFS_IS_PQUOTA_ON(mp) && type == XFS_DQ_PROJ) ||
- (! XFS_IS_GQUOTA_ON(mp) && type == XFS_DQ_GROUP)) {
- return (ESRCH);
- }
- h = XFS_DQ_HASH(mp, id, type);
-
-#ifdef DEBUG
- if (xfs_do_dqerror) {
- if ((xfs_dqerror_target == mp->m_ddev_targp) &&
- (xfs_dqreq_num++ % xfs_dqerror_mod) == 0) {
- xfs_debug(mp, "Returning error in dqget");
- return (EIO);
- }
- }
-#endif
-
- again:
-
-#ifdef DEBUG
- ASSERT(type == XFS_DQ_USER ||
- type == XFS_DQ_PROJ ||
- type == XFS_DQ_GROUP);
- if (ip) {
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (type == XFS_DQ_USER)
- ASSERT(ip->i_udquot == NULL);
- else
- ASSERT(ip->i_gdquot == NULL);
- }
-#endif
- mutex_lock(&h->qh_lock);
-
- /*
- * Look in the cache (hashtable).
- * The chain is kept locked during lookup.
- */
- if (xfs_qm_dqlookup(mp, id, h, O_dqpp) == 0) {
- XQM_STATS_INC(xqmstats.xs_qm_dqcachehits);
- /*
- * The dquot was found, moved to the front of the chain,
- * taken off the freelist if it was on it, and locked
- * at this point. Just unlock the hashchain and return.
- */
- ASSERT(*O_dqpp);
- ASSERT(XFS_DQ_IS_LOCKED(*O_dqpp));
- mutex_unlock(&h->qh_lock);
- trace_xfs_dqget_hit(*O_dqpp);
- return (0); /* success */
- }
- XQM_STATS_INC(xqmstats.xs_qm_dqcachemisses);
-
- /*
- * Dquot cache miss. We don't want to keep the inode lock across
- * a (potential) disk read. Also we don't want to deal with the lock
- * ordering between quotainode and this inode. OTOH, dropping the inode
- * lock here means dealing with a chown that can happen before
- * we re-acquire the lock.
- */
- if (ip)
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /*
- * Save the hashchain version stamp, and unlock the chain, so that
- * we don't keep the lock across a disk read
- */
- version = h->qh_version;
- mutex_unlock(&h->qh_lock);
-
- /*
- * Allocate the dquot on the kernel heap, and read the ondisk
- * portion off the disk. Also, do all the necessary initialization.
- * This can return ENOENT if dquot didn't exist on disk and we didn't
- * ask it to allocate; ESRCH if quotas got turned off suddenly.
- */
- if ((error = xfs_qm_idtodq(mp, id, type,
- flags & (XFS_QMOPT_DQALLOC|XFS_QMOPT_DQREPAIR|
- XFS_QMOPT_DOWARN),
- &dqp))) {
- if (ip)
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- return (error);
- }
-
- /*
- * See if this is the mount code asking for the overall quota limits,
- * which are stored in the id == 0 user or group dquot.
- * Since we may not have done a quotacheck by this point, just return
- * the dquot without attaching it to any hashtables, lists, etc, or even
- * taking a reference.
- * The caller must dqdestroy this once done.
- */
- if (flags & XFS_QMOPT_DQSUSER) {
- ASSERT(id == 0);
- ASSERT(! ip);
- goto dqret;
- }
-
- /*
- * Dquot lock comes after hashlock in the lock ordering
- */
- if (ip) {
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- /*
- * A dquot could be attached to this inode by now, since
- * we had dropped the ilock.
- */
- if (type == XFS_DQ_USER) {
- if (!XFS_IS_UQUOTA_ON(mp)) {
- /* inode stays locked on return */
- xfs_qm_dqdestroy(dqp);
- return XFS_ERROR(ESRCH);
- }
- if (ip->i_udquot) {
- xfs_qm_dqdestroy(dqp);
- dqp = ip->i_udquot;
- xfs_dqlock(dqp);
- goto dqret;
- }
- } else {
- if (!XFS_IS_OQUOTA_ON(mp)) {
- /* inode stays locked on return */
- xfs_qm_dqdestroy(dqp);
- return XFS_ERROR(ESRCH);
- }
- if (ip->i_gdquot) {
- xfs_qm_dqdestroy(dqp);
- dqp = ip->i_gdquot;
- xfs_dqlock(dqp);
- goto dqret;
- }
- }
- }
-
- /*
- * Hashlock comes after ilock in lock order
- */
- mutex_lock(&h->qh_lock);
- if (version != h->qh_version) {
- xfs_dquot_t *tmpdqp;
- /*
- * Now, see if somebody else put the dquot in the
- * hashtable before us. This can happen because we didn't
- * keep the hashchain lock. We don't have to worry about
- * lock order between the two dquots here since dqp isn't
- * on any findable lists yet.
- */
- if (xfs_qm_dqlookup(mp, id, h, &tmpdqp) == 0) {
- /*
- * Duplicate found. Just throw away the new dquot
- * and start over.
- */
- xfs_qm_dqput(tmpdqp);
- mutex_unlock(&h->qh_lock);
- xfs_qm_dqdestroy(dqp);
- XQM_STATS_INC(xqmstats.xs_qm_dquot_dups);
- goto again;
- }
- }
-
- /*
- * Put the dquot at the beginning of the hash-chain and mp's list
- * LOCK ORDER: hashlock, freelistlock, mplistlock, udqlock, gdqlock ..
- */
- ASSERT(mutex_is_locked(&h->qh_lock));
- dqp->q_hash = h;
- list_add(&dqp->q_hashlist, &h->qh_list);
- h->qh_version++;
-
- /*
- * Attach this dquot to this filesystem's list of all dquots,
- * kept inside the mount structure in m_quotainfo field
- */
- mutex_lock(&mp->m_quotainfo->qi_dqlist_lock);
-
- /*
- * We return a locked dquot to the caller, with a reference taken
- */
- xfs_dqlock(dqp);
- dqp->q_nrefs = 1;
-
- list_add(&dqp->q_mplist, &mp->m_quotainfo->qi_dqlist);
- mp->m_quotainfo->qi_dquots++;
- mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
- mutex_unlock(&h->qh_lock);
- dqret:
- ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
- trace_xfs_dqget_miss(dqp);
- *O_dqpp = dqp;
- return (0);
-}
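
The retry logic in xfs_qm_dqget() above is a general optimistic-revalidation pattern: snapshot the hashchain version stamp, drop qh_lock around the disk read, then re-check the stamp and redo the lookup before inserting. A minimal userspace sketch of the same idea follows; pthread mutexes stand in for the kernel mutexes, and the lookup/load/insert helpers are hypothetical declarations, not real APIs.

    #include <pthread.h>
    #include <stdlib.h>

    struct cache {
        pthread_mutex_t lock;
        unsigned int    version;    /* bumped on every insertion */
    };

    /* Hypothetical helpers standing in for xfs_qm_dqlookup()/xfs_qm_idtodq(). */
    void *cache_lookup(struct cache *c, int id);             /* needs c->lock */
    void *load_from_disk(int id);                            /* slow; no locks */
    void  cache_insert(struct cache *c, int id, void *obj);  /* needs c->lock */

    void *cache_get(struct cache *c, int id)
    {
        unsigned int version;
        void *obj;

    again:
        pthread_mutex_lock(&c->lock);
        obj = cache_lookup(c, id);
        if (obj) {
            pthread_mutex_unlock(&c->lock);
            return obj;                      /* cache hit */
        }
        version = c->version;                /* snapshot the stamp */
        pthread_mutex_unlock(&c->lock);      /* never hold it across I/O */

        obj = load_from_disk(id);

        pthread_mutex_lock(&c->lock);
        if (version != c->version && cache_lookup(c, id) != NULL) {
            /* Lost the race: someone inserted it while we read the disk. */
            pthread_mutex_unlock(&c->lock);
            free(obj);
            goto again;
        }
        cache_insert(c, id, obj);
        c->version++;
        pthread_mutex_unlock(&c->lock);
        return obj;
    }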
-
-
-/*
- * Release a reference to the dquot (decrement ref-count)
- * and unlock it. If there is a group quota attached to this
- * dquot, carefully release that too without tripping over
- * deadlocks'n'stuff.
- */
-void
-xfs_qm_dqput(
- xfs_dquot_t *dqp)
-{
- xfs_dquot_t *gdqp;
-
- ASSERT(dqp->q_nrefs > 0);
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
- trace_xfs_dqput(dqp);
-
- if (dqp->q_nrefs != 1) {
- dqp->q_nrefs--;
- xfs_dqunlock(dqp);
- return;
- }
-
- /*
- * drop the dqlock and acquire the freelist and dqlock
- * in the right order; but try to get it out-of-order first
- */
- if (!mutex_trylock(&xfs_Gqm->qm_dqfrlist_lock)) {
- trace_xfs_dqput_wait(dqp);
- xfs_dqunlock(dqp);
- mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
- xfs_dqlock(dqp);
- }
-
- while (1) {
- gdqp = NULL;
-
- /* We can't depend on nrefs being == 1 here */
- if (--dqp->q_nrefs == 0) {
- trace_xfs_dqput_free(dqp);
-
- list_add_tail(&dqp->q_freelist, &xfs_Gqm->qm_dqfrlist);
- xfs_Gqm->qm_dqfrlist_cnt++;
-
- /*
- * If we just added a udquot to the freelist, then
- * we want to release the gdquot reference that
- * it (probably) has. Otherwise it'll keep the
- * gdquot from getting reclaimed.
- */
- if ((gdqp = dqp->q_gdquot)) {
- /*
- * Avoid a recursive dqput call
- */
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
- }
- xfs_dqunlock(dqp);
-
- /*
- * If we had a group quota inside the user quota as a hint,
- * release it now.
- */
- if (! gdqp)
- break;
- dqp = gdqp;
- }
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-}
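
The trylock dance at the top of xfs_qm_dqput() is the standard way to take a lock that ranks above one already held: try it out of order first, and if that fails, drop the held lock, take both in the documented order, and revalidate anything read earlier (here, q_nrefs is re-tested inside the loop). A generic sketch of the idiom, assuming pthread mutexes:

    #include <pthread.h>

    /*
     * Lock order: `outer` before `inner`.  The caller already holds
     * `inner` and needs `outer` too, mirroring the dqlock vs.
     * qm_dqfrlist_lock situation above.
     */
    static void lock_outer_while_holding_inner(pthread_mutex_t *outer,
                                               pthread_mutex_t *inner)
    {
        if (pthread_mutex_trylock(outer) != 0) {
            /* Out-of-order attempt failed: back off, re-take in order. */
            pthread_mutex_unlock(inner);
            pthread_mutex_lock(outer);
            pthread_mutex_lock(inner);
            /*
             * Anything read under `inner` before the drop is now stale
             * and must be revalidated by the caller.
             */
        }
    }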
-
-/*
- * Release a dquot. Flush it if dirty, then dqput() it.
- * dquot must not be locked.
- */
-void
-xfs_qm_dqrele(
- xfs_dquot_t *dqp)
-{
- if (!dqp)
- return;
-
- trace_xfs_dqrele(dqp);
-
- xfs_dqlock(dqp);
- /*
- * We don't care to flush it if the dquot is dirty here.
- * That will create stutters that we want to avoid.
- * Instead we do a delayed write when we try to reclaim
- * a dirty dquot. Also xfs_sync will take part of the burden...
- */
- xfs_qm_dqput(dqp);
-}
-
-/*
- * This is the dquot flushing I/O completion routine. It is called
- * from interrupt level when the buffer containing the dquot is
- * flushed to disk. It is responsible for removing the dquot logitem
- * from the AIL if it has not been re-logged, and unlocking the dquot's
- * flush lock. This behavior is very similar to that of inodes.
- */
-STATIC void
-xfs_qm_dqflush_done(
- struct xfs_buf *bp,
- struct xfs_log_item *lip)
-{
- xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
- xfs_dquot_t *dqp = qip->qli_dquot;
- struct xfs_ail *ailp = lip->li_ailp;
-
- /*
- * We only want to pull the item from the AIL if its
- * location in the log has not changed since we started the flush.
- * Thus, we only bother if the dquot's lsn has
- * not changed. First we check the lsn outside the lock
- * since it's cheaper, and then we recheck while
- * holding the lock before removing the dquot from the AIL.
- */
- if ((lip->li_flags & XFS_LI_IN_AIL) &&
- lip->li_lsn == qip->qli_flush_lsn) {
-
- /* xfs_trans_ail_delete() drops the AIL lock. */
- spin_lock(&ailp->xa_lock);
- if (lip->li_lsn == qip->qli_flush_lsn)
- xfs_trans_ail_delete(ailp, lip);
- else
- spin_unlock(&ailp->xa_lock);
- }
-
- /*
- * Release the dq's flush lock since we're done with it.
- */
- xfs_dqfunlock(dqp);
-}
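
The lsn test above is a double-checked pattern: a cheap, racy comparison outside the AIL spinlock filters the common case, and the authoritative re-check happens under the lock. A stripped-down sketch, with a plain mutex and a flag standing in for the AIL lock and AIL membership:

    #include <pthread.h>

    struct item {
        long lsn;        /* position in the log when last flushed */
        int  in_list;    /* nonzero while on the list (cf. XFS_LI_IN_AIL) */
    };

    /*
     * Remove `it` only if its lsn still matches `flush_lsn`: unlocked
     * pre-check first, then the re-check that actually counts.
     */
    static void remove_if_unchanged(pthread_mutex_t *list_lock,
                                    struct item *it, long flush_lsn)
    {
        if (!it->in_list || it->lsn != flush_lsn)
            return;                        /* cheap, racy early-out */

        pthread_mutex_lock(list_lock);
        if (it->lsn == flush_lsn)
            it->in_list = 0;               /* authoritative removal */
        pthread_mutex_unlock(list_lock);
    }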
-
-/*
- * Write a modified dquot to disk.
- * The dquot must be locked and the flush lock held by the caller.
- * The flush lock will not be unlocked until the dquot reaches the disk,
- * but the dquot is free to be unlocked and modified by the caller
- * in the interim. Dquot is still locked on return. This behavior is
- * identical to that of inodes.
- */
-int
-xfs_qm_dqflush(
- xfs_dquot_t *dqp,
- uint flags)
-{
- struct xfs_mount *mp = dqp->q_mount;
- struct xfs_buf *bp;
- struct xfs_disk_dquot *ddqp;
- int error;
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(!completion_done(&dqp->q_flush));
-
- trace_xfs_dqflush(dqp);
-
- /*
- * If not dirty, or it's pinned and we are not supposed to block, nada.
- */
- if (!XFS_DQ_IS_DIRTY(dqp) ||
- (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) {
- xfs_dqfunlock(dqp);
- return 0;
- }
- xfs_qm_dqunpin_wait(dqp);
-
- /*
- * This may have been unpinned because the filesystem is shutting
- * down forcibly. If that's the case we must not write this dquot
- * to disk, because the log record didn't make it to disk!
- */
- if (XFS_FORCED_SHUTDOWN(mp)) {
- dqp->dq_flags &= ~XFS_DQ_DIRTY;
- xfs_dqfunlock(dqp);
- return XFS_ERROR(EIO);
- }
-
- /*
- * Get the buffer containing the on-disk dquot
- */
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp);
- if (error) {
- ASSERT(error != ENOENT);
- xfs_dqfunlock(dqp);
- return error;
- }
-
- /*
- * Calculate the location of the dquot inside the buffer.
- */
- ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset);
-
- /*
- * A simple sanity check in case we got a corrupted dquot.
- */
- error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,
- XFS_QMOPT_DOWARN, "dqflush (incore copy)");
- if (error) {
- xfs_buf_relse(bp);
- xfs_dqfunlock(dqp);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return XFS_ERROR(EIO);
- }
-
- /* This is the only portion of data that needs to persist */
- memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
-
- /*
- * Clear the dirty field and remember the flush lsn for later use.
- */
- dqp->dq_flags &= ~XFS_DQ_DIRTY;
-
- xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn,
- &dqp->q_logitem.qli_item.li_lsn);
-
- /*
- * Attach an iodone routine so that we can remove this dquot from the
- * AIL and release the flush lock once the dquot is synced to disk.
- */
- xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done,
- &dqp->q_logitem.qli_item);
-
- /*
- * If the buffer is pinned then push on the log so we won't
- * get stuck waiting in the write for too long.
- */
- if (XFS_BUF_ISPINNED(bp)) {
- trace_xfs_dqflush_force(dqp);
- xfs_log_force(mp, 0);
- }
-
- if (flags & SYNC_WAIT)
- error = xfs_bwrite(mp, bp);
- else
- xfs_bdwrite(mp, bp);
-
- trace_xfs_dqflush_done(dqp);
-
- /*
- * dqp is still locked, but caller is free to unlock it now.
- */
- return error;
-
-}
-
-int
-xfs_qm_dqlock_nowait(
- xfs_dquot_t *dqp)
-{
- return mutex_trylock(&dqp->q_qlock);
-}
-
-void
-xfs_dqlock(
- xfs_dquot_t *dqp)
-{
- mutex_lock(&dqp->q_qlock);
-}
-
-void
-xfs_dqunlock(
- xfs_dquot_t *dqp)
-{
- mutex_unlock(&(dqp->q_qlock));
- if (dqp->q_logitem.qli_dquot == dqp) {
- /* Once was dqp->q_mount, but might just have been cleared */
- xfs_trans_unlocked_item(dqp->q_logitem.qli_item.li_ailp,
- (xfs_log_item_t*)&(dqp->q_logitem));
- }
-}
-
-
-void
-xfs_dqunlock_nonotify(
- xfs_dquot_t *dqp)
-{
- mutex_unlock(&(dqp->q_qlock));
-}
-
-/*
- * Lock two xfs_dquot structures.
- *
- * To avoid deadlocks we always lock the quota structure with
- * the lower id first.
- */
-void
-xfs_dqlock2(
- xfs_dquot_t *d1,
- xfs_dquot_t *d2)
-{
- if (d1 && d2) {
- ASSERT(d1 != d2);
- if (be32_to_cpu(d1->q_core.d_id) >
- be32_to_cpu(d2->q_core.d_id)) {
- mutex_lock(&d2->q_qlock);
- mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED);
- } else {
- mutex_lock(&d1->q_qlock);
- mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED);
- }
- } else if (d1) {
- mutex_lock(&d1->q_qlock);
- } else if (d2) {
- mutex_lock(&d2->q_qlock);
- }
-}
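
A generic rendering of the same deadlock-avoidance rule, with pointer order standing in for the quota id comparison; the NULL handling mirrors the function above, and as there, the two locks are assumed distinct (a sketch, not kernel code):

    #include <pthread.h>

    /* Every caller locks a given pair in one agreed order, so no cycle forms. */
    static void lock_pair(pthread_mutex_t *a, pthread_mutex_t *b)
    {
        if (a && b) {
            if (a > b) {                 /* swap into canonical order */
                pthread_mutex_t *t = a;
                a = b;
                b = t;
            }
            pthread_mutex_lock(a);
            pthread_mutex_lock(b);
        } else if (a) {
            pthread_mutex_lock(a);
        } else if (b) {
            pthread_mutex_lock(b);
        }
    }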
-
-
-/*
- * Take a dquot out of the mount's dqlist as well as the hashlist.
- * This is called via unmount as well as quotaoff, and the purge
- * will always succeed unless there are soft (temp) references
- * outstanding.
- *
- * This returns 0 if it was purged, 1 if it wasn't. It's not an error code
- * that we're returning! XXXsup - not cool.
- */
-/* ARGSUSED */
-int
-xfs_qm_dqpurge(
- xfs_dquot_t *dqp)
-{
- xfs_dqhash_t *qh = dqp->q_hash;
- xfs_mount_t *mp = dqp->q_mount;
-
- ASSERT(mutex_is_locked(&mp->m_quotainfo->qi_dqlist_lock));
- ASSERT(mutex_is_locked(&dqp->q_hash->qh_lock));
-
- xfs_dqlock(dqp);
- /*
- * We really can't afford to purge a dquot that is
- * referenced, because these are hard refs.
- * It shouldn't happen in general because we went through _all_ inodes in
- * dqrele_all_inodes before calling this and didn't let the mountlock go.
- * However it is possible that we have dquots with temporary
- * references that are not attached to an inode. e.g. see xfs_setattr().
- */
- if (dqp->q_nrefs != 0) {
- xfs_dqunlock(dqp);
- mutex_unlock(&dqp->q_hash->qh_lock);
- return (1);
- }
-
- ASSERT(!list_empty(&dqp->q_freelist));
-
- /*
- * If we're turning off quotas, we have to make sure that, for
- * example, we don't delete quota disk blocks while dquots are
- * in the process of getting written to those disk blocks.
- * This dquot might well be on AIL, and we can't leave it there
- * if we're turning off quotas. Basically, we need this flush
- * lock, and are willing to block on it.
- */
- if (!xfs_dqflock_nowait(dqp)) {
- /*
- * Block on the flush lock after nudging dquot buffer,
- * if it is incore.
- */
- xfs_qm_dqflock_pushbuf_wait(dqp);
- }
-
- /*
- * XXXIf we're turning this type of quotas off, we don't care
- * about the dirty metadata sitting in this dquot. OTOH, if
- * we're unmounting, we do care, so we flush it and wait.
- */
- if (XFS_DQ_IS_DIRTY(dqp)) {
- int error;
-
- /* dqflush unlocks dqflock */
- /*
- * Given that dqpurge is a very rare occurrence, it is OK
- * that we're holding the hashlist and mplist locks
- * across the disk write. But, ... XXXsup
- *
- * We don't care about getting disk errors here. We need
- * to purge this dquot anyway, so we go ahead regardless.
- */
- error = xfs_qm_dqflush(dqp, SYNC_WAIT);
- if (error)
- xfs_warn(mp, "%s: dquot %p flush failed",
- __func__, dqp);
- xfs_dqflock(dqp);
- }
- ASSERT(atomic_read(&dqp->q_pincount) == 0);
- ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
- !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
-
- list_del_init(&dqp->q_hashlist);
- qh->qh_version++;
- list_del_init(&dqp->q_mplist);
- mp->m_quotainfo->qi_dqreclaims++;
- mp->m_quotainfo->qi_dquots--;
- /*
- * XXX Move this to the front of the freelist, if we can get the
- * freelist lock.
- */
- ASSERT(!list_empty(&dqp->q_freelist));
-
- dqp->q_mount = NULL;
- dqp->q_hash = NULL;
- dqp->dq_flags = XFS_DQ_INACTIVE;
- memset(&dqp->q_core, 0, sizeof(dqp->q_core));
- xfs_dqfunlock(dqp);
- xfs_dqunlock(dqp);
- mutex_unlock(&qh->qh_lock);
- return (0);
-}
-
-
-#ifdef QUOTADEBUG
-void
-xfs_qm_dqprint(xfs_dquot_t *dqp)
-{
- struct xfs_mount *mp = dqp->q_mount;
-
- xfs_debug(mp, "-----------KERNEL DQUOT----------------");
- xfs_debug(mp, "---- dquotID = %d",
- (int)be32_to_cpu(dqp->q_core.d_id));
- xfs_debug(mp, "---- type = %s", DQFLAGTO_TYPESTR(dqp));
- xfs_debug(mp, "---- fs = 0x%p", dqp->q_mount);
- xfs_debug(mp, "---- blkno = 0x%x", (int) dqp->q_blkno);
- xfs_debug(mp, "---- boffset = 0x%x", (int) dqp->q_bufoffset);
- xfs_debug(mp, "---- blkhlimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_blk_hardlimit),
- (int)be64_to_cpu(dqp->q_core.d_blk_hardlimit));
- xfs_debug(mp, "---- blkslimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_blk_softlimit),
- (int)be64_to_cpu(dqp->q_core.d_blk_softlimit));
- xfs_debug(mp, "---- inohlimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_ino_hardlimit),
- (int)be64_to_cpu(dqp->q_core.d_ino_hardlimit));
- xfs_debug(mp, "---- inoslimit = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_ino_softlimit),
- (int)be64_to_cpu(dqp->q_core.d_ino_softlimit));
- xfs_debug(mp, "---- bcount = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_bcount),
- (int)be64_to_cpu(dqp->q_core.d_bcount));
- xfs_debug(mp, "---- icount = %Lu (0x%x)",
- be64_to_cpu(dqp->q_core.d_icount),
- (int)be64_to_cpu(dqp->q_core.d_icount));
- xfs_debug(mp, "---- btimer = %d",
- (int)be32_to_cpu(dqp->q_core.d_btimer));
- xfs_debug(mp, "---- itimer = %d",
- (int)be32_to_cpu(dqp->q_core.d_itimer));
- xfs_debug(mp, "---------------------------");
-}
-#endif
-
-/*
- * Give the buffer a little push if it is incore and
- * wait on the flush lock.
- */
-void
-xfs_qm_dqflock_pushbuf_wait(
- xfs_dquot_t *dqp)
-{
- xfs_mount_t *mp = dqp->q_mount;
- xfs_buf_t *bp;
-
- /*
- * Check to see if the dquot has been flushed as a delayed
- * write. If so, grab its buffer and send it
- * out immediately. We'll be able to acquire
- * the flush lock when the I/O completes.
- */
- bp = xfs_incore(mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
- if (!bp)
- goto out_lock;
-
- if (XFS_BUF_ISDELAYWRITE(bp)) {
- if (XFS_BUF_ISPINNED(bp))
- xfs_log_force(mp, 0);
- xfs_buf_delwri_promote(bp);
- wake_up_process(bp->b_target->bt_task);
- }
- xfs_buf_relse(bp);
-out_lock:
- xfs_dqflock(dqp);
-}
diff --git a/fs/xfs/quota/xfs_dquot.h b/fs/xfs/quota/xfs_dquot.h
deleted file mode 100644
index 5da3a23..0000000
--- a/fs/xfs/quota/xfs_dquot.h
+++ /dev/null
@@ -1,143 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DQUOT_H__
-#define __XFS_DQUOT_H__
-
-/*
- * Dquots are structures that hold quota information about a user or a group,
- * much like inodes are for files. In fact, dquots share many characteristics
- * with inodes. However, dquots can also be a centralized resource, relative
- * to a collection of inodes. In this respect, dquots share some characteristics
- * of the superblock.
- * The XFS dquot code exploits both of these traits in its algorithms. It
- * makes every attempt not to be a bottleneck when quotas are on, and to
- * have minimal impact, if any, when quotas are off.
- */
-
-/*
- * The hash chain headers (hash buckets)
- */
-typedef struct xfs_dqhash {
- struct list_head qh_list;
- struct mutex qh_lock;
- uint qh_version; /* ever increasing version */
- uint qh_nelems; /* number of dquots on the list */
-} xfs_dqhash_t;
-
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * The incore dquot structure
- */
-typedef struct xfs_dquot {
- uint dq_flags; /* various flags (XFS_DQ_*) */
- struct list_head q_freelist; /* global free list of dquots */
- struct list_head q_mplist; /* mount's list of dquots */
- struct list_head q_hashlist; /* global hash list of dquots */
- xfs_dqhash_t *q_hash; /* the hashchain header */
- struct xfs_mount*q_mount; /* filesystem this relates to */
- struct xfs_trans*q_transp; /* trans this belongs to currently */
- uint q_nrefs; /* # active refs from inodes */
- xfs_daddr_t q_blkno; /* blkno of dquot buffer */
- int q_bufoffset; /* off of dq in buffer (# dquots) */
- xfs_fileoff_t q_fileoffset; /* offset in quotas file */
-
- struct xfs_dquot*q_gdquot; /* group dquot, hint only */
- xfs_disk_dquot_t q_core; /* actual usage & quotas */
- xfs_dq_logitem_t q_logitem; /* dquot log item */
- xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
- xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
- xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
- struct mutex q_qlock; /* quota lock */
- struct completion q_flush; /* flush completion queue */
- atomic_t q_pincount; /* dquot pin count */
- wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
-} xfs_dquot_t;
-
-/*
- * Lock hierarchy for q_qlock:
- * XFS_QLOCK_NORMAL is the implicit default,
- * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
- */
-enum {
- XFS_QLOCK_NORMAL = 0,
- XFS_QLOCK_NESTED,
-};
-
-#define XFS_DQHOLD(dqp) ((dqp)->q_nrefs++)
-
-/*
- * Manage the q_flush completion queue embedded in the dquot. This completion
- * queue synchronizes processes attempting to flush the in-core dquot back to
- * disk.
- */
-static inline void xfs_dqflock(xfs_dquot_t *dqp)
-{
- wait_for_completion(&dqp->q_flush);
-}
-
-static inline int xfs_dqflock_nowait(xfs_dquot_t *dqp)
-{
- return try_wait_for_completion(&dqp->q_flush);
-}
-
-static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
-{
- complete(&dqp->q_flush);
-}
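
Note what these helpers buy: q_flush is a completion used as a flush "lock", so unlike a mutex it can legitimately be released by a different context than the one that acquired it, which is exactly what xfs_qm_dqflush_done() does from I/O completion. A userspace analogue built on a binary semaphore (a hedged sketch, not the kernel API):

    #include <semaphore.h>

    static sem_t flush_lock;

    static void flush_lock_init(void) { sem_init(&flush_lock, 0, 1); }

    /* xfs_dqflock(): block until the in-flight flush, if any, finishes. */
    static void flush_lock_take(void) { sem_wait(&flush_lock); }

    /* xfs_dqflock_nowait(): returns nonzero on success. */
    static int flush_lock_try(void) { return sem_trywait(&flush_lock) == 0; }

    /* xfs_dqfunlock(): may be called by a different thread than the taker. */
    static void flush_lock_drop(void) { sem_post(&flush_lock); }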
-
-#define XFS_DQ_IS_LOCKED(dqp) (mutex_is_locked(&((dqp)->q_qlock)))
-#define XFS_DQ_IS_DIRTY(dqp) ((dqp)->dq_flags & XFS_DQ_DIRTY)
-#define XFS_QM_ISUDQ(dqp) ((dqp)->dq_flags & XFS_DQ_USER)
-#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ)
-#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP)
-#define XFS_DQ_TO_QINF(dqp) ((dqp)->q_mount->m_quotainfo)
-#define XFS_DQ_TO_QIP(dqp) (XFS_QM_ISUDQ(dqp) ? \
- XFS_DQ_TO_QINF(dqp)->qi_uquotaip : \
- XFS_DQ_TO_QINF(dqp)->qi_gquotaip)
-
-#define XFS_IS_THIS_QUOTA_OFF(d) (! (XFS_QM_ISUDQ(d) ? \
- (XFS_IS_UQUOTA_ON((d)->q_mount)) : \
- (XFS_IS_OQUOTA_ON((d)->q_mount))))
-
-#ifdef QUOTADEBUG
-extern void xfs_qm_dqprint(xfs_dquot_t *);
-#else
-#define xfs_qm_dqprint(a)
-#endif
-
-extern void xfs_qm_dqdestroy(xfs_dquot_t *);
-extern int xfs_qm_dqflush(xfs_dquot_t *, uint);
-extern int xfs_qm_dqpurge(xfs_dquot_t *);
-extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern int xfs_qm_dqlock_nowait(xfs_dquot_t *);
-extern void xfs_qm_dqflock_pushbuf_wait(xfs_dquot_t *dqp);
-extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
- xfs_disk_dquot_t *);
-extern void xfs_qm_adjust_dqlimits(xfs_mount_t *,
- xfs_disk_dquot_t *);
-extern int xfs_qm_dqget(xfs_mount_t *, xfs_inode_t *,
- xfs_dqid_t, uint, uint, xfs_dquot_t **);
-extern void xfs_qm_dqput(xfs_dquot_t *);
-extern void xfs_dqlock(xfs_dquot_t *);
-extern void xfs_dqlock2(xfs_dquot_t *, xfs_dquot_t *);
-extern void xfs_dqunlock(xfs_dquot_t *);
-extern void xfs_dqunlock_nonotify(xfs_dquot_t *);
-
-#endif /* __XFS_DQUOT_H__ */
diff --git a/fs/xfs/quota/xfs_dquot_item.c b/fs/xfs/quota/xfs_dquot_item.c
deleted file mode 100644
index 8126fc2..0000000
--- a/fs/xfs/quota/xfs_dquot_item.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_itable.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)
-{
- return container_of(lip, struct xfs_dq_logitem, qli_item);
-}
-
-/*
- * returns the number of iovecs needed to log the given dquot item.
- */
-STATIC uint
-xfs_qm_dquot_logitem_size(
- struct xfs_log_item *lip)
-{
- /*
- * we need only two iovecs, one for the format, one for the real thing
- */
- return 2;
-}
-
-/*
- * fills in the vector of log iovecs for the given dquot log item.
- */
-STATIC void
-xfs_qm_dquot_logitem_format(
- struct xfs_log_item *lip,
- struct xfs_log_iovec *logvec)
-{
- struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
-
- logvec->i_addr = &qlip->qli_format;
- logvec->i_len = sizeof(xfs_dq_logformat_t);
- logvec->i_type = XLOG_REG_TYPE_QFORMAT;
- logvec++;
- logvec->i_addr = &qlip->qli_dquot->q_core;
- logvec->i_len = sizeof(xfs_disk_dquot_t);
- logvec->i_type = XLOG_REG_TYPE_DQUOT;
-
- ASSERT(2 == lip->li_desc->lid_size);
- qlip->qli_format.qlf_size = 2;
-
-}
-
-/*
- * Increment the pin count of the given dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_pin(
- struct xfs_log_item *lip)
-{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- atomic_inc(&dqp->q_pincount);
-}
-
-/*
- * Decrement the pin count of the given dquot, and wake up
- * anyone in xfs_dqwait_unpin() if the count goes to 0. The
- * dquot must have been previously pinned with a call to
- * xfs_qm_dquot_logitem_pin().
- */
-STATIC void
-xfs_qm_dquot_logitem_unpin(
- struct xfs_log_item *lip,
- int remove)
-{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
- ASSERT(atomic_read(&dqp->q_pincount) > 0);
- if (atomic_dec_and_test(&dqp->q_pincount))
- wake_up(&dqp->q_pinwait);
-}
-
-/*
- * Given the logitem, this writes the corresponding dquot entry to disk
- * asynchronously. This is called with the dquot entry securely locked;
- * we simply get xfs_qm_dqflush() to do the work, and unlock the dquot
- * at the end.
- */
-STATIC void
-xfs_qm_dquot_logitem_push(
- struct xfs_log_item *lip)
-{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
- int error;
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(!completion_done(&dqp->q_flush));
-
- /*
- * Since we were able to lock the dquot's flush lock and
- * we found it on the AIL, the dquot must be dirty. This
- * is because the dquot is removed from the AIL while still
- * holding the flush lock in xfs_qm_dqflush_done(). Thus, if
- * we found it in the AIL and were able to obtain the flush
- * lock without sleeping, then there must not have been
- * anyone in the process of flushing the dquot.
- */
- error = xfs_qm_dqflush(dqp, 0);
- if (error)
- xfs_warn(dqp->q_mount, "%s: push error %d on dqp %p",
- __func__, error, dqp);
- xfs_dqunlock(dqp);
-}
-
-STATIC xfs_lsn_t
-xfs_qm_dquot_logitem_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- /*
- * We always re-log the entire dquot when it becomes dirty,
- * so, the latest copy _is_ the only one that matters.
- */
- return lsn;
-}
-
-/*
- * This is called to wait for the given dquot to be unpinned.
- * Most of these pin/unpin routines are plagiarized from inode code.
- */
-void
-xfs_qm_dqunpin_wait(
- struct xfs_dquot *dqp)
-{
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- if (atomic_read(&dqp->q_pincount) == 0)
- return;
-
- /*
- * Give the log a push so we don't wait here too long.
- */
- xfs_log_force(dqp->q_mount, 0);
- wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0));
-}
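
The pin protocol pairs an atomic counter with a wait queue: pin/unpin stay lock-free, and the unpin that drops the count to zero wakes any waiter, which re-checks the count before proceeding. A hedged userspace sketch of the same protocol with C11 atomics and a condition variable:

    #include <pthread.h>
    #include <stdatomic.h>

    static atomic_int      pincount = 0;
    static pthread_mutex_t pin_mtx  = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  pin_cv   = PTHREAD_COND_INITIALIZER;

    static void pin(void)
    {
        atomic_fetch_add(&pincount, 1);        /* logitem_pin */
    }

    static void unpin(void)
    {
        if (atomic_fetch_sub(&pincount, 1) == 1) {  /* dropped to zero */
            pthread_mutex_lock(&pin_mtx);
            pthread_cond_broadcast(&pin_cv);        /* wake_up(&q_pinwait) */
            pthread_mutex_unlock(&pin_mtx);
        }
    }

    static void unpin_wait(void)
    {
        pthread_mutex_lock(&pin_mtx);
        while (atomic_load(&pincount) != 0)    /* re-check before proceeding */
            pthread_cond_wait(&pin_cv, &pin_mtx);
        pthread_mutex_unlock(&pin_mtx);
    }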
-
-/*
- * This is called when IOP_TRYLOCK returns XFS_ITEM_PUSHBUF to indicate that
- * the dquot is locked by us, but the flush lock isn't. So, here we are
- * going to see if the relevant dquot buffer is incore, waiting on DELWRI.
- * If so, we want to push it out to help us take this item off the AIL as soon
- * as possible.
- *
- * We must not be holding the AIL lock at this point. Calling incore() to
- * search the buffer cache can be a time-consuming thing, and the AIL lock
- * is a spinlock.
- */
-STATIC bool
-xfs_qm_dquot_logitem_pushbuf(
- struct xfs_log_item *lip)
-{
- struct xfs_dq_logitem *qlip = DQUOT_ITEM(lip);
- struct xfs_dquot *dqp = qlip->qli_dquot;
- struct xfs_buf *bp;
- bool ret = true;
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
- /*
- * If flushlock isn't locked anymore, chances are that the
- * dquot flush completed and the dquot was taken off the AIL.
- * So, just get out.
- */
- if (completion_done(&dqp->q_flush) ||
- !(lip->li_flags & XFS_LI_IN_AIL)) {
- xfs_dqunlock(dqp);
- return true;
- }
-
- bp = xfs_incore(dqp->q_mount->m_ddev_targp, qlip->qli_format.qlf_blkno,
- dqp->q_mount->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK);
- xfs_dqunlock(dqp);
- if (!bp)
- return true;
- if (XFS_BUF_ISDELAYWRITE(bp))
- xfs_buf_delwri_promote(bp);
- if (XFS_BUF_ISPINNED(bp))
- ret = false;
- xfs_buf_relse(bp);
- return ret;
-}
-
-/*
- * This is called to attempt to lock the dquot associated with this
- * dquot log item. Don't sleep on the dquot lock or the flush lock.
- * If the flush lock is already held, indicating that the dquot has
- * been or is in the process of being flushed, then see if we can
- * find the dquot's buffer in the buffer cache without sleeping. If
- * we can and it is marked delayed write, then we want to send it out.
- * We delay doing so until the push routine, though, to avoid sleeping
- * in any device strategy routines.
- */
-STATIC uint
-xfs_qm_dquot_logitem_trylock(
- struct xfs_log_item *lip)
-{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
- if (atomic_read(&dqp->q_pincount) > 0)
- return XFS_ITEM_PINNED;
-
- if (!xfs_qm_dqlock_nowait(dqp))
- return XFS_ITEM_LOCKED;
-
- if (!xfs_dqflock_nowait(dqp)) {
- /*
- * dquot has already been flushed to the backing buffer,
- * so leave it locked; the pushbuf routine will unlock it.
- */
- return XFS_ITEM_PUSHBUF;
- }
-
- ASSERT(lip->li_flags & XFS_LI_IN_AIL);
- return XFS_ITEM_SUCCESS;
-}
-
-/*
- * Unlock the dquot associated with the log item.
- * Clear the fields of the dquot and dquot log item that
- * are specific to the current transaction. If the
- * hold flags is set, do not unlock the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_unlock(
- struct xfs_log_item *lip)
-{
- struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot;
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
- /*
- * Clear the transaction pointer in the dquot
- */
- dqp->q_transp = NULL;
-
- /*
- * dquots are never 'held' from getting unlocked at the end of
- * a transaction. Their locking and unlocking is hidden inside the
- * transaction layer, within trans_commit. Hence, no LI_HOLD flag
- * for the logitem.
- */
- xfs_dqunlock(dqp);
-}
-
-/*
- * This needs to stamp an lsn into the dquot, I think.
- * RPCs that look at user dquots would then have to
- * push on the dependency recorded in the dquot.
- */
-STATIC void
-xfs_qm_dquot_logitem_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
-}
-
-/*
- * This is the ops vector for dquots
- */
-static struct xfs_item_ops xfs_dquot_item_ops = {
- .iop_size = xfs_qm_dquot_logitem_size,
- .iop_format = xfs_qm_dquot_logitem_format,
- .iop_pin = xfs_qm_dquot_logitem_pin,
- .iop_unpin = xfs_qm_dquot_logitem_unpin,
- .iop_trylock = xfs_qm_dquot_logitem_trylock,
- .iop_unlock = xfs_qm_dquot_logitem_unlock,
- .iop_committed = xfs_qm_dquot_logitem_committed,
- .iop_push = xfs_qm_dquot_logitem_push,
- .iop_pushbuf = xfs_qm_dquot_logitem_pushbuf,
- .iop_committing = xfs_qm_dquot_logitem_committing
-};
-
-/*
- * Initialize the dquot log item for a newly allocated dquot.
- * The dquot isn't locked at this point, but it isn't on any of the lists
- * either, so we don't care.
- */
-void
-xfs_qm_dquot_logitem_init(
- struct xfs_dquot *dqp)
-{
- struct xfs_dq_logitem *lp = &dqp->q_logitem;
-
- xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,
- &xfs_dquot_item_ops);
- lp->qli_dquot = dqp;
- lp->qli_format.qlf_type = XFS_LI_DQUOT;
- lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id);
- lp->qli_format.qlf_blkno = dqp->q_blkno;
- lp->qli_format.qlf_len = 1;
- /*
- * This is just the offset of this dquot within its buffer
- * (which is currently 1 FSB and probably won't change).
- * Hence 32 bits for this offset should be just fine.
- * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t))
- * here, and recompute it at recovery time.
- */
- lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;
-}
-
-/*------------------ QUOTAOFF LOG ITEMS -------------------*/
-
-static inline struct xfs_qoff_logitem *QOFF_ITEM(struct xfs_log_item *lip)
-{
- return container_of(lip, struct xfs_qoff_logitem, qql_item);
-}
-
-
-/*
- * This returns the number of iovecs needed to log the given quotaoff item.
- * We only need 1 iovec for a quotaoff item. It just logs the
- * quotaoff_log_format structure.
- */
-STATIC uint
-xfs_qm_qoff_logitem_size(
- struct xfs_log_item *lip)
-{
- return 1;
-}
-
-/*
- * This is called to fill in the vector of log iovecs for the
- * given quotaoff log item. We use only 1 iovec, and we point that
- * at the quotaoff_log_format structure embedded in the quotaoff item.
- * It is at this point that we assert that all of the extent
- * slots in the quotaoff item have been filled.
- */
-STATIC void
-xfs_qm_qoff_logitem_format(
- struct xfs_log_item *lip,
- struct xfs_log_iovec *log_vector)
-{
- struct xfs_qoff_logitem *qflip = QOFF_ITEM(lip);
-
- ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF);
-
- log_vector->i_addr = &qflip->qql_format;
- log_vector->i_len = sizeof(xfs_qoff_logitem_t);
- log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF;
- qflip->qql_format.qf_size = 1;
-}
-
-/*
- * Pinning has no meaning for a quotaoff item, so just return.
- */
-STATIC void
-xfs_qm_qoff_logitem_pin(
- struct xfs_log_item *lip)
-{
-}
-
-/*
- * Since pinning has no meaning for a quotaoff item, unpinning does
- * not either.
- */
-STATIC void
-xfs_qm_qoff_logitem_unpin(
- struct xfs_log_item *lip,
- int remove)
-{
-}
-
-/*
- * Quotaoff items have no locking; just return XFS_ITEM_LOCKED so that
- * the AIL push code leaves them alone.
- */
-STATIC uint
-xfs_qm_qoff_logitem_trylock(
- struct xfs_log_item *lip)
-{
- return XFS_ITEM_LOCKED;
-}
-
-/*
- * Quotaoff items have no locking, so there is nothing to do on unlock.
- */
-STATIC void
-xfs_qm_qoff_logitem_unlock(
- struct xfs_log_item *lip)
-{
-}
-
-/*
- * The quotaoff-start-item is logged only once and cannot be moved in the log,
- * so simply return the lsn at which it's been logged.
- */
-STATIC xfs_lsn_t
-xfs_qm_qoff_logitem_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- return lsn;
-}
-
-/*
- * There isn't much you can do to push on a quotaoff item. It is simply
- * stuck waiting for the log to be flushed to disk.
- */
-STATIC void
-xfs_qm_qoff_logitem_push(
- struct xfs_log_item *lip)
-{
-}
-
-
-STATIC xfs_lsn_t
-xfs_qm_qoffend_logitem_committed(
- struct xfs_log_item *lip,
- xfs_lsn_t lsn)
-{
- struct xfs_qoff_logitem *qfe = QOFF_ITEM(lip);
- struct xfs_qoff_logitem *qfs = qfe->qql_start_lip;
- struct xfs_ail *ailp = qfs->qql_item.li_ailp;
-
- /*
- * Delete the qoff-start logitem from the AIL.
- * xfs_trans_ail_delete() drops the AIL lock.
- */
- spin_lock(&ailp->xa_lock);
- xfs_trans_ail_delete(ailp, (xfs_log_item_t *)qfs);
-
- kmem_free(qfs);
- kmem_free(qfe);
- return (xfs_lsn_t)-1;
-}
-
-/*
- * XXX rcc - don't know quite what to do with this. I think we can
- * just ignore it. The only time that isn't the case is if we allow
- * the client to somehow see that quotas have been turned off in which
- * we can't allow that to get back until the quotaoff hits the disk.
- * So how would that happen? Also, do we need different routines for
- * quotaoff start and quotaoff end? I suspect the answer is yes but
- * to be sure, I need to look at the recovery code and see how quota off
- * recovery is handled (do we roll forward or back or do something else).
- * If we roll forwards or backwards, then we need two separate routines,
- * one that does nothing and one that stamps in the lsn that matters
- * (truly makes the quotaoff irrevocable). If we do something else,
- * then maybe we don't need two.
- */
-STATIC void
-xfs_qm_qoff_logitem_committing(
- struct xfs_log_item *lip,
- xfs_lsn_t commit_lsn)
-{
-}
-
-static struct xfs_item_ops xfs_qm_qoffend_logitem_ops = {
- .iop_size = xfs_qm_qoff_logitem_size,
- .iop_format = xfs_qm_qoff_logitem_format,
- .iop_pin = xfs_qm_qoff_logitem_pin,
- .iop_unpin = xfs_qm_qoff_logitem_unpin,
- .iop_trylock = xfs_qm_qoff_logitem_trylock,
- .iop_unlock = xfs_qm_qoff_logitem_unlock,
- .iop_committed = xfs_qm_qoffend_logitem_committed,
- .iop_push = xfs_qm_qoff_logitem_push,
- .iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * This is the ops vector shared by all quotaoff-start log items.
- */
-static struct xfs_item_ops xfs_qm_qoff_logitem_ops = {
- .iop_size = xfs_qm_qoff_logitem_size,
- .iop_format = xfs_qm_qoff_logitem_format,
- .iop_pin = xfs_qm_qoff_logitem_pin,
- .iop_unpin = xfs_qm_qoff_logitem_unpin,
- .iop_trylock = xfs_qm_qoff_logitem_trylock,
- .iop_unlock = xfs_qm_qoff_logitem_unlock,
- .iop_committed = xfs_qm_qoff_logitem_committed,
- .iop_push = xfs_qm_qoff_logitem_push,
- .iop_committing = xfs_qm_qoff_logitem_committing
-};
-
-/*
- * Allocate and initialize a quotaoff item of the correct quota type(s).
- */
-struct xfs_qoff_logitem *
-xfs_qm_qoff_logitem_init(
- struct xfs_mount *mp,
- struct xfs_qoff_logitem *start,
- uint flags)
-{
- struct xfs_qoff_logitem *qf;
-
- qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
-
- xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
- &xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
- qf->qql_item.li_mountp = mp;
- qf->qql_format.qf_type = XFS_LI_QUOTAOFF;
- qf->qql_format.qf_flags = flags;
- qf->qql_start_lip = start;
- return qf;
-}
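
The start argument is what selects the ops vector: the quotaoff-start item is created with start == NULL, and the matching end item is created with the start item as its pair, so that xfs_qm_qoffend_logitem_committed() can later pull the start item off the AIL and free both. A hedged sketch of how a caller pairs them; the real call sites go through the quotaoff transaction code (e.g. xfs_trans_get_qoff_item()), and the locals here are illustrative:

    static void quotaoff_log_pair(struct xfs_mount *mp, uint flags)
    {
        struct xfs_qoff_logitem *qoffstart, *qoffend;

        /* Start item: gets xfs_qm_qoff_logitem_ops. */
        qoffstart = xfs_qm_qoff_logitem_init(mp, NULL, flags);
        /* ... commit a transaction logging qoffstart ... */

        /* End item: gets xfs_qm_qoffend_logitem_ops, remembers its pair. */
        qoffend = xfs_qm_qoff_logitem_init(mp, qoffstart, flags);
        /* ... commit a transaction logging qoffend; once it hits the
         * log, ->iop_committed frees both items. */
    }
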
diff --git a/fs/xfs/quota/xfs_dquot_item.h b/fs/xfs/quota/xfs_dquot_item.h
deleted file mode 100644
index 5acae2a..0000000
--- a/fs/xfs/quota/xfs_dquot_item.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DQUOT_ITEM_H__
-#define __XFS_DQUOT_ITEM_H__
-
-struct xfs_dquot;
-struct xfs_trans;
-struct xfs_mount;
-struct xfs_qoff_logitem;
-
-typedef struct xfs_dq_logitem {
- xfs_log_item_t qli_item; /* common portion */
- struct xfs_dquot *qli_dquot; /* dquot ptr */
- xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
- xfs_dq_logformat_t qli_format; /* logged structure */
-} xfs_dq_logitem_t;
-
-typedef struct xfs_qoff_logitem {
- xfs_log_item_t qql_item; /* common portion */
- struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
- xfs_qoff_logformat_t qql_format; /* logged structure */
-} xfs_qoff_logitem_t;
-
-
-extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *);
-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
- struct xfs_qoff_logitem *, uint);
-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
- struct xfs_qoff_logitem *, uint);
-extern void xfs_trans_log_quotaoff_item(struct xfs_trans *,
- struct xfs_qoff_logitem *);
-
-#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c
deleted file mode 100644
index e70c7fc..0000000
--- a/fs/xfs/quota/xfs_qm.c
+++ /dev/null
@@ -1,2462 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
-#include "xfs_inode.h"
-#include "xfs_ialloc.h"
-#include "xfs_itable.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_bmap.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_space.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-/*
- * The global quota manager. There is only one of these for the entire
- * system, _not_ one per file system. XQM keeps track of the overall
- * quota functionality, including maintaining the freelist and hash
- * tables of dquots.
- */
-struct mutex xfs_Gqm_lock;
-struct xfs_qm *xfs_Gqm;
-uint ndquot;
-
-kmem_zone_t *qm_dqzone;
-kmem_zone_t *qm_dqtrxzone;
-
-STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int);
-STATIC void xfs_qm_list_destroy(xfs_dqlist_t *);
-
-STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
-STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *);
-
-static struct shrinker xfs_qm_shaker = {
- .shrink = xfs_qm_shake,
- .seeks = DEFAULT_SEEKS,
-};
-
-#ifdef DEBUG
-extern struct mutex qcheck_lock;
-#endif
-
-#ifdef QUOTADEBUG
-static void
-xfs_qm_dquot_list_print(
- struct xfs_mount *mp)
-{
- xfs_dquot_t *dqp;
- int i = 0;
-
- list_for_each_entry(dqp, &mp->m_quotainfo->qi_dqlist, q_mplist) {
- xfs_debug(mp, " %d. \"%d (%s)\" "
- "bcnt = %lld, icnt = %lld, refs = %d",
- i++, be32_to_cpu(dqp->q_core.d_id),
- DQFLAGTO_TYPESTR(dqp),
- (long long)be64_to_cpu(dqp->q_core.d_bcount),
- (long long)be64_to_cpu(dqp->q_core.d_icount),
- dqp->q_nrefs);
- }
-}
-#else
-static void xfs_qm_dquot_list_print(struct xfs_mount *mp) { }
-#endif
-
-/*
- * Initialize the XQM structure.
- * Note that there is not one quota manager per file system.
- */
-STATIC struct xfs_qm *
-xfs_Gqm_init(void)
-{
- xfs_dqhash_t *udqhash, *gdqhash;
- xfs_qm_t *xqm;
- size_t hsize;
- uint i;
-
- /*
- * Initialize the dquot hash tables.
- */
- udqhash = kmem_zalloc_greedy(&hsize,
- XFS_QM_HASHSIZE_LOW * sizeof(xfs_dqhash_t),
- XFS_QM_HASHSIZE_HIGH * sizeof(xfs_dqhash_t));
- if (!udqhash)
- goto out;
-
- gdqhash = kmem_zalloc_large(hsize);
- if (!gdqhash)
- goto out_free_udqhash;
-
- hsize /= sizeof(xfs_dqhash_t);
- ndquot = hsize << 8;
-
- xqm = kmem_zalloc(sizeof(xfs_qm_t), KM_SLEEP);
- xqm->qm_dqhashmask = hsize - 1;
- xqm->qm_usr_dqhtable = udqhash;
- xqm->qm_grp_dqhtable = gdqhash;
- ASSERT(xqm->qm_usr_dqhtable != NULL);
- ASSERT(xqm->qm_grp_dqhtable != NULL);
-
- for (i = 0; i < hsize; i++) {
- xfs_qm_list_init(&(xqm->qm_usr_dqhtable[i]), "uxdqh", i);
- xfs_qm_list_init(&(xqm->qm_grp_dqhtable[i]), "gxdqh", i);
- }
-
- /*
- * Freelist of all dquots of all file systems
- */
- INIT_LIST_HEAD(&xqm->qm_dqfrlist);
- xqm->qm_dqfrlist_cnt = 0;
- mutex_init(&xqm->qm_dqfrlist_lock);
-
- /*
- * The dquot zone. We register our own low-memory callback.
- */
- if (!qm_dqzone) {
- xqm->qm_dqzone = kmem_zone_init(sizeof(xfs_dquot_t),
- "xfs_dquots");
- qm_dqzone = xqm->qm_dqzone;
- } else
- xqm->qm_dqzone = qm_dqzone;
-
- register_shrinker(&xfs_qm_shaker);
-
- /*
- * The t_dqinfo portion of transactions.
- */
- if (!qm_dqtrxzone) {
- xqm->qm_dqtrxzone = kmem_zone_init(sizeof(xfs_dquot_acct_t),
- "xfs_dqtrx");
- qm_dqtrxzone = xqm->qm_dqtrxzone;
- } else
- xqm->qm_dqtrxzone = qm_dqtrxzone;
-
- atomic_set(&xqm->qm_totaldquots, 0);
- xqm->qm_dqfree_ratio = XFS_QM_DQFREE_RATIO;
- xqm->qm_nrefs = 0;
-#ifdef DEBUG
- mutex_init(&qcheck_lock);
-#endif
- return xqm;
-
- out_free_udqhash:
- kmem_free_large(udqhash);
- out:
- return NULL;
-}
-
-/*
- * Destroy the global quota manager when its reference count goes to zero.
- */
-STATIC void
-xfs_qm_destroy(
- struct xfs_qm *xqm)
-{
- struct xfs_dquot *dqp, *n;
- int hsize, i;
-
- ASSERT(xqm != NULL);
- ASSERT(xqm->qm_nrefs == 0);
- unregister_shrinker(&xfs_qm_shaker);
- hsize = xqm->qm_dqhashmask + 1;
- for (i = 0; i < hsize; i++) {
- xfs_qm_list_destroy(&(xqm->qm_usr_dqhtable[i]));
- xfs_qm_list_destroy(&(xqm->qm_grp_dqhtable[i]));
- }
- kmem_free_large(xqm->qm_usr_dqhtable);
- kmem_free_large(xqm->qm_grp_dqhtable);
- xqm->qm_usr_dqhtable = NULL;
- xqm->qm_grp_dqhtable = NULL;
- xqm->qm_dqhashmask = 0;
-
- /* frlist cleanup */
- mutex_lock(&xqm->qm_dqfrlist_lock);
- list_for_each_entry_safe(dqp, n, &xqm->qm_dqfrlist, q_freelist) {
- xfs_dqlock(dqp);
-#ifdef QUOTADEBUG
- xfs_debug(dqp->q_mount, "FREELIST destroy 0x%p", dqp);
-#endif
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- xfs_dqunlock(dqp);
- xfs_qm_dqdestroy(dqp);
- }
- mutex_unlock(&xqm->qm_dqfrlist_lock);
- mutex_destroy(&xqm->qm_dqfrlist_lock);
-#ifdef DEBUG
- mutex_destroy(&qcheck_lock);
-#endif
- kmem_free(xqm);
-}
-
-/*
- * Called at mount time to let XQM know that another file system is
- * starting quotas. This isn't crucial information as the individual mount
- * structures are pretty independent, but it helps the XQM keep a
- * global view of what's going on.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_hold_quotafs_ref(
- struct xfs_mount *mp)
-{
- /*
- * Need to lock the xfs_Gqm structure for things like this. For example,
- * the structure could disappear between the entry to this routine and
- * a HOLD operation if not locked.
- */
- mutex_lock(&xfs_Gqm_lock);
-
- if (!xfs_Gqm) {
- xfs_Gqm = xfs_Gqm_init();
- if (!xfs_Gqm) {
- mutex_unlock(&xfs_Gqm_lock);
- return ENOMEM;
- }
- }
-
- /*
- * We can keep a list of all filesystems with quotas mounted for
- * debugging and statistical purposes, but ...
- * Just take a reference and get out.
- */
- xfs_Gqm->qm_nrefs++;
- mutex_unlock(&xfs_Gqm_lock);
-
- return 0;
-}
-
-
-/*
- * Release the reference that a filesystem took at mount time,
- * so that we know when we need to destroy the entire quota manager.
- */
-/* ARGSUSED */
-STATIC void
-xfs_qm_rele_quotafs_ref(
- struct xfs_mount *mp)
-{
- xfs_dquot_t *dqp, *n;
-
- ASSERT(xfs_Gqm);
- ASSERT(xfs_Gqm->qm_nrefs > 0);
-
- /*
- * Go through the freelist and destroy all inactive dquots.
- */
- mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
- list_for_each_entry_safe(dqp, n, &xfs_Gqm->qm_dqfrlist, q_freelist) {
- xfs_dqlock(dqp);
- if (dqp->dq_flags & XFS_DQ_INACTIVE) {
- ASSERT(dqp->q_mount == NULL);
- ASSERT(! XFS_DQ_IS_DIRTY(dqp));
- ASSERT(list_empty(&dqp->q_hashlist));
- ASSERT(list_empty(&dqp->q_mplist));
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- xfs_dqunlock(dqp);
- xfs_qm_dqdestroy(dqp);
- } else {
- xfs_dqunlock(dqp);
- }
- }
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
-
- /*
- * Destroy the entire XQM. If somebody mounts with quotaon, this'll
- * be restarted.
- */
- mutex_lock(&xfs_Gqm_lock);
- if (--xfs_Gqm->qm_nrefs == 0) {
- xfs_qm_destroy(xfs_Gqm);
- xfs_Gqm = NULL;
- }
- mutex_unlock(&xfs_Gqm_lock);
-}
-
-/*
- * Just destroy the quotainfo structure.
- */
-void
-xfs_qm_unmount(
- struct xfs_mount *mp)
-{
- if (mp->m_quotainfo) {
- xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
- xfs_qm_destroy_quotainfo(mp);
- }
-}
-
-
-/*
- * This is called from xfs_mountfs to start quotas and initialize all
- * necessary data structures like quotainfo. This is also responsible for
- * running a quotacheck as necessary. We are guaranteed that the superblock
- * is consistently read in at this point.
- *
- * If we fail here, the mount will continue with quota turned off. We don't
- * need to indicate success or failure at all.
- */
-void
-xfs_qm_mount_quotas(
- xfs_mount_t *mp)
-{
- int error = 0;
- uint sbf;
-
- /*
- * If quotas on realtime volumes are not supported, we disable
- * quotas immediately.
- */
- if (mp->m_sb.sb_rextents) {
- xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
- mp->m_qflags = 0;
- goto write_changes;
- }
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * Allocate the quotainfo structure inside the mount struct, and
- * create quotainode(s), and change/rev superblock if necessary.
- */
- error = xfs_qm_init_quotainfo(mp);
- if (error) {
- /*
- * We must turn off quotas.
- */
- ASSERT(mp->m_quotainfo == NULL);
- mp->m_qflags = 0;
- goto write_changes;
- }
- /*
- * If any of the quotas are not consistent, do a quotacheck.
- */
- if (XFS_QM_NEED_QUOTACHECK(mp)) {
- error = xfs_qm_quotacheck(mp);
- if (error) {
- /* Quotacheck failed and disabled quotas. */
- return;
- }
- }
- /*
- * If one type of quotas is off, then it will lose its
- * quotachecked status, since we won't be doing accounting for
- * that type anymore.
- */
- if (!XFS_IS_UQUOTA_ON(mp))
- mp->m_qflags &= ~XFS_UQUOTA_CHKD;
- if (!(XFS_IS_GQUOTA_ON(mp) || XFS_IS_PQUOTA_ON(mp)))
- mp->m_qflags &= ~XFS_OQUOTA_CHKD;
-
- write_changes:
- /*
- * We actually don't have to acquire the m_sb_lock at all.
- * This can only be called from mount, and that's single threaded. XXX
- */
- spin_lock(&mp->m_sb_lock);
- sbf = mp->m_sb.sb_qflags;
- mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
- spin_unlock(&mp->m_sb_lock);
-
- if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
- if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
- /*
- * We could only have been turning quotas off.
- * We aren't in very good shape actually because
- * the incore structures are convinced that quotas are
- * off, but the on-disk superblock doesn't know that!
- */
- ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
- xfs_alert(mp, "%s: Superblock update failed!",
- __func__);
- }
- }
-
- if (error) {
- xfs_warn(mp, "Failed to initialize disk quotas.");
- return;
- }
-
-#ifdef QUOTADEBUG
- if (XFS_IS_QUOTA_ON(mp))
- xfs_qm_internalqcheck(mp);
-#endif
-}
-
-/*
- * Called from the vfsops layer.
- */
-void
-xfs_qm_unmount_quotas(
- xfs_mount_t *mp)
-{
- /*
- * Release the dquots that root inode, et al might be holding,
- * before we flush quotas and blow away the quotainfo structure.
- */
- ASSERT(mp->m_rootip);
- xfs_qm_dqdetach(mp->m_rootip);
- if (mp->m_rbmip)
- xfs_qm_dqdetach(mp->m_rbmip);
- if (mp->m_rsumip)
- xfs_qm_dqdetach(mp->m_rsumip);
-
- /*
- * Release the quota inodes.
- */
- if (mp->m_quotainfo) {
- if (mp->m_quotainfo->qi_uquotaip) {
- IRELE(mp->m_quotainfo->qi_uquotaip);
- mp->m_quotainfo->qi_uquotaip = NULL;
- }
- if (mp->m_quotainfo->qi_gquotaip) {
- IRELE(mp->m_quotainfo->qi_gquotaip);
- mp->m_quotainfo->qi_gquotaip = NULL;
- }
- }
-}
-
-/*
- * Flush all dquots of the given file system to disk. The dquots are
- * _not_ purged from memory here, just their data written to disk.
- */
-STATIC int
-xfs_qm_dqflush_all(
- struct xfs_mount *mp,
- int sync_mode)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- int recl;
- struct xfs_dquot *dqp;
- int error;
-
- if (!q)
- return 0;
-again:
- mutex_lock(&q->qi_dqlist_lock);
- list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
- xfs_dqlock(dqp);
- if (! XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqunlock(dqp);
- continue;
- }
-
- /* XXX a sentinel would be better */
- recl = q->qi_dqreclaims;
- if (!xfs_dqflock_nowait(dqp)) {
- /*
- * If we can't grab the flush lock then check
- * to see if the dquot has been flushed as a delayed
- * write. If so, grab its buffer and send it
- * out immediately. We'll be able to acquire
- * the flush lock when the I/O completes.
- */
- xfs_qm_dqflock_pushbuf_wait(dqp);
- }
- /*
- * Let go of the mplist lock. We don't want to hold it
- * across a disk write.
- */
- mutex_unlock(&q->qi_dqlist_lock);
- error = xfs_qm_dqflush(dqp, sync_mode);
- xfs_dqunlock(dqp);
- if (error)
- return error;
-
- mutex_lock(&q->qi_dqlist_lock);
- if (recl != q->qi_dqreclaims) {
- mutex_unlock(&q->qi_dqlist_lock);
- /* XXX restart limit */
- goto again;
- }
- }
-
- mutex_unlock(&q->qi_dqlist_lock);
- /* return ! busy */
- return 0;
-}
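
The recl snapshot is a generation-counter restart scheme: qi_dqreclaims is bumped whenever a dquot leaves the mplist, so if it moved while the lock was dropped the list cursor may be stale and the walk starts over, with the unbounded-restart risk the XXX comment flags. A self-contained sketch of the idiom, reduced to an array so the cursor is trivially safe to reconstruct:

    #include <pthread.h>

    #define NITEMS 8

    struct gen_list {
        pthread_mutex_t lock;
        unsigned long   gen;            /* bumped on any removal */
        int             dirty[NITEMS];  /* entries needing "flush" */
    };

    static void flush_all(struct gen_list *l, void (*slow_flush)(int idx))
    {
        unsigned long gen;
        int i;

    again:
        pthread_mutex_lock(&l->lock);
        for (i = 0; i < NITEMS; i++) {
            if (!l->dirty[i])
                continue;
            gen = l->gen;                    /* snapshot the generation */
            pthread_mutex_unlock(&l->lock);  /* never hold it across I/O */
            slow_flush(i);
            pthread_mutex_lock(&l->lock);
            l->dirty[i] = 0;
            if (gen != l->gen) {             /* list changed underneath us */
                pthread_mutex_unlock(&l->lock);
                goto again;                  /* XXX: no restart limit, as above */
            }
        }
        pthread_mutex_unlock(&l->lock);
    }
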
-/*
- * Release the group dquot pointers the user dquots may be
- * carrying around as a hint. mplist is locked on entry and exit.
- */
-STATIC void
-xfs_qm_detach_gdquots(
- struct xfs_mount *mp)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- struct xfs_dquot *dqp, *gdqp;
- int nrecl;
-
- again:
- ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
- list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
- xfs_dqlock(dqp);
- if ((gdqp = dqp->q_gdquot)) {
- xfs_dqlock(gdqp);
- dqp->q_gdquot = NULL;
- }
- xfs_dqunlock(dqp);
-
- if (gdqp) {
- /*
- * Can't hold the mplist lock across a dqput.
- * XXXmust convert to marker based iterations here.
- */
- nrecl = q->qi_dqreclaims;
- mutex_unlock(&q->qi_dqlist_lock);
- xfs_qm_dqput(gdqp);
-
- mutex_lock(&q->qi_dqlist_lock);
- if (nrecl != q->qi_dqreclaims)
- goto again;
- }
- }
-}
-
-/*
- * Go through all the incore dquots of this file system and take them
- * off the mplist and hashlist, if the dquot type matches the dqtype
- * parameter. This is used when turning off quota accounting for
- * users and/or groups, as well as when the filesystem is unmounting.
- */
-STATIC int
-xfs_qm_dqpurge_int(
- struct xfs_mount *mp,
- uint flags)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- struct xfs_dquot *dqp, *n;
- uint dqtype;
- int nrecl;
- int nmisses;
-
- if (!q)
- return 0;
-
- dqtype = (flags & XFS_QMOPT_UQUOTA) ? XFS_DQ_USER : 0;
- dqtype |= (flags & XFS_QMOPT_PQUOTA) ? XFS_DQ_PROJ : 0;
- dqtype |= (flags & XFS_QMOPT_GQUOTA) ? XFS_DQ_GROUP : 0;
-
- mutex_lock(&q->qi_dqlist_lock);
-
- /*
- * In the first pass through all incore dquots of this filesystem,
- * we release the group dquot pointers the user dquots may be
- * carrying around as a hint. We need to do this irrespective of
- * what's being turned off.
- */
- xfs_qm_detach_gdquots(mp);
-
- again:
- nmisses = 0;
- ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
- /*
- * Try to get rid of all of the unwanted dquots. The idea is to
- * get them off mplist and hashlist, but leave them on freelist.
- */
- list_for_each_entry_safe(dqp, n, &q->qi_dqlist, q_mplist) {
- /*
- * It's OK to look at the type without taking dqlock here.
- * We're holding the mplist lock here, and that's needed for
- * a dqreclaim.
- */
- if ((dqp->dq_flags & dqtype) == 0)
- continue;
-
- if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
- nrecl = q->qi_dqreclaims;
- mutex_unlock(&q->qi_dqlist_lock);
- mutex_lock(&dqp->q_hash->qh_lock);
- mutex_lock(&q->qi_dqlist_lock);
-
- /*
- * XXXTheoretically, we can get into a very long
- * ping pong game here.
- * No one can be adding dquots to the mplist at
- * this point, but somebody might be taking things off.
- */
- if (nrecl != q->qi_dqreclaims) {
- mutex_unlock(&dqp->q_hash->qh_lock);
- goto again;
- }
- }
-
- /*
- * Take the dquot off the mplist and hashlist. It may remain on
- * freelist in INACTIVE state.
- */
- nmisses += xfs_qm_dqpurge(dqp);
- }
- mutex_unlock(&q->qi_dqlist_lock);
- return nmisses;
-}
-
-int
-xfs_qm_dqpurge_all(
- xfs_mount_t *mp,
- uint flags)
-{
- int ndquots;
-
- /*
- * Purge the dquot cache.
- * None of the dquots should really be busy at this point.
- */
- if (mp->m_quotainfo) {
- while ((ndquots = xfs_qm_dqpurge_int(mp, flags))) {
- delay(ndquots * 10);
- }
- }
- return 0;
-}
-
-STATIC int
-xfs_qm_dqattach_one(
- xfs_inode_t *ip,
- xfs_dqid_t id,
- uint type,
- uint doalloc,
- xfs_dquot_t *udqhint, /* hint */
- xfs_dquot_t **IO_idqpp)
-{
- xfs_dquot_t *dqp;
- int error;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- error = 0;
-
- /*
- * See if we already have it in the inode itself. IO_idqpp is
- * &i_udquot or &i_gdquot. This made the code look weird, but
- * made the logic a lot simpler.
- */
- dqp = *IO_idqpp;
- if (dqp) {
- trace_xfs_dqattach_found(dqp);
- return 0;
- }
-
- /*
- * udqhint is the i_udquot field in inode, and is non-NULL only
- * when the type arg is group/project. Its purpose is to save a
- * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
- * the user dquot.
- */
- if (udqhint) {
- ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
- xfs_dqlock(udqhint);
-
- /*
- * No need to take dqlock to look at the id.
- *
- * The ID can't change until it gets reclaimed, and it won't
- * be reclaimed as long as we have a ref from inode and we
- * hold the ilock.
- */
- dqp = udqhint->q_gdquot;
- if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
- xfs_dqlock(dqp);
- XFS_DQHOLD(dqp);
- ASSERT(*IO_idqpp == NULL);
- *IO_idqpp = dqp;
-
- xfs_dqunlock(dqp);
- xfs_dqunlock(udqhint);
- return 0;
- }
-
- /*
- * We can't hold a dquot lock when we call the dqget code.
- * We'll deadlock in no time, because of (not conforming to)
- * lock ordering - the inodelock comes before any dquot lock,
- * and we may drop and reacquire the ilock in xfs_qm_dqget().
- */
- xfs_dqunlock(udqhint);
- }
-
- /*
- * Find the dquot from somewhere. This bumps the
- * reference count of dquot and returns it locked.
- * This can return ENOENT if dquot didn't exist on
- * disk and we didn't ask it to allocate;
- * ESRCH if quotas got turned off suddenly.
- */
- error = xfs_qm_dqget(ip->i_mount, ip, id, type,
- doalloc | XFS_QMOPT_DOWARN, &dqp);
- if (error)
- return error;
-
- trace_xfs_dqattach_get(dqp);
-
- /*
- * dqget may have dropped and re-acquired the ilock, but it guarantees
- * that the dquot returned is the one that should go in the inode.
- */
- *IO_idqpp = dqp;
- xfs_dqunlock(dqp);
- return 0;
-}
-
-
-/*
- * Given a udquot and gdquot, attach a ptr to the group dquot in the
- * udquot as a hint for future lookups. The idea sounds simple, but the
- * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering constraints.
- * The process is complicated more by the fact that the dquots may or may not
- * be locked on entry.
- */
-STATIC void
-xfs_qm_dqattach_grouphint(
- xfs_dquot_t *udq,
- xfs_dquot_t *gdq)
-{
- xfs_dquot_t *tmp;
-
- xfs_dqlock(udq);
-
- if ((tmp = udq->q_gdquot)) {
- if (tmp == gdq) {
- xfs_dqunlock(udq);
- return;
- }
-
- udq->q_gdquot = NULL;
- /*
- * We can't keep any dqlocks when calling dqrele,
- * because the freelist lock comes before dqlocks.
- */
- xfs_dqunlock(udq);
- /*
- * We took a hard reference once upon a time in dqget, so give it
- * back when the udquot no longer points at it; dqput() does the
- * unlocking of the dquot.
- */
- xfs_qm_dqrele(tmp);
-
- xfs_dqlock(udq);
- xfs_dqlock(gdq);
-
- } else {
- ASSERT(XFS_DQ_IS_LOCKED(udq));
- xfs_dqlock(gdq);
- }
-
- ASSERT(XFS_DQ_IS_LOCKED(udq));
- ASSERT(XFS_DQ_IS_LOCKED(gdq));
- /*
- * Somebody could have attached a gdquot here,
- * when we dropped the uqlock. If so, just do nothing.
- */
- if (udq->q_gdquot == NULL) {
- XFS_DQHOLD(gdq);
- udq->q_gdquot = gdq;
- }
-
- xfs_dqunlock(gdq);
- xfs_dqunlock(udq);
-}
-
-
-/*
- * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
- * into account.
- * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
- * Inode may get unlocked and relocked in here, and the caller must deal with
- * the consequences.
- */
-int
-xfs_qm_dqattach_locked(
- xfs_inode_t *ip,
- uint flags)
-{
- xfs_mount_t *mp = ip->i_mount;
- uint nquotas = 0;
- int error = 0;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) ||
- !XFS_IS_QUOTA_ON(mp) ||
- !XFS_NOT_DQATTACHED(mp, ip) ||
- ip->i_ino == mp->m_sb.sb_uquotino ||
- ip->i_ino == mp->m_sb.sb_gquotino)
- return 0;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-
- if (XFS_IS_UQUOTA_ON(mp)) {
- error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
- flags & XFS_QMOPT_DQALLOC,
- NULL, &ip->i_udquot);
- if (error)
- goto done;
- nquotas++;
- }
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (XFS_IS_OQUOTA_ON(mp)) {
- error = XFS_IS_GQUOTA_ON(mp) ?
- xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
- flags & XFS_QMOPT_DQALLOC,
- ip->i_udquot, &ip->i_gdquot) :
- xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
- flags & XFS_QMOPT_DQALLOC,
- ip->i_udquot, &ip->i_gdquot);
- /*
- * Don't worry about the udquot that we may have
- * attached above. It'll get detached, if not already.
- */
- if (error)
- goto done;
- nquotas++;
- }
-
- /*
- * Attach this group quota to the user quota as a hint.
- * This WON'T, in general, result in a thrash.
- */
- if (nquotas == 2) {
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_udquot);
- ASSERT(ip->i_gdquot);
-
- /*
- * We may or may not have the i_udquot locked at this point,
- * but this check is OK since we don't depend on the i_gdquot to
- * be accurate 100% all the time. It is just a hint, and this
- * will succeed in general.
- */
- if (ip->i_udquot->q_gdquot == ip->i_gdquot)
- goto done;
- /*
- * Attach i_gdquot to the gdquot hint inside the i_udquot.
- */
- xfs_qm_dqattach_grouphint(ip->i_udquot, ip->i_gdquot);
- }
-
- done:
-#ifdef QUOTADEBUG
- if (! error) {
- if (XFS_IS_UQUOTA_ON(mp))
- ASSERT(ip->i_udquot);
- if (XFS_IS_OQUOTA_ON(mp))
- ASSERT(ip->i_gdquot);
- }
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
-#endif
- return error;
-}
-
-int
-xfs_qm_dqattach(
- struct xfs_inode *ip,
- uint flags)
-{
- int error;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- error = xfs_qm_dqattach_locked(ip, flags);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- return error;
-}
-
-/*
- * Release dquots (and their references) if any.
- * The inode should be locked EXCL except when this is called by
- * xfs_ireclaim.
- */
-void
-xfs_qm_dqdetach(
- xfs_inode_t *ip)
-{
- if (!(ip->i_udquot || ip->i_gdquot))
- return;
-
- trace_xfs_dquot_dqdetach(ip);
-
- ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_uquotino);
- ASSERT(ip->i_ino != ip->i_mount->m_sb.sb_gquotino);
- if (ip->i_udquot) {
- xfs_qm_dqrele(ip->i_udquot);
- ip->i_udquot = NULL;
- }
- if (ip->i_gdquot) {
- xfs_qm_dqrele(ip->i_gdquot);
- ip->i_gdquot = NULL;
- }
-}
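
Taken together, xfs_qm_dqattach() and xfs_qm_dqdetach() bracket the lifetime
of an inode's dquot references. A minimal caller sketch, assuming a
hypothetical quota-affecting operation (example_quota_op is illustrative,
not part of the patch; transaction setup and real error paths are elided):

	STATIC int
	example_quota_op(
		struct xfs_inode	*ip)
	{
		int			error;

		/* xfs_qm_dqattach takes and drops XFS_ILOCK_EXCL internally */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/* ... ip->i_udquot / ip->i_gdquot are now attached ... */
		return 0;
	}

The matching xfs_qm_dqdetach(ip) is issued from inode reclaim and drops the
i_udquot/i_gdquot references taken above.
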
-
-int
-xfs_qm_sync(
- struct xfs_mount *mp,
- int flags)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- int recl, restarts;
- struct xfs_dquot *dqp;
- int error;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
- return 0;
-
- restarts = 0;
-
- again:
- mutex_lock(&q->qi_dqlist_lock);
- /*
-	 * dqpurge_all() also takes the mplist lock and iterates through all dquots
- * in quotaoff. However, if the QUOTA_ACTIVE bits are not cleared
- * when we have the mplist lock, we know that dquots will be consistent
- * as long as we have it locked.
- */
- if (!XFS_IS_QUOTA_ON(mp)) {
- mutex_unlock(&q->qi_dqlist_lock);
- return 0;
- }
- ASSERT(mutex_is_locked(&q->qi_dqlist_lock));
- list_for_each_entry(dqp, &q->qi_dqlist, q_mplist) {
- /*
- * If this is vfs_sync calling, then skip the dquots that
-		 * don't 'seem' to be dirty, i.e. don't acquire the dqlock.
- * This is very similar to what xfs_sync does with inodes.
- */
- if (flags & SYNC_TRYLOCK) {
- if (!XFS_DQ_IS_DIRTY(dqp))
- continue;
- if (!xfs_qm_dqlock_nowait(dqp))
- continue;
- } else {
- xfs_dqlock(dqp);
- }
-
- /*
- * Now, find out for sure if this dquot is dirty or not.
- */
- if (! XFS_DQ_IS_DIRTY(dqp)) {
- xfs_dqunlock(dqp);
- continue;
- }
-
- /* XXX a sentinel would be better */
- recl = q->qi_dqreclaims;
- if (!xfs_dqflock_nowait(dqp)) {
- if (flags & SYNC_TRYLOCK) {
- xfs_dqunlock(dqp);
- continue;
- }
- /*
-			 * If we can't grab the flush lock, then the caller
-			 * really wanted us to give this our best shot, so
-			 * see if we can give the buffer a push before we wait
-			 * on the flush lock. At this point, we know that
-			 * even though the dquot is being flushed,
-			 * it has (new) dirty data.
- */
- xfs_qm_dqflock_pushbuf_wait(dqp);
- }
- /*
- * Let go of the mplist lock. We don't want to hold it
-		 * across a disk write.
- */
- mutex_unlock(&q->qi_dqlist_lock);
- error = xfs_qm_dqflush(dqp, flags);
- xfs_dqunlock(dqp);
- if (error && XFS_FORCED_SHUTDOWN(mp))
- return 0; /* Need to prevent umount failure */
- else if (error)
- return error;
-
- mutex_lock(&q->qi_dqlist_lock);
- if (recl != q->qi_dqreclaims) {
- if (++restarts >= XFS_QM_SYNC_MAX_RESTARTS)
- break;
-
- mutex_unlock(&q->qi_dqlist_lock);
- goto again;
- }
- }
-
- mutex_unlock(&q->qi_dqlist_lock);
- return 0;
-}
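
The recl snapshot above (the "XXX a sentinel would be better" note) is a
generation-counter idiom: every reclaim bumps qi_dqreclaims, so comparing a
snapshot taken under the list lock against the current value after
re-acquiring it tells the walker whether the list may have changed while it
was blocked. A sketch of the idiom in general form (not the patch's code;
lock/unlock and the generation field are placeholders):

	gen = list->generation;		/* snapshot under the list lock */
	unlock(list);
	/* ... blocking work, e.g. the dquot flush above ... */
	lock(list);
	if (gen != list->generation)	/* list mutated while unlocked */
		goto restart;
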
-
-/*
- * The hash chains and the mplist use the same xfs_dqhash structure as
- * their list head, but we can take the mplist qh_lock and one of the
- * hash qh_locks at the same time without any problem as they aren't
- * related.
- */
-static struct lock_class_key xfs_quota_mplist_class;
-
-/*
- * This initializes all the quota information that's kept in the
- * mount structure
- */
-STATIC int
-xfs_qm_init_quotainfo(
- xfs_mount_t *mp)
-{
- xfs_quotainfo_t *qinf;
- int error;
- xfs_dquot_t *dqp;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * Tell XQM that we exist as soon as possible.
- */
- if ((error = xfs_qm_hold_quotafs_ref(mp))) {
- return error;
- }
-
- qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
-
- /*
-	 * See if quotainodes are set up, and if not, allocate them,
- * and change the superblock accordingly.
- */
- if ((error = xfs_qm_init_quotainos(mp))) {
- kmem_free(qinf);
- mp->m_quotainfo = NULL;
- return error;
- }
-
- INIT_LIST_HEAD(&qinf->qi_dqlist);
- mutex_init(&qinf->qi_dqlist_lock);
- lockdep_set_class(&qinf->qi_dqlist_lock, &xfs_quota_mplist_class);
-
- qinf->qi_dqreclaims = 0;
-
- /* mutex used to serialize quotaoffs */
- mutex_init(&qinf->qi_quotaofflock);
-
- /* Precalc some constants */
- qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
- ASSERT(qinf->qi_dqchunklen);
- qinf->qi_dqperchunk = BBTOB(qinf->qi_dqchunklen);
- do_div(qinf->qi_dqperchunk, sizeof(xfs_dqblk_t));
-
- mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
-
- /*
- * We try to get the limits from the superuser's limits fields.
- * This is quite hacky, but it is standard quota practice.
- * We look at the USR dquot with id == 0 first, but if user quotas
-	 * are not enabled we go to the GRP dquot with id == 0.
- * We don't really care to keep separate default limits for user
- * and group quotas, at least not at this point.
- */
- error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)0,
- XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
- (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
- XFS_DQ_PROJ),
- XFS_QMOPT_DQSUSER|XFS_QMOPT_DOWARN,
- &dqp);
- if (! error) {
- xfs_disk_dquot_t *ddqp = &dqp->q_core;
-
- /*
- * The warnings and timers set the grace period given to
-		 * a user or group before they can no longer perform any
-		 * more writes. If it is zero, a default is used.
- */
- qinf->qi_btimelimit = ddqp->d_btimer ?
- be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
- qinf->qi_itimelimit = ddqp->d_itimer ?
- be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
- qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
- be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
- qinf->qi_bwarnlimit = ddqp->d_bwarns ?
- be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
- qinf->qi_iwarnlimit = ddqp->d_iwarns ?
- be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
- qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
- be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
- qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
- qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
- qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
- qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
- qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
- qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-
- /*
- * We sent the XFS_QMOPT_DQSUSER flag to dqget because
- * we don't want this dquot cached. We haven't done a
- * quotacheck yet, and quotacheck doesn't like incore dquots.
- */
- xfs_qm_dqdestroy(dqp);
- } else {
- qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
- qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
- qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
- qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
- qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
- qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
- }
-
- return 0;
-}
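
To make the "precalc some constants" step concrete: with 4096-byte
filesystem blocks and the one-FSB dquot cluster size defined in xfs_qm.h
below, the chunk length works out to 8 basic (512-byte) blocks and 30 dquots
per chunk, since an on-disk xfs_dqblk_t is 136 bytes (136 * 30 = 4080 <=
4096, as the header notes). A standalone check of that arithmetic
(userspace; the block size is an assumption, not read from any superblock):

	#include <assert.h>

	int main(void)
	{
		unsigned int blocksize  = 4096;	/* assumed sb_blocksize */
		unsigned int dqblksize  = 136;	/* on-disk xfs_dqblk_t size */
		unsigned int dqchunklen = blocksize / 512;	/* in basic blocks */
		unsigned int dqperchunk = (dqchunklen * 512) / dqblksize;

		assert(dqchunklen == 8);	/* XFS_FSB_TO_BB(mp, 1) */
		assert(dqperchunk == 30);	/* BBTOB(8) / sizeof(xfs_dqblk_t) */
		return 0;
	}
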
-
-
-/*
- * Gets called when unmounting a filesystem or when all quotas get
- * turned off.
- * This purges the quota inodes, destroys locks and frees itself.
- */
-void
-xfs_qm_destroy_quotainfo(
- xfs_mount_t *mp)
-{
- xfs_quotainfo_t *qi;
-
- qi = mp->m_quotainfo;
- ASSERT(qi != NULL);
- ASSERT(xfs_Gqm != NULL);
-
- /*
- * Release the reference that XQM kept, so that we know
- * when the XQM structure should be freed. We cannot assume
- * that xfs_Gqm is non-null after this point.
- */
- xfs_qm_rele_quotafs_ref(mp);
-
- ASSERT(list_empty(&qi->qi_dqlist));
- mutex_destroy(&qi->qi_dqlist_lock);
-
- if (qi->qi_uquotaip) {
- IRELE(qi->qi_uquotaip);
- qi->qi_uquotaip = NULL; /* paranoia */
- }
- if (qi->qi_gquotaip) {
- IRELE(qi->qi_gquotaip);
- qi->qi_gquotaip = NULL;
- }
- mutex_destroy(&qi->qi_quotaofflock);
- kmem_free(qi);
- mp->m_quotainfo = NULL;
-}
-
-
-
-/* ------------------- PRIVATE STATIC FUNCTIONS ----------------------- */
-
-/* ARGSUSED */
-STATIC void
-xfs_qm_list_init(
- xfs_dqlist_t *list,
- char *str,
- int n)
-{
- mutex_init(&list->qh_lock);
- INIT_LIST_HEAD(&list->qh_list);
- list->qh_version = 0;
- list->qh_nelems = 0;
-}
-
-STATIC void
-xfs_qm_list_destroy(
- xfs_dqlist_t *list)
-{
- mutex_destroy(&(list->qh_lock));
-}
-
-/*
- * Create an inode and return with a reference already taken, but unlocked
- * This is how we create quota inodes
- */
-STATIC int
-xfs_qm_qino_alloc(
- xfs_mount_t *mp,
- xfs_inode_t **ip,
- __int64_t sbfields,
- uint flags)
-{
- xfs_trans_t *tp;
- int error;
- int committed;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
- if ((error = xfs_trans_reserve(tp,
- XFS_QM_QINOCREATE_SPACE_RES(mp),
- XFS_CREATE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_CREATE_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed);
- if (error) {
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT);
- return error;
- }
-
- /*
- * Make the changes in the superblock, and log those too.
- * sbfields arg may contain fields other than *QUOTINO;
- * VERSIONNUM for example.
- */
- spin_lock(&mp->m_sb_lock);
- if (flags & XFS_QMOPT_SBVERSION) {
- ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
- ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
- XFS_SB_GQUOTINO | XFS_SB_QFLAGS)) ==
- (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
- XFS_SB_GQUOTINO | XFS_SB_QFLAGS));
-
- xfs_sb_version_addquota(&mp->m_sb);
- mp->m_sb.sb_uquotino = NULLFSINO;
- mp->m_sb.sb_gquotino = NULLFSINO;
-
- /* qflags will get updated _after_ quotacheck */
- mp->m_sb.sb_qflags = 0;
- }
- if (flags & XFS_QMOPT_UQUOTA)
- mp->m_sb.sb_uquotino = (*ip)->i_ino;
- else
- mp->m_sb.sb_gquotino = (*ip)->i_ino;
- spin_unlock(&mp->m_sb_lock);
- xfs_mod_sb(tp, sbfields);
-
- if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
- xfs_alert(mp, "%s failed (error %d)!", __func__, error);
- return error;
- }
- return 0;
-}
-
-
-STATIC void
-xfs_qm_reset_dqcounts(
- xfs_mount_t *mp,
- xfs_buf_t *bp,
- xfs_dqid_t id,
- uint type)
-{
- xfs_disk_dquot_t *ddq;
- int j;
-
- trace_xfs_reset_dqcounts(bp, _RET_IP_);
-
- /*
- * Reset all counters and timers. They'll be
- * started afresh by xfs_qm_quotacheck.
- */
-#ifdef DEBUG
- j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
- do_div(j, sizeof(xfs_dqblk_t));
- ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
-#endif
- ddq = (xfs_disk_dquot_t *)XFS_BUF_PTR(bp);
- for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
- /*
- * Do a sanity check, and if needed, repair the dqblk. Don't
- * output any warnings because it's perfectly possible to
- * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
- */
- (void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
- "xfs_quotacheck");
- ddq->d_bcount = 0;
- ddq->d_icount = 0;
- ddq->d_rtbcount = 0;
- ddq->d_btimer = 0;
- ddq->d_itimer = 0;
- ddq->d_rtbtimer = 0;
- ddq->d_bwarns = 0;
- ddq->d_iwarns = 0;
- ddq->d_rtbwarns = 0;
- ddq = (xfs_disk_dquot_t *) ((xfs_dqblk_t *)ddq + 1);
- }
-}
-
-STATIC int
-xfs_qm_dqiter_bufs(
- xfs_mount_t *mp,
- xfs_dqid_t firstid,
- xfs_fsblock_t bno,
- xfs_filblks_t blkcnt,
- uint flags)
-{
- xfs_buf_t *bp;
- int error;
- int type;
-
- ASSERT(blkcnt > 0);
- type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
- (flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
- error = 0;
-
- /*
- * Blkcnt arg can be a very big number, and might even be
- * larger than the log itself. So, we have to break it up into
- * manageable-sized transactions.
- * Note that we don't start a permanent transaction here; we might
- * not be able to get a log reservation for the whole thing up front,
- * and we don't really care to either, because we just discard
- * everything if we were to crash in the middle of this loop.
- */
- while (blkcnt--) {
- error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp, bno),
- mp->m_quotainfo->qi_dqchunklen, 0, &bp);
- if (error)
- break;
-
- xfs_qm_reset_dqcounts(mp, bp, firstid, type);
- xfs_bdwrite(mp, bp);
- /*
-		 * Go to the next block.
- */
- bno++;
- firstid += mp->m_quotainfo->qi_dqperchunk;
- }
- return error;
-}
-
-/*
- * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
- * caller supplied function for every chunk of dquots that we find.
- */
-STATIC int
-xfs_qm_dqiterate(
- xfs_mount_t *mp,
- xfs_inode_t *qip,
- uint flags)
-{
- xfs_bmbt_irec_t *map;
- int i, nmaps; /* number of map entries */
- int error; /* return value */
- xfs_fileoff_t lblkno;
- xfs_filblks_t maxlblkcnt;
- xfs_dqid_t firstid;
- xfs_fsblock_t rablkno;
- xfs_filblks_t rablkcnt;
-
- error = 0;
- /*
- * This looks racy, but we can't keep an inode lock across a
-	 * trans_reserve. But this gets called during quotacheck, and that
-	 * happens only at mount time, which is single-threaded.
- */
- if (qip->i_d.di_nblocks == 0)
- return 0;
-
- map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
-
- lblkno = 0;
- maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
- do {
- nmaps = XFS_DQITER_MAP_SIZE;
- /*
- * We aren't changing the inode itself. Just changing
- * some of its data. No new blocks are added here, and
- * the inode is never added to the transaction.
- */
- xfs_ilock(qip, XFS_ILOCK_SHARED);
- error = xfs_bmapi(NULL, qip, lblkno,
- maxlblkcnt - lblkno,
- XFS_BMAPI_METADATA,
- NULL,
- 0, map, &nmaps, NULL);
- xfs_iunlock(qip, XFS_ILOCK_SHARED);
- if (error)
- break;
-
- ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
- for (i = 0; i < nmaps; i++) {
- ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
- ASSERT(map[i].br_blockcount);
-
-
- lblkno += map[i].br_blockcount;
-
- if (map[i].br_startblock == HOLESTARTBLOCK)
- continue;
-
- firstid = (xfs_dqid_t) map[i].br_startoff *
- mp->m_quotainfo->qi_dqperchunk;
- /*
- * Do a read-ahead on the next extent.
- */
- if ((i+1 < nmaps) &&
- (map[i+1].br_startblock != HOLESTARTBLOCK)) {
- rablkcnt = map[i+1].br_blockcount;
- rablkno = map[i+1].br_startblock;
- while (rablkcnt--) {
- xfs_buf_readahead(mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp, rablkno),
- mp->m_quotainfo->qi_dqchunklen);
- rablkno++;
- }
- }
- /*
-			 * Iterate through all the blocks in the extent and
- * reset the counters of all the dquots inside them.
- */
- if ((error = xfs_qm_dqiter_bufs(mp,
- firstid,
- map[i].br_startblock,
- map[i].br_blockcount,
- flags))) {
- break;
- }
- }
-
- if (error)
- break;
- } while (nmaps > 0);
-
- kmem_free(map);
-
- return error;
-}
-
-/*
- * Called by dqusage_adjust in doing a quotacheck.
- *
- * Given the inode and a dquot id, this updates both the incore dquot as well
- * as the buffer copy. This is so that once the quotacheck is done, we can
- * just log all the buffers, as opposed to logging numerous updates to
- * individual dquots.
- */
-STATIC int
-xfs_qm_quotacheck_dqadjust(
- struct xfs_inode *ip,
- xfs_dqid_t id,
- uint type,
- xfs_qcnt_t nblks,
- xfs_qcnt_t rtblks)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_dquot *dqp;
- int error;
-
- error = xfs_qm_dqget(mp, ip, id, type,
- XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
- if (error) {
- /*
- * Shouldn't be able to turn off quotas here.
- */
- ASSERT(error != ESRCH);
- ASSERT(error != ENOENT);
- return error;
- }
-
- trace_xfs_dqadjust(dqp);
-
- /*
- * Adjust the inode count and the block count to reflect this inode's
- * resource usage.
- */
- be64_add_cpu(&dqp->q_core.d_icount, 1);
- dqp->q_res_icount++;
- if (nblks) {
- be64_add_cpu(&dqp->q_core.d_bcount, nblks);
- dqp->q_res_bcount += nblks;
- }
- if (rtblks) {
- be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
- dqp->q_res_rtbcount += rtblks;
- }
-
- /*
- * Set default limits, adjust timers (since we changed usages)
- *
- * There are no timers for the default values set in the root dquot.
- */
- if (dqp->q_core.d_id) {
- xfs_qm_adjust_dqlimits(mp, &dqp->q_core);
- xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
- }
-
- dqp->dq_flags |= XFS_DQ_DIRTY;
- xfs_qm_dqput(dqp);
- return 0;
-}
-
-STATIC int
-xfs_qm_get_rtblks(
- xfs_inode_t *ip,
- xfs_qcnt_t *O_rtblks)
-{
- xfs_filblks_t rtblks; /* total rt blks */
- xfs_extnum_t idx; /* extent record index */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_extnum_t nextents; /* number of extent entries */
- int error;
-
- ASSERT(XFS_IS_REALTIME_INODE(ip));
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
- if (!(ifp->if_flags & XFS_IFEXTENTS)) {
- if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
- return error;
- }
- rtblks = 0;
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- for (idx = 0; idx < nextents; idx++)
- rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
- *O_rtblks = (xfs_qcnt_t)rtblks;
- return 0;
-}
-
-/*
- * Callback routine supplied to bulkstat(). Given an inumber, find its
- * dquots and update them to account for resources taken by that inode.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_dqusage_adjust(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* not used */
- int ubsize, /* not used */
- int *ubused, /* not used */
- int *res) /* result code value */
-{
- xfs_inode_t *ip;
- xfs_qcnt_t nblks, rtblks = 0;
- int error;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * rootino must have its resources accounted for, not so with the quota
- * inodes.
- */
- if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
- *res = BULKSTAT_RV_NOTHING;
- return XFS_ERROR(EINVAL);
- }
-
- /*
- * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
- * interface expects the inode to be exclusively locked because that's
- * the case in all other instances. It's OK that we do this because
- * quotacheck is done only at mount time.
- */
- error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
- if (error) {
- *res = BULKSTAT_RV_NOTHING;
- return error;
- }
-
- ASSERT(ip->i_delayed_blks == 0);
-
- if (XFS_IS_REALTIME_INODE(ip)) {
- /*
-		 * Walk through the extent list and count the realtime blocks.
- */
- error = xfs_qm_get_rtblks(ip, &rtblks);
- if (error)
- goto error0;
- }
-
- nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
-
- /*
- * Add the (disk blocks and inode) resources occupied by this
- * inode to its dquots. We do this adjustment in the incore dquot,
- * and also copy the changes to its buffer.
- * We don't care about putting these changes in a transaction
- * envelope because if we crash in the middle of a 'quotacheck'
- * we have to start from the beginning anyway.
- * Once we're done, we'll log all the dquot bufs.
- *
- * The *QUOTA_ON checks below may look pretty racy, but quotachecks
- * and quotaoffs don't race. (Quotachecks happen at mount time only).
- */
- if (XFS_IS_UQUOTA_ON(mp)) {
- error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
- XFS_DQ_USER, nblks, rtblks);
- if (error)
- goto error0;
- }
-
- if (XFS_IS_GQUOTA_ON(mp)) {
- error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
- XFS_DQ_GROUP, nblks, rtblks);
- if (error)
- goto error0;
- }
-
- if (XFS_IS_PQUOTA_ON(mp)) {
- error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
- XFS_DQ_PROJ, nblks, rtblks);
- if (error)
- goto error0;
- }
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- IRELE(ip);
- *res = BULKSTAT_RV_DIDONE;
- return 0;
-
-error0:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- IRELE(ip);
- *res = BULKSTAT_RV_GIVEUP;
- return error;
-}
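
To make the accounting concrete, consider an illustrative realtime inode
with di_nblocks == 100 for which the extent walk in xfs_qm_get_rtblks()
counts 40 realtime blocks: nblks becomes 100 - 40 = 60, so each dquot the
inode is accounted to receives d_bcount += 60, d_rtbcount += 40 and
d_icount += 1 via xfs_qm_quotacheck_dqadjust(). (The numbers are invented
for illustration only.)
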
-
-/*
- * Walk through all the filesystem inodes and construct a consistent view
- * of the disk quota world. If the quotacheck fails, disable quotas.
- */
-int
-xfs_qm_quotacheck(
- xfs_mount_t *mp)
-{
- int done, count, error;
- xfs_ino_t lastino;
- size_t structsz;
- xfs_inode_t *uip, *gip;
- uint flags;
-
- count = INT_MAX;
- structsz = 1;
- lastino = 0;
- flags = 0;
-
- ASSERT(mp->m_quotainfo->qi_uquotaip || mp->m_quotainfo->qi_gquotaip);
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- /*
- * There should be no cached dquots. The (simplistic) quotacheck
- * algorithm doesn't like that.
- */
- ASSERT(list_empty(&mp->m_quotainfo->qi_dqlist));
-
- xfs_notice(mp, "Quotacheck needed: Please wait.");
-
- /*
- * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
- * their counters to zero. We need a clean slate.
- * We don't log our changes till later.
- */
- uip = mp->m_quotainfo->qi_uquotaip;
- if (uip) {
- error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA);
- if (error)
- goto error_return;
- flags |= XFS_UQUOTA_CHKD;
- }
-
- gip = mp->m_quotainfo->qi_gquotaip;
- if (gip) {
- error = xfs_qm_dqiterate(mp, gip, XFS_IS_GQUOTA_ON(mp) ?
- XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
- if (error)
- goto error_return;
- flags |= XFS_OQUOTA_CHKD;
- }
-
- do {
- /*
-		 * Iterate through all the inodes in the file system,
- * adjusting the corresponding dquot counters in core.
- */
- error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_dqusage_adjust,
- structsz, NULL, &done);
- if (error)
- break;
-
- } while (!done);
-
- /*
- * We've made all the changes that we need to make incore.
- * Flush them down to disk buffers if everything was updated
- * successfully.
- */
- if (!error)
- error = xfs_qm_dqflush_all(mp, 0);
-
- /*
- * We can get this error if we couldn't do a dquot allocation inside
- * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
-	 * dirty dquots that might be cached; we just want to get rid of them
- * and turn quotaoff. The dquots won't be attached to any of the inodes
- * at this point (because we intentionally didn't in dqget_noattach).
- */
- if (error) {
- xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
- goto error_return;
- }
-
- /*
- * We didn't log anything, because if we crashed, we'll have to
- * start the quotacheck from scratch anyway. However, we must make
- * sure that our dquot changes are secure before we put the
- * quotacheck'd stamp on the superblock. So, here we do a synchronous
- * flush.
- */
- XFS_bflush(mp->m_ddev_targp);
-
- /*
- * If one type of quotas is off, then it will lose its
- * quotachecked status, since we won't be doing accounting for
- * that type anymore.
- */
- mp->m_qflags &= ~(XFS_OQUOTA_CHKD | XFS_UQUOTA_CHKD);
- mp->m_qflags |= flags;
-
- xfs_qm_dquot_list_print(mp);
-
- error_return:
- if (error) {
- xfs_warn(mp,
- "Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
- error);
- /*
- * We must turn off quotas.
- */
- ASSERT(mp->m_quotainfo != NULL);
- ASSERT(xfs_Gqm != NULL);
- xfs_qm_destroy_quotainfo(mp);
- if (xfs_mount_reset_sbqflags(mp)) {
- xfs_warn(mp,
- "Quotacheck: Failed to reset quota flags.");
- }
- } else
- xfs_notice(mp, "Quotacheck: Done.");
- return (error);
-}
-
-/*
- * This is called after the superblock has been read in and we're ready to
- * iget the quota inodes.
- */
-STATIC int
-xfs_qm_init_quotainos(
- xfs_mount_t *mp)
-{
- xfs_inode_t *uip, *gip;
- int error;
- __int64_t sbflags;
- uint flags;
-
- ASSERT(mp->m_quotainfo);
- uip = gip = NULL;
- sbflags = 0;
- flags = 0;
-
- /*
- * Get the uquota and gquota inodes
- */
- if (xfs_sb_version_hasquota(&mp->m_sb)) {
- if (XFS_IS_UQUOTA_ON(mp) &&
- mp->m_sb.sb_uquotino != NULLFSINO) {
- ASSERT(mp->m_sb.sb_uquotino > 0);
- if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip)))
- return XFS_ERROR(error);
- }
- if (XFS_IS_OQUOTA_ON(mp) &&
- mp->m_sb.sb_gquotino != NULLFSINO) {
- ASSERT(mp->m_sb.sb_gquotino > 0);
- if ((error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip))) {
- if (uip)
- IRELE(uip);
- return XFS_ERROR(error);
- }
- }
- } else {
- flags |= XFS_QMOPT_SBVERSION;
- sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
- XFS_SB_GQUOTINO | XFS_SB_QFLAGS);
- }
-
- /*
- * Create the two inodes, if they don't exist already. The changes
- * made above will get added to a transaction and logged in one of
- * the qino_alloc calls below. If the device is readonly,
- * temporarily switch to read-write to do this.
- */
- if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
- if ((error = xfs_qm_qino_alloc(mp, &uip,
- sbflags | XFS_SB_UQUOTINO,
- flags | XFS_QMOPT_UQUOTA)))
- return XFS_ERROR(error);
-
- flags &= ~XFS_QMOPT_SBVERSION;
- }
- if (XFS_IS_OQUOTA_ON(mp) && gip == NULL) {
- flags |= (XFS_IS_GQUOTA_ON(mp) ?
- XFS_QMOPT_GQUOTA : XFS_QMOPT_PQUOTA);
- error = xfs_qm_qino_alloc(mp, &gip,
- sbflags | XFS_SB_GQUOTINO, flags);
- if (error) {
- if (uip)
- IRELE(uip);
-
- return XFS_ERROR(error);
- }
- }
-
- mp->m_quotainfo->qi_uquotaip = uip;
- mp->m_quotainfo->qi_gquotaip = gip;
-
- return 0;
-}
-
-
-
-/*
- * Just pop the least recently used dquot off the freelist and
- * recycle it. The returned dquot is locked.
- */
-STATIC xfs_dquot_t *
-xfs_qm_dqreclaim_one(void)
-{
- xfs_dquot_t *dqpout;
- xfs_dquot_t *dqp;
- int restarts;
- int startagain;
-
- restarts = 0;
- dqpout = NULL;
-
- /* lockorder: hashchainlock, freelistlock, mplistlock, dqlock, dqflock */
-again:
- startagain = 0;
- mutex_lock(&xfs_Gqm->qm_dqfrlist_lock);
-
- list_for_each_entry(dqp, &xfs_Gqm->qm_dqfrlist, q_freelist) {
- struct xfs_mount *mp = dqp->q_mount;
- xfs_dqlock(dqp);
-
- /*
- * We are racing with dqlookup here. Naturally we don't
- * want to reclaim a dquot that lookup wants. We release the
- * freelist lock and start over, so that lookup will grab
- * both the dquot and the freelistlock.
- */
- if (dqp->dq_flags & XFS_DQ_WANT) {
- ASSERT(! (dqp->dq_flags & XFS_DQ_INACTIVE));
-
- trace_xfs_dqreclaim_want(dqp);
- XQM_STATS_INC(xqmstats.xs_qm_dqwants);
- restarts++;
- startagain = 1;
- goto dqunlock;
- }
-
- /*
- * If the dquot is inactive, we are assured that it is
- * not on the mplist or the hashlist, and that makes our
- * life easier.
- */
- if (dqp->dq_flags & XFS_DQ_INACTIVE) {
- ASSERT(mp == NULL);
- ASSERT(! XFS_DQ_IS_DIRTY(dqp));
- ASSERT(list_empty(&dqp->q_hashlist));
- ASSERT(list_empty(&dqp->q_mplist));
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- dqpout = dqp;
- XQM_STATS_INC(xqmstats.xs_qm_dqinact_reclaims);
- goto dqunlock;
- }
-
- ASSERT(dqp->q_hash);
- ASSERT(!list_empty(&dqp->q_mplist));
-
- /*
- * Try to grab the flush lock. If this dquot is in the process
- * of getting flushed to disk, we don't want to reclaim it.
- */
- if (!xfs_dqflock_nowait(dqp))
- goto dqunlock;
-
- /*
- * We have the flush lock so we know that this is not in the
- * process of being flushed. So, if this is dirty, flush it
- * DELWRI so that we don't get a freelist infested with
- * dirty dquots.
- */
- if (XFS_DQ_IS_DIRTY(dqp)) {
- int error;
-
- trace_xfs_dqreclaim_dirty(dqp);
-
- /*
-			 * We flush it as a delayed write, so don't bother
- * releasing the freelist lock.
- */
- error = xfs_qm_dqflush(dqp, 0);
- if (error) {
- xfs_warn(mp, "%s: dquot %p flush failed",
- __func__, dqp);
- }
- goto dqunlock;
- }
-
- /*
- * We're trying to get the hashlock out of order. This races
- * with dqlookup; so, we giveup and goto the next dquot if
-		 * with dqlookup; so, we give up and go to the next dquot if
- * a dqlookup process that holds the hashlock that is
- * waiting for the freelist lock.
- */
- if (!mutex_trylock(&dqp->q_hash->qh_lock)) {
- restarts++;
- goto dqfunlock;
- }
-
- /*
- * This races with dquot allocation code as well as dqflush_all
- * and reclaim code. So, if we failed to grab the mplist lock,
-		 * give up everything and start over.
- */
- if (!mutex_trylock(&mp->m_quotainfo->qi_dqlist_lock)) {
- restarts++;
- startagain = 1;
- goto qhunlock;
- }
-
- ASSERT(dqp->q_nrefs == 0);
- list_del_init(&dqp->q_mplist);
- mp->m_quotainfo->qi_dquots--;
- mp->m_quotainfo->qi_dqreclaims++;
- list_del_init(&dqp->q_hashlist);
- dqp->q_hash->qh_version++;
- list_del_init(&dqp->q_freelist);
- xfs_Gqm->qm_dqfrlist_cnt--;
- dqpout = dqp;
- mutex_unlock(&mp->m_quotainfo->qi_dqlist_lock);
-qhunlock:
- mutex_unlock(&dqp->q_hash->qh_lock);
-dqfunlock:
- xfs_dqfunlock(dqp);
-dqunlock:
- xfs_dqunlock(dqp);
- if (dqpout)
- break;
- if (restarts >= XFS_QM_RECLAIM_MAX_RESTARTS)
- break;
- if (startagain) {
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- goto again;
- }
- }
- mutex_unlock(&xfs_Gqm->qm_dqfrlist_lock);
- return dqpout;
-}
-
-/*
- * Traverse the freelist of dquots and attempt to reclaim a maximum of
- * 'howmany' dquots. This operation races with dqlookup(), and attempts to
- * favor the lookup function.
- */
-STATIC int
-xfs_qm_shake_freelist(
- int howmany)
-{
- int nreclaimed = 0;
- xfs_dquot_t *dqp;
-
- if (howmany <= 0)
- return 0;
-
- while (nreclaimed < howmany) {
- dqp = xfs_qm_dqreclaim_one();
- if (!dqp)
- return nreclaimed;
- xfs_qm_dqdestroy(dqp);
- nreclaimed++;
- }
- return nreclaimed;
-}
-
-/*
- * The kmem_shake interface is invoked when memory is running low.
- */
-/* ARGSUSED */
-STATIC int
-xfs_qm_shake(
- struct shrinker *shrink,
- struct shrink_control *sc)
-{
- int ndqused, nfree, n;
- gfp_t gfp_mask = sc->gfp_mask;
-
- if (!kmem_shake_allow(gfp_mask))
- return 0;
- if (!xfs_Gqm)
- return 0;
-
- nfree = xfs_Gqm->qm_dqfrlist_cnt; /* free dquots */
- /* incore dquots in all f/s's */
- ndqused = atomic_read(&xfs_Gqm->qm_totaldquots) - nfree;
-
- ASSERT(ndqused >= 0);
-
- if (nfree <= ndqused && nfree < ndquot)
- return 0;
-
- ndqused *= xfs_Gqm->qm_dqfree_ratio; /* target # of free dquots */
- n = nfree - ndqused - ndquot; /* # over target */
-
- return xfs_qm_shake_freelist(MAX(nfree, n));
-}
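
To see what the shaker targets, plug in illustrative numbers (assumptions,
not measurements): with the high-water mark ndquot == 500, 1200 total incore
dquots and nfree == 1000 on the freelist, ndqused comes to 200. The early
return is skipped (nfree > ndqused), the target free count becomes
ndqused * qm_dqfree_ratio == 400 with the default XFS_QM_DQFREE_RATIO of 2,
and n = 1000 - 400 - 500 = 100 dquots over target. MAX(nfree, n) then asks
xfs_qm_shake_freelist() for 1000 reclaims, i.e. the whole freelist whenever
it exceeds the overshoot.
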
-
-
-/*------------------------------------------------------------------*/
-
-/*
- * Return a new incore dquot. Depending on the number of
- * dquots in the system, we either allocate a new one on the kernel heap,
- * or reclaim a free one.
- * Return value is B_TRUE if we allocated a new dquot, B_FALSE if we managed
- * to reclaim an existing one from the freelist.
- */
-boolean_t
-xfs_qm_dqalloc_incore(
- xfs_dquot_t **O_dqpp)
-{
- xfs_dquot_t *dqp;
-
- /*
- * Check against high water mark to see if we want to pop
- * a nincompoop dquot off the freelist.
- */
- if (atomic_read(&xfs_Gqm->qm_totaldquots) >= ndquot) {
- /*
- * Try to recycle a dquot from the freelist.
- */
- if ((dqp = xfs_qm_dqreclaim_one())) {
- XQM_STATS_INC(xqmstats.xs_qm_dqreclaims);
- /*
- * Just zero the core here. The rest will get
- * reinitialized by caller. XXX we shouldn't even
- * do this zero ...
- */
- memset(&dqp->q_core, 0, sizeof(dqp->q_core));
- *O_dqpp = dqp;
- return B_FALSE;
- }
- XQM_STATS_INC(xqmstats.xs_qm_dqreclaim_misses);
- }
-
- /*
- * Allocate a brand new dquot on the kernel heap and return it
- * to the caller to initialize.
- */
- ASSERT(xfs_Gqm->qm_dqzone != NULL);
- *O_dqpp = kmem_zone_zalloc(xfs_Gqm->qm_dqzone, KM_SLEEP);
- atomic_inc(&xfs_Gqm->qm_totaldquots);
-
- return B_TRUE;
-}
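
A minimal caller sketch (hypothetical; the real caller is the dquot
initialization path in xfs_dquot.c, outside this hunk), showing how the
boolean return distinguishes a recycled dquot from a fresh one:

	xfs_dquot_t	*dqp;

	if (xfs_qm_dqalloc_incore(&dqp) == B_FALSE) {
		/* recycled from the freelist: q_core was zeroed for us,
		 * but everything else must be reinitialized */
	} else {
		/* freshly allocated from qm_dqzone: fully zeroed */
	}
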
-
-
-/*
- * Start a transaction and write the incore superblock changes to
- * disk. flags parameter indicates which fields have changed.
- */
-int
-xfs_qm_write_sb_changes(
- xfs_mount_t *mp,
- __int64_t flags)
-{
- xfs_trans_t *tp;
- int error;
-
-#ifdef QUOTADEBUG
- xfs_notice(mp, "Writing superblock quota changes");
-#endif
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
- if ((error = xfs_trans_reserve(tp, 0,
- mp->m_sb.sb_sectsize + 128, 0,
- 0,
- XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_mod_sb(tp, flags);
- error = xfs_trans_commit(tp, 0);
-
- return error;
-}
-
-
-/* --------------- utility functions for vnodeops ---------------- */
-
-
-/*
- * Given an inode, a uid, gid and prid, make sure that we have
- * allocated relevant dquot(s) on disk, and that we won't exceed inode
- * quotas by creating this file.
- * This also attaches dquot(s) to the given inode after locking it,
- * and returns the dquots corresponding to the uid and/or gid.
- *
- * in : inode (unlocked)
- * out : udquot, gdquot with references taken and unlocked
- */
-int
-xfs_qm_vop_dqalloc(
- struct xfs_inode *ip,
- uid_t uid,
- gid_t gid,
- prid_t prid,
- uint flags,
- struct xfs_dquot **O_udqpp,
- struct xfs_dquot **O_gdqpp)
-{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_dquot *uq, *gq;
- int error;
- uint lockflags;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
- return 0;
-
- lockflags = XFS_ILOCK_EXCL;
- xfs_ilock(ip, lockflags);
-
- if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
- gid = ip->i_d.di_gid;
-
- /*
- * Attach the dquot(s) to this inode, doing a dquot allocation
- * if necessary. The dquot(s) will not be locked.
- */
- if (XFS_NOT_DQATTACHED(mp, ip)) {
- error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
- if (error) {
- xfs_iunlock(ip, lockflags);
- return error;
- }
- }
-
- uq = gq = NULL;
- if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
- if (ip->i_d.di_uid != uid) {
- /*
- * What we need is the dquot that has this uid, and
- * if we send the inode to dqget, the uid of the inode
- * takes priority over what's sent in the uid argument.
-			 * We must unlock the inode here before calling dqget if
- * we're not sending the inode, because otherwise
- * we'll deadlock by doing trans_reserve while
- * holding ilock.
- */
- xfs_iunlock(ip, lockflags);
- if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t) uid,
- XFS_DQ_USER,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
- &uq))) {
- ASSERT(error != ENOENT);
- return error;
- }
- /*
- * Get the ilock in the right order.
- */
- xfs_dqunlock(uq);
- lockflags = XFS_ILOCK_SHARED;
- xfs_ilock(ip, lockflags);
- } else {
- /*
- * Take an extra reference, because we'll return
- * this to caller
- */
- ASSERT(ip->i_udquot);
- uq = ip->i_udquot;
- xfs_dqlock(uq);
- XFS_DQHOLD(uq);
- xfs_dqunlock(uq);
- }
- }
- if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
- if (ip->i_d.di_gid != gid) {
- xfs_iunlock(ip, lockflags);
- if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)gid,
- XFS_DQ_GROUP,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
- &gq))) {
- if (uq)
- xfs_qm_dqrele(uq);
- ASSERT(error != ENOENT);
- return error;
- }
- xfs_dqunlock(gq);
- lockflags = XFS_ILOCK_SHARED;
- xfs_ilock(ip, lockflags);
- } else {
- ASSERT(ip->i_gdquot);
- gq = ip->i_gdquot;
- xfs_dqlock(gq);
- XFS_DQHOLD(gq);
- xfs_dqunlock(gq);
- }
- } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
- if (xfs_get_projid(ip) != prid) {
- xfs_iunlock(ip, lockflags);
- if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
- XFS_DQ_PROJ,
- XFS_QMOPT_DQALLOC |
- XFS_QMOPT_DOWARN,
- &gq))) {
- if (uq)
- xfs_qm_dqrele(uq);
- ASSERT(error != ENOENT);
- return (error);
- }
- xfs_dqunlock(gq);
- lockflags = XFS_ILOCK_SHARED;
- xfs_ilock(ip, lockflags);
- } else {
- ASSERT(ip->i_gdquot);
- gq = ip->i_gdquot;
- xfs_dqlock(gq);
- XFS_DQHOLD(gq);
- xfs_dqunlock(gq);
- }
- }
- if (uq)
- trace_xfs_dquot_dqalloc(ip);
-
- xfs_iunlock(ip, lockflags);
- if (O_udqpp)
- *O_udqpp = uq;
- else if (uq)
- xfs_qm_dqrele(uq);
- if (O_gdqpp)
- *O_gdqpp = gq;
- else if (gq)
- xfs_qm_dqrele(gq);
- return 0;
-}
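
A sketch of the intended calling pattern for a create-style operation
(hypothetical; transaction setup and the quota reservation step are elided):

	struct xfs_dquot	*udqp = NULL, *gdqp = NULL;
	int			error;

	error = xfs_qm_vop_dqalloc(ip, uid, gid, prid,
				   XFS_QMOPT_UQUOTA | XFS_QMOPT_GQUOTA,
				   &udqp, &gdqp);
	if (error)
		return error;

	/* ... reserve against the dquots, run the transaction, attach
	 *     them with xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp) ... */

	if (udqp)
		xfs_qm_dqrele(udqp);	/* drop the references taken for us */
	if (gdqp)
		xfs_qm_dqrele(gdqp);
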
-
-/*
- * Actually transfer ownership, and do dquot modifications.
- * These were already reserved.
- */
-xfs_dquot_t *
-xfs_qm_vop_chown(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t **IO_olddq,
- xfs_dquot_t *newdq)
-{
- xfs_dquot_t *prevdq;
- uint bfield = XFS_IS_REALTIME_INODE(ip) ?
- XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
-
- /* old dquot */
- prevdq = *IO_olddq;
- ASSERT(prevdq);
- ASSERT(prevdq != newdq);
-
- xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
- xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
-
- /* the sparkling new dquot */
- xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
- xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
-
- /*
- * Take an extra reference, because the inode
- * is going to keep this dquot pointer even
- * after the trans_commit.
- */
- xfs_dqlock(newdq);
- XFS_DQHOLD(newdq);
- xfs_dqunlock(newdq);
- *IO_olddq = newdq;
-
- return prevdq;
-}
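
The displaced dquot is handed back with its reference intact, so the caller
owns it. A hypothetical chown-path sketch:

	xfs_dquot_t	*olddq;

	olddq = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp);
	xfs_qm_dqrele(olddq);	/* release the old dquot's reference */
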
-
-/*
- * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
- */
-int
-xfs_qm_vop_chown_reserve(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t *udqp,
- xfs_dquot_t *gdqp,
- uint flags)
-{
- xfs_mount_t *mp = ip->i_mount;
- uint delblks, blkflags, prjflags = 0;
- xfs_dquot_t *unresudq, *unresgdq, *delblksudq, *delblksgdq;
- int error;
-
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- delblks = ip->i_delayed_blks;
- delblksudq = delblksgdq = unresudq = unresgdq = NULL;
- blkflags = XFS_IS_REALTIME_INODE(ip) ?
- XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
-
- if (XFS_IS_UQUOTA_ON(mp) && udqp &&
- ip->i_d.di_uid != (uid_t)be32_to_cpu(udqp->q_core.d_id)) {
- delblksudq = udqp;
- /*
- * If there are delayed allocation blocks, then we have to
- * unreserve those from the old dquot, and add them to the
- * new dquot.
- */
- if (delblks) {
- ASSERT(ip->i_udquot);
- unresudq = ip->i_udquot;
- }
- }
- if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) {
- if (XFS_IS_PQUOTA_ON(ip->i_mount) &&
- xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id))
- prjflags = XFS_QMOPT_ENOSPC;
-
- if (prjflags ||
- (XFS_IS_GQUOTA_ON(ip->i_mount) &&
- ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id))) {
- delblksgdq = gdqp;
- if (delblks) {
- ASSERT(ip->i_gdquot);
- unresgdq = ip->i_gdquot;
- }
- }
- }
-
- if ((error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
- delblksudq, delblksgdq, ip->i_d.di_nblocks, 1,
- flags | blkflags | prjflags)))
- return (error);
-
- /*
-	 * Do the delayed blks reservations/unreservations now. Since these
-	 * are done without the help of a transaction, if a reservation fails,
-	 * its previous reservations won't be automatically undone by the
-	 * trans code. So, we have to do it manually here.
- */
- if (delblks) {
- /*
- * Do the reservations first. Unreservation can't fail.
- */
- ASSERT(delblksudq || delblksgdq);
- ASSERT(unresudq || unresgdq);
- if ((error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
- delblksudq, delblksgdq, (xfs_qcnt_t)delblks, 0,
- flags | blkflags | prjflags)))
- return (error);
- xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
- unresudq, unresgdq, -((xfs_qcnt_t)delblks), 0,
- blkflags);
- }
-
- return (0);
-}
-
-int
-xfs_qm_vop_rename_dqattach(
- struct xfs_inode **i_tab)
-{
- struct xfs_mount *mp = i_tab[0]->i_mount;
- int i;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
- return 0;
-
- for (i = 0; (i < 4 && i_tab[i]); i++) {
- struct xfs_inode *ip = i_tab[i];
- int error;
-
- /*
- * Watch out for duplicate entries in the table.
- */
- if (i == 0 || ip != i_tab[i-1]) {
- if (XFS_NOT_DQATTACHED(mp, ip)) {
- error = xfs_qm_dqattach(ip, 0);
- if (error)
- return error;
- }
- }
- }
- return 0;
-}
-
-void
-xfs_qm_vop_create_dqattach(
- struct xfs_trans *tp,
- struct xfs_inode *ip,
- struct xfs_dquot *udqp,
- struct xfs_dquot *gdqp)
-{
- struct xfs_mount *mp = tp->t_mountp;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
- return;
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- if (udqp) {
- xfs_dqlock(udqp);
- XFS_DQHOLD(udqp);
- xfs_dqunlock(udqp);
- ASSERT(ip->i_udquot == NULL);
- ip->i_udquot = udqp;
- ASSERT(XFS_IS_UQUOTA_ON(mp));
- ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
- xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
- }
- if (gdqp) {
- xfs_dqlock(gdqp);
- XFS_DQHOLD(gdqp);
- xfs_dqunlock(gdqp);
- ASSERT(ip->i_gdquot == NULL);
- ip->i_gdquot = gdqp;
- ASSERT(XFS_IS_OQUOTA_ON(mp));
- ASSERT((XFS_IS_GQUOTA_ON(mp) ?
- ip->i_d.di_gid : xfs_get_projid(ip)) ==
- be32_to_cpu(gdqp->q_core.d_id));
- xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
- }
-}
-
diff --git a/fs/xfs/quota/xfs_qm.h b/fs/xfs/quota/xfs_qm.h
deleted file mode 100644
index 567b29b..0000000
--- a/fs/xfs/quota/xfs_qm.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_QM_H__
-#define __XFS_QM_H__
-
-#include "xfs_dquot_item.h"
-#include "xfs_dquot.h"
-#include "xfs_quota_priv.h"
-#include "xfs_qm_stats.h"
-
-struct xfs_qm;
-struct xfs_inode;
-
-extern uint ndquot;
-extern struct mutex xfs_Gqm_lock;
-extern struct xfs_qm *xfs_Gqm;
-extern kmem_zone_t *qm_dqzone;
-extern kmem_zone_t *qm_dqtrxzone;
-
-/*
- * Used in xfs_qm_sync, called by xfs_sync, to bound the number of times it
- * can restart iterating over the mountpoint's dquot list in one call.
- */
-#define XFS_QM_SYNC_MAX_RESTARTS 7
-
-/*
- * Ditto, for xfs_qm_dqreclaim_one.
- */
-#define XFS_QM_RECLAIM_MAX_RESTARTS 4
-
-/*
- * Ideal ratio of free to in use dquots. Quota manager makes an attempt
- * to keep this balance.
- */
-#define XFS_QM_DQFREE_RATIO 2
-
-/*
- * Dquot hashtable constants/threshold values.
- */
-#define XFS_QM_HASHSIZE_LOW (PAGE_SIZE / sizeof(xfs_dqhash_t))
-#define XFS_QM_HASHSIZE_HIGH ((PAGE_SIZE * 4) / sizeof(xfs_dqhash_t))
-
-/*
- * This defines the unit of allocation of dquots.
- * Currently, it is just one file system block, and a 4K blk contains 30
- * (136 * 30 = 4080) dquots. It's probably not worth trying to make
- * this more dynamic.
- * XXXsup However, if this number is changed, we have to make sure that we don't
- * implicitly assume that we do allocations in chunks of a single filesystem
- * block in the dquot/xqm code.
- */
-#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
-
-typedef xfs_dqhash_t xfs_dqlist_t;
-
-/*
- * Quota Manager (global) structure. Lives only in core.
- */
-typedef struct xfs_qm {
- xfs_dqlist_t *qm_usr_dqhtable;/* udquot hash table */
- xfs_dqlist_t *qm_grp_dqhtable;/* gdquot hash table */
- uint qm_dqhashmask; /* # buckets in dq hashtab - 1 */
- struct list_head qm_dqfrlist; /* freelist of dquots */
- struct mutex qm_dqfrlist_lock;
- int qm_dqfrlist_cnt;
- atomic_t qm_totaldquots; /* total incore dquots */
- uint qm_nrefs; /* file systems with quota on */
- int qm_dqfree_ratio;/* ratio of free to inuse dquots */
- kmem_zone_t *qm_dqzone; /* dquot mem-alloc zone */
- kmem_zone_t *qm_dqtrxzone; /* t_dqinfo of transactions */
-} xfs_qm_t;
-
-/*
- * Various quota information for individual filesystems.
- * The mount structure keeps a pointer to this.
- */
-typedef struct xfs_quotainfo {
- xfs_inode_t *qi_uquotaip; /* user quota inode */
- xfs_inode_t *qi_gquotaip; /* group quota inode */
- struct list_head qi_dqlist; /* all dquots in filesys */
- struct mutex qi_dqlist_lock;
- int qi_dquots;
- int qi_dqreclaims; /* a change here indicates
- a removal in the dqlist */
- time_t qi_btimelimit; /* limit for blks timer */
- time_t qi_itimelimit; /* limit for inodes timer */
- time_t qi_rtbtimelimit;/* limit for rt blks timer */
- xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */
- xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */
- xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */
- struct mutex qi_quotaofflock;/* to serialize quotaoff */
- xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
- uint qi_dqperchunk; /* # ondisk dqs in above chunk */
- xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */
- xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */
- xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */
- xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
- xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
- xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
-} xfs_quotainfo_t;
-
-
-extern void xfs_trans_mod_dquot(xfs_trans_t *, xfs_dquot_t *, uint, long);
-extern int xfs_trans_reserve_quota_bydquots(xfs_trans_t *, xfs_mount_t *,
- xfs_dquot_t *, xfs_dquot_t *, long, long, uint);
-extern void xfs_trans_dqjoin(xfs_trans_t *, xfs_dquot_t *);
-extern void xfs_trans_log_dquot(xfs_trans_t *, xfs_dquot_t *);
-
-/*
- * We keep the usr and grp dquots separately so that locking will be easier
- * to do at commit time. All transactions that we know of at this point
- * affect no more than two dquots of one type. Hence, the TRANS_MAXDQS value.
- */
-#define XFS_QM_TRANS_MAXDQS 2
-typedef struct xfs_dquot_acct {
- xfs_dqtrx_t dqa_usrdquots[XFS_QM_TRANS_MAXDQS];
- xfs_dqtrx_t dqa_grpdquots[XFS_QM_TRANS_MAXDQS];
-} xfs_dquot_acct_t;
-
-/*
- * Users are allowed to have a usage exceeding their softlimit for
- * a period this long.
- */
-#define XFS_QM_BTIMELIMIT (7 * 24*60*60) /* 1 week */
-#define XFS_QM_RTBTIMELIMIT (7 * 24*60*60) /* 1 week */
-#define XFS_QM_ITIMELIMIT (7 * 24*60*60) /* 1 week */
-
-#define XFS_QM_BWARNLIMIT 5
-#define XFS_QM_IWARNLIMIT 5
-#define XFS_QM_RTBWARNLIMIT 5
-
-extern void xfs_qm_destroy_quotainfo(xfs_mount_t *);
-extern int xfs_qm_quotacheck(xfs_mount_t *);
-extern int xfs_qm_write_sb_changes(xfs_mount_t *, __int64_t);
-
-/* dquot stuff */
-extern boolean_t xfs_qm_dqalloc_incore(xfs_dquot_t **);
-extern int xfs_qm_dqpurge_all(xfs_mount_t *, uint);
-extern void xfs_qm_dqrele_all_inodes(xfs_mount_t *, uint);
-
-/* quota ops */
-extern int xfs_qm_scall_trunc_qfiles(xfs_mount_t *, uint);
-extern int xfs_qm_scall_getquota(xfs_mount_t *, xfs_dqid_t, uint,
- fs_disk_quota_t *);
-extern int xfs_qm_scall_setqlim(xfs_mount_t *, xfs_dqid_t, uint,
- fs_disk_quota_t *);
-extern int xfs_qm_scall_getqstat(xfs_mount_t *, fs_quota_stat_t *);
-extern int xfs_qm_scall_quotaon(xfs_mount_t *, uint);
-extern int xfs_qm_scall_quotaoff(xfs_mount_t *, uint);
-
-#ifdef DEBUG
-extern int xfs_qm_internalqcheck(xfs_mount_t *);
-#else
-#define xfs_qm_internalqcheck(mp) (0)
-#endif
-
-#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c
deleted file mode 100644
index a0a829a..0000000
--- a/fs/xfs/quota/xfs_qm_bhv.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-
-STATIC void
-xfs_fill_statvfs_from_dquot(
- struct kstatfs *statp,
- xfs_disk_dquot_t *dp)
-{
- __uint64_t limit;
-
- limit = dp->d_blk_softlimit ?
- be64_to_cpu(dp->d_blk_softlimit) :
- be64_to_cpu(dp->d_blk_hardlimit);
- if (limit && statp->f_blocks > limit) {
- statp->f_blocks = limit;
- statp->f_bfree = statp->f_bavail =
- (statp->f_blocks > be64_to_cpu(dp->d_bcount)) ?
- (statp->f_blocks - be64_to_cpu(dp->d_bcount)) : 0;
- }
-
- limit = dp->d_ino_softlimit ?
- be64_to_cpu(dp->d_ino_softlimit) :
- be64_to_cpu(dp->d_ino_hardlimit);
- if (limit && statp->f_files > limit) {
- statp->f_files = limit;
- statp->f_ffree =
- (statp->f_files > be64_to_cpu(dp->d_icount)) ?
- (statp->f_ffree - be64_to_cpu(dp->d_icount)) : 0;
- }
-}
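
A worked example of the clamping above (illustrative numbers): for a project
with d_blk_softlimit == 1000 blocks and d_bcount == 400 on a filesystem whose
raw statfs reports f_blocks == 1000000, a df inside the project tree sees
f_blocks == 1000 and f_bfree == f_bavail == 1000 - 400 == 600, so the
directory tree appears as a 1000-block filesystem that is 40% full.
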
-
-
-/*
- * Directory tree accounting is implemented using project quotas, where
- * the project identifier is inherited from parent directories.
- * A statvfs (df, etc.) of a directory that is using project quota should
- * return a statvfs of the project, not the entire filesystem.
- * This makes such trees appear as if they are filesystems in themselves.
- */
-void
-xfs_qm_statvfs(
- xfs_inode_t *ip,
- struct kstatfs *statp)
-{
- xfs_mount_t *mp = ip->i_mount;
- xfs_dquot_t *dqp;
-
- if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) {
- xfs_fill_statvfs_from_dquot(statp, &dqp->q_core);
- xfs_qm_dqput(dqp);
- }
-}
-
-int
-xfs_qm_newmount(
- xfs_mount_t *mp,
- uint *needquotamount,
- uint *quotaflags)
-{
- uint quotaondisk;
- uint uquotaondisk = 0, gquotaondisk = 0, pquotaondisk = 0;
-
- quotaondisk = xfs_sb_version_hasquota(&mp->m_sb) &&
- (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_ACCT);
-
- if (quotaondisk) {
- uquotaondisk = mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT;
- pquotaondisk = mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT;
- gquotaondisk = mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT;
- }
-
- /*
- * If the device itself is read-only, we can't allow
- * the user to change the state of quota on the mount -
- * this would generate a transaction on the ro device,
-	 * which would lead to an I/O error and shutdown.
- */
-
- if (((uquotaondisk && !XFS_IS_UQUOTA_ON(mp)) ||
- (!uquotaondisk && XFS_IS_UQUOTA_ON(mp)) ||
- (pquotaondisk && !XFS_IS_PQUOTA_ON(mp)) ||
- (!pquotaondisk && XFS_IS_PQUOTA_ON(mp)) ||
- (gquotaondisk && !XFS_IS_GQUOTA_ON(mp)) ||
- (!gquotaondisk && XFS_IS_OQUOTA_ON(mp))) &&
- xfs_dev_is_read_only(mp, "changing quota state")) {
- xfs_warn(mp, "please mount with%s%s%s%s.",
- (!quotaondisk ? "out quota" : ""),
- (uquotaondisk ? " usrquota" : ""),
- (pquotaondisk ? " prjquota" : ""),
- (gquotaondisk ? " grpquota" : ""));
- return XFS_ERROR(EPERM);
- }
-
- if (XFS_IS_QUOTA_ON(mp) || quotaondisk) {
- /*
- * Call mount_quotas at this point only if we won't have to do
- * a quotacheck.
- */
- if (quotaondisk && !XFS_QM_NEED_QUOTACHECK(mp)) {
- /*
- * If an error occurred, qm_mount_quotas code
- * has already disabled quotas. So, just finish
- * mounting, and get on with the boring life
- * without disk quotas.
- */
- xfs_qm_mount_quotas(mp);
- } else {
- /*
- * Clear the quota flags, but remember them. This
- * is so that the quota code doesn't get invoked
- * before we're ready. This can happen when an
- * inode goes inactive and wants to free blocks,
- * or via xfs_log_mount_finish.
- */
- *needquotamount = B_TRUE;
- *quotaflags = mp->m_qflags;
- mp->m_qflags = 0;
- }
- }
-
- return 0;
-}
-
-void __init
-xfs_qm_init(void)
-{
- printk(KERN_INFO "SGI XFS Quota Management subsystem\n");
- mutex_init(&xfs_Gqm_lock);
- xfs_qm_init_procfs();
-}
-
-void __exit
-xfs_qm_exit(void)
-{
- xfs_qm_cleanup_procfs();
- if (qm_dqzone)
- kmem_zone_destroy(qm_dqzone);
- if (qm_dqtrxzone)
- kmem_zone_destroy(qm_dqtrxzone);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.c b/fs/xfs/quota/xfs_qm_stats.c
deleted file mode 100644
index 8671a0b..0000000
--- a/fs/xfs/quota/xfs_qm_stats.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_qm.h"
-
-struct xqmstats xqmstats;
-
-static int xqm_proc_show(struct seq_file *m, void *v)
-{
- /* maximum; incore; ratio free to inuse; freelist */
- seq_printf(m, "%d\t%d\t%d\t%u\n",
- ndquot,
- xfs_Gqm? atomic_read(&xfs_Gqm->qm_totaldquots) : 0,
- xfs_Gqm? xfs_Gqm->qm_dqfree_ratio : 0,
- xfs_Gqm? xfs_Gqm->qm_dqfrlist_cnt : 0);
- return 0;
-}
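
Given that format string, a read of the file this handler backs (registered
below as /proc/fs/xfs/xqm) yields one tab-separated line; with illustrative
values:

	$ cat /proc/fs/xfs/xqm
	32768	120	2	40

i.e. the dquot high-water mark (ndquot), the current incore count, the
free-to-inuse ratio, and the freelist length.
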
-
-static int xqm_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xqm_proc_show, NULL);
-}
-
-static const struct file_operations xqm_proc_fops = {
- .owner = THIS_MODULE,
- .open = xqm_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int xqmstat_proc_show(struct seq_file *m, void *v)
-{
- /* quota performance statistics */
- seq_printf(m, "qm %u %u %u %u %u %u %u %u\n",
- xqmstats.xs_qm_dqreclaims,
- xqmstats.xs_qm_dqreclaim_misses,
- xqmstats.xs_qm_dquot_dups,
- xqmstats.xs_qm_dqcachemisses,
- xqmstats.xs_qm_dqcachehits,
- xqmstats.xs_qm_dqwants,
- xqmstats.xs_qm_dqshake_reclaims,
- xqmstats.xs_qm_dqinact_reclaims);
- return 0;
-}
-
-static int xqmstat_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, xqmstat_proc_show, NULL);
-}
-
-static const struct file_operations xqmstat_proc_fops = {
- .owner = THIS_MODULE,
- .open = xqmstat_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-void
-xfs_qm_init_procfs(void)
-{
- proc_create("fs/xfs/xqmstat", 0, NULL, &xqmstat_proc_fops);
- proc_create("fs/xfs/xqm", 0, NULL, &xqm_proc_fops);
-}
-
-void
-xfs_qm_cleanup_procfs(void)
-{
- remove_proc_entry("fs/xfs/xqm", NULL);
- remove_proc_entry("fs/xfs/xqmstat", NULL);
-}
diff --git a/fs/xfs/quota/xfs_qm_stats.h b/fs/xfs/quota/xfs_qm_stats.h
deleted file mode 100644
index 5b964fc..0000000
--- a/fs/xfs/quota/xfs_qm_stats.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_QM_STATS_H__
-#define __XFS_QM_STATS_H__
-
-#if defined(CONFIG_PROC_FS) && !defined(XFS_STATS_OFF)
-
-/*
- * XQM global statistics
- */
-struct xqmstats {
- __uint32_t xs_qm_dqreclaims;
- __uint32_t xs_qm_dqreclaim_misses;
- __uint32_t xs_qm_dquot_dups;
- __uint32_t xs_qm_dqcachemisses;
- __uint32_t xs_qm_dqcachehits;
- __uint32_t xs_qm_dqwants;
- __uint32_t xs_qm_dqshake_reclaims;
- __uint32_t xs_qm_dqinact_reclaims;
-};
-
-extern struct xqmstats xqmstats;
-
-# define XQM_STATS_INC(count) ( (count)++ )
-
-extern void xfs_qm_init_procfs(void);
-extern void xfs_qm_cleanup_procfs(void);
-
-#else
-
-# define XQM_STATS_INC(count) do { } while (0)
-
-static inline void xfs_qm_init_procfs(void) { };
-static inline void xfs_qm_cleanup_procfs(void) { };
-
-#endif
-
-#endif /* __XFS_QM_STATS_H__ */
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
deleted file mode 100644
index 2dadb15..0000000
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ /dev/null
@@ -1,1259 +0,0 @@
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include <linux/capability.h>
-
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_utils.h"
-#include "xfs_qm.h"
-#include "xfs_trace.h"
-
-STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
-STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
- uint);
-STATIC uint xfs_qm_export_flags(uint);
-STATIC uint xfs_qm_export_qtype_flags(uint);
-STATIC void xfs_qm_export_dquot(xfs_mount_t *, xfs_disk_dquot_t *,
- fs_disk_quota_t *);
-
-
-/*
- * Turn off quota accounting and/or enforcement for all udquots and/or
- * gdquots. Called only at unmount time.
- *
- * This assumes that there are no dquots of this file system cached
- * incore, and modifies the ondisk dquot directly. Therefore, for example,
- * it is an error to call this twice without purging the cache.
- */
-int
-xfs_qm_scall_quotaoff(
- xfs_mount_t *mp,
- uint flags)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- uint dqtype;
- int error;
- uint inactivate_flags;
- xfs_qoff_logitem_t *qoffstart;
- int nculprits;
-
- /*
- * No file system can have quotas enabled on disk but not in core.
- * Note that quota utilities (like quotaoff) _expect_
- * errno == EEXIST here.
- */
- if ((mp->m_qflags & flags) == 0)
- return XFS_ERROR(EEXIST);
- error = 0;
-
- flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
-
- /*
- * We don't want two quotaoff operations messing each other up,
- * so we serialize them; quotaoff isn't exactly performance
- * critical anyway.
- * If quotaoff, then we must be dealing with the root filesystem.
- */
- ASSERT(q);
- mutex_lock(&q->qi_quotaofflock);
-
- /*
- * If we're just turning off quota enforcement, change mp and go.
- */
- if ((flags & XFS_ALL_QUOTA_ACCT) == 0) {
- mp->m_qflags &= ~(flags);
-
- spin_lock(&mp->m_sb_lock);
- mp->m_sb.sb_qflags = mp->m_qflags;
- spin_unlock(&mp->m_sb_lock);
- mutex_unlock(&q->qi_quotaofflock);
-
- /* XXX what to do on error? Revert to the old incore values? */
- error = xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS);
- return (error);
- }
-
- dqtype = 0;
- inactivate_flags = 0;
- /*
- * If accounting is being turned off, we must also turn enforcement
- * off and clear the quota 'CHKD' certificate to make it known that
- * we have to do a quotacheck the next time this quota is turned on.
- */
- if (flags & XFS_UQUOTA_ACCT) {
- dqtype |= XFS_QMOPT_UQUOTA;
- flags |= (XFS_UQUOTA_CHKD | XFS_UQUOTA_ENFD);
- inactivate_flags |= XFS_UQUOTA_ACTIVE;
- }
- if (flags & XFS_GQUOTA_ACCT) {
- dqtype |= XFS_QMOPT_GQUOTA;
- flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
- inactivate_flags |= XFS_GQUOTA_ACTIVE;
- } else if (flags & XFS_PQUOTA_ACCT) {
- dqtype |= XFS_QMOPT_PQUOTA;
- flags |= (XFS_OQUOTA_CHKD | XFS_OQUOTA_ENFD);
- inactivate_flags |= XFS_PQUOTA_ACTIVE;
- }
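- /*
- * At this point dqtype holds the XFS_QMOPT_* types being shut
- * down, and inactivate_flags the corresponding *_ACTIVE bits
- * that get cleared from m_qflags further below.
- */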
-
- /*
- * Nothing to do? Don't complain. This happens when we're just
- * turning off quota enforcement.
- */
- if ((mp->m_qflags & flags) == 0)
- goto out_unlock;
-
- /*
- * Write the LI_QUOTAOFF log record, and do SB changes atomically,
- * and synchronously. If we fail to write, we should abort the
- * operation as it cannot be recovered safely if we crash.
- */
- error = xfs_qm_log_quotaoff(mp, &qoffstart, flags);
- if (error)
- goto out_unlock;
-
- /*
- * Next we clear the XFS_MOUNT_*DQ_ACTIVE bit(s) in the mount struct
- * to take care of the race between dqget and quotaoff. We don't take
- * any special locks to reset these bits. All processes need to check
- * these bits *after* taking inode lock(s) to see if the particular
- * quota type is in the process of being turned off. If *ACTIVE, it is
- * guaranteed that all dquot structures and all quotainode ptrs will
- * stay valid as long as that inode is kept locked.
- *
- * There is no turning back after this.
- */
- mp->m_qflags &= ~inactivate_flags;
-
- /*
- * Give back all the dquot reference(s) held by inodes.
- * Here we go thru every single incore inode in this file system, and
- * do a dqrele on the i_udquot/i_gdquot that it may have.
- * Essentially, as long as somebody has an inode locked, this guarantees
- * that quotas will not be turned off. This is handy because in a
- * transaction once we lock the inode(s) and check for quotaon, we can
- * depend on the quota inodes (and other things) being valid as long as
- * we keep the lock(s).
- */
- xfs_qm_dqrele_all_inodes(mp, flags);
-
- /*
- * Next we make the changes in the quota flag in the mount struct.
- * This isn't protected by a particular lock directly, because we
- * don't want to take a mrlock every time we depend on quotas being on.
- */
- mp->m_qflags &= ~(flags);
-
- /*
- * Go through all the dquots of this file system and purge them,
- * according to what was turned off. We may not be able to get rid
- * of all dquots, because dquots can have temporary references that
- * are not attached to inodes. eg. xfs_setattr, xfs_create.
- * So, if we couldn't purge all the dquots from the filesystem,
- * we can't get rid of the incore data structures.
- */
- while ((nculprits = xfs_qm_dqpurge_all(mp, dqtype)))
- delay(10 * nculprits);
-
- /*
- * Transactions that had started before ACTIVE state bit was cleared
- * could have logged many dquots, so they'd have higher LSNs than
- * the first QUOTAOFF log record does. If we happen to crash when
- * the tail of the log has gone past the QUOTAOFF record, but
- * before the last dquot modification, those dquots __will__
- * recover, and that's not good.
- *
- * So, we have QUOTAOFF start and end logitems; the start
- * logitem won't get overwritten until the end logitem appears...
- */
- error = xfs_qm_log_quotaoff_end(mp, qoffstart, flags);
- if (error) {
- /* We're screwed now. Shutdown is the only option. */
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- goto out_unlock;
- }
-
- /*
- * If quotas are now completely disabled, close shop.
- */
- if (((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET1) ||
- ((flags & XFS_MOUNT_QUOTA_ALL) == XFS_MOUNT_QUOTA_SET2)) {
- mutex_unlock(&q->qi_quotaofflock);
- xfs_qm_destroy_quotainfo(mp);
- return (0);
- }
-
- /*
- * Release our quotainode references if we don't need them anymore.
- */
- if ((dqtype & XFS_QMOPT_UQUOTA) && q->qi_uquotaip) {
- IRELE(q->qi_uquotaip);
- q->qi_uquotaip = NULL;
- }
- if ((dqtype & (XFS_QMOPT_GQUOTA|XFS_QMOPT_PQUOTA)) && q->qi_gquotaip) {
- IRELE(q->qi_gquotaip);
- q->qi_gquotaip = NULL;
- }
-
-out_unlock:
- mutex_unlock(&q->qi_quotaofflock);
- return error;
-}
-
-STATIC int
-xfs_qm_scall_trunc_qfile(
- struct xfs_mount *mp,
- xfs_ino_t ino)
-{
- struct xfs_inode *ip;
- struct xfs_trans *tp;
- int error;
-
- if (ino == NULLFSINO)
- return 0;
-
- error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
- if (error)
- return error;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_TRUNCATE_FILE);
- error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_ITRUNCATE_LOG_COUNT);
- if (error) {
- xfs_trans_cancel(tp, 0);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- goto out_put;
- }
-
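- /*
- * Take the ILOCK only after the transaction reservation has
- * succeeded: reserving log space can block, and we must not
- * sleep while holding the inode's ILOCK.
- */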
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip);
-
- error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK, 1);
- if (error) {
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
- XFS_TRANS_ABORT);
- goto out_unlock;
- }
-
- xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
-
-out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-out_put:
- IRELE(ip);
- return error;
-}
-
-int
-xfs_qm_scall_trunc_qfiles(
- xfs_mount_t *mp,
- uint flags)
-{
- int error = 0, error2 = 0;
-
- if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) {
- xfs_debug(mp, "%s: flags=%x m_qflags=%x\n",
- __func__, flags, mp->m_qflags);
- return XFS_ERROR(EINVAL);
- }
-
- if (flags & XFS_DQ_USER)
- error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino);
- if (flags & (XFS_DQ_GROUP|XFS_DQ_PROJ))
- error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino);
-
- return error ? error : error2;
-}
-
-/*
- * Switch on (a given) quota enforcement for a filesystem. This takes
- * effect immediately.
- * (Switching on quota accounting must be done at mount time.)
- */
-int
-xfs_qm_scall_quotaon(
- xfs_mount_t *mp,
- uint flags)
-{
- int error;
- uint qf;
- __int64_t sbflags;
-
- flags &= (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
- /*
- * Switching on quota accounting must be done at mount time.
- */
- flags &= ~(XFS_ALL_QUOTA_ACCT);
-
- sbflags = 0;
-
- if (flags == 0) {
- xfs_debug(mp, "%s: zero flags, m_qflags=%x\n",
- __func__, mp->m_qflags);
- return XFS_ERROR(EINVAL);
- }
-
- /* No fs can turn on quotas with a delayed effect */
- ASSERT((flags & XFS_ALL_QUOTA_ACCT) == 0);
-
- /*
- * Can't enforce without accounting. We check the superblock
- * qflags here instead of m_qflags because rootfs can have
- * quota acct enabled on disk without m_qflags knowing about it.
- */
- if (((flags & XFS_UQUOTA_ACCT) == 0 &&
- (mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) == 0 &&
- (flags & XFS_UQUOTA_ENFD))
- ||
- ((flags & XFS_PQUOTA_ACCT) == 0 &&
- (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&
- (flags & XFS_GQUOTA_ACCT) == 0 &&
- (mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) == 0 &&
- (flags & XFS_OQUOTA_ENFD))) {
- xfs_debug(mp,
- "%s: Can't enforce without acct, flags=%x sbflags=%x\n",
- __func__, flags, mp->m_sb.sb_qflags);
- return XFS_ERROR(EINVAL);
- }
- /*
- * If everything's up-to-date incore, then don't waste time.
- */
- if ((mp->m_qflags & flags) == flags)
- return XFS_ERROR(EEXIST);
-
- /*
- * Change sb_qflags on disk but not the incore mp->m_qflags
- * if this is the root filesystem.
- */
- spin_lock(&mp->m_sb_lock);
- qf = mp->m_sb.sb_qflags;
- mp->m_sb.sb_qflags = qf | flags;
- spin_unlock(&mp->m_sb_lock);
-
- /*
- * There's nothing to change if it's the same.
- */
- if ((qf & flags) == flags && sbflags == 0)
- return XFS_ERROR(EEXIST);
- sbflags |= XFS_SB_QFLAGS;
-
- if ((error = xfs_qm_write_sb_changes(mp, sbflags)))
- return (error);
- /*
- * If we aren't trying to switch on quota enforcement, we are done.
- */
- if (((mp->m_sb.sb_qflags & XFS_UQUOTA_ACCT) !=
- (mp->m_qflags & XFS_UQUOTA_ACCT)) ||
- ((mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) !=
- (mp->m_qflags & XFS_PQUOTA_ACCT)) ||
- ((mp->m_sb.sb_qflags & XFS_GQUOTA_ACCT) !=
- (mp->m_qflags & XFS_GQUOTA_ACCT)) ||
- (flags & XFS_ALL_QUOTA_ENFD) == 0)
- return (0);
-
- if (! XFS_IS_QUOTA_RUNNING(mp))
- return XFS_ERROR(ESRCH);
-
- /*
- * Switch on quota enforcement in core.
- */
- mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
- mp->m_qflags |= (flags & XFS_ALL_QUOTA_ENFD);
- mutex_unlock(&mp->m_quotainfo->qi_quotaofflock);
-
- return (0);
-}
-
-
-/*
- * Return quota status information, such as uquota-off, enforcements, etc.
- */
-int
-xfs_qm_scall_getqstat(
- struct xfs_mount *mp,
- struct fs_quota_stat *out)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- struct xfs_inode *uip, *gip;
- boolean_t tempuqip, tempgqip;
-
- uip = gip = NULL;
- tempuqip = tempgqip = B_FALSE;
- memset(out, 0, sizeof(fs_quota_stat_t));
-
- out->qs_version = FS_QSTAT_VERSION;
- if (!xfs_sb_version_hasquota(&mp->m_sb)) {
- out->qs_uquota.qfs_ino = NULLFSINO;
- out->qs_gquota.qfs_ino = NULLFSINO;
- return (0);
- }
- out->qs_flags = (__uint16_t) xfs_qm_export_flags(mp->m_qflags &
- (XFS_ALL_QUOTA_ACCT|
- XFS_ALL_QUOTA_ENFD));
- out->qs_pad = 0;
- out->qs_uquota.qfs_ino = mp->m_sb.sb_uquotino;
- out->qs_gquota.qfs_ino = mp->m_sb.sb_gquotino;
-
- if (q) {
- uip = q->qi_uquotaip;
- gip = q->qi_gquotaip;
- }
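- /*
- * If the quota inodes aren't already held in the quotainfo,
- * take temporary references so their sizes can be reported;
- * they are dropped again below.
- */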
- if (!uip && mp->m_sb.sb_uquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
- 0, 0, &uip) == 0)
- tempuqip = B_TRUE;
- }
- if (!gip && mp->m_sb.sb_gquotino != NULLFSINO) {
- if (xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
- 0, 0, &gip) == 0)
- tempgqip = B_TRUE;
- }
- if (uip) {
- out->qs_uquota.qfs_nblks = uip->i_d.di_nblocks;
- out->qs_uquota.qfs_nextents = uip->i_d.di_nextents;
- if (tempuqip)
- IRELE(uip);
- }
- if (gip) {
- out->qs_gquota.qfs_nblks = gip->i_d.di_nblocks;
- out->qs_gquota.qfs_nextents = gip->i_d.di_nextents;
- if (tempgqip)
- IRELE(gip);
- }
- if (q) {
- out->qs_incoredqs = q->qi_dquots;
- out->qs_btimelimit = q->qi_btimelimit;
- out->qs_itimelimit = q->qi_itimelimit;
- out->qs_rtbtimelimit = q->qi_rtbtimelimit;
- out->qs_bwarnlimit = q->qi_bwarnlimit;
- out->qs_iwarnlimit = q->qi_iwarnlimit;
- }
- return 0;
-}
-
-#define XFS_DQ_MASK \
- (FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
-
-/*
- * Adjust quota limits, and start/stop timers accordingly.
- */
-int
-xfs_qm_scall_setqlim(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- fs_disk_quota_t *newlim)
-{
- struct xfs_quotainfo *q = mp->m_quotainfo;
- xfs_disk_dquot_t *ddq;
- xfs_dquot_t *dqp;
- xfs_trans_t *tp;
- int error;
- xfs_qcnt_t hard, soft;
-
- if (newlim->d_fieldmask & ~XFS_DQ_MASK)
- return EINVAL;
- if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
- return 0;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
- if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
- 0, 0, XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- return (error);
- }
-
- /*
- * We don't want to race with a quotaoff so take the quotaoff lock.
- * (We don't hold an inode lock, so there's nothing else to stop
- * a quotaoff from happening). (XXX This doesn't currently happen
- * because we take the vfslock before calling xfs_qm_sysent.)
- */
- mutex_lock(&q->qi_quotaofflock);
-
- /*
- * Get the dquot (locked), and join it to the transaction.
- * Allocate the dquot if this doesn't exist.
- */
- if ((error = xfs_qm_dqget(mp, NULL, id, type, XFS_QMOPT_DQALLOC, &dqp))) {
- xfs_trans_cancel(tp, XFS_TRANS_ABORT);
- ASSERT(error != ENOENT);
- goto out_unlock;
- }
- xfs_trans_dqjoin(tp, dqp);
- ddq = &dqp->q_core;
-
- /*
- * Make sure that hard limits are >= soft limits before changing.
- */
- hard = (newlim->d_fieldmask & FS_DQ_BHARD) ?
- (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_hardlimit) :
- be64_to_cpu(ddq->d_blk_hardlimit);
- soft = (newlim->d_fieldmask & FS_DQ_BSOFT) ?
- (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_blk_softlimit) :
- be64_to_cpu(ddq->d_blk_softlimit);
- if (hard == 0 || hard >= soft) {
- ddq->d_blk_hardlimit = cpu_to_be64(hard);
- ddq->d_blk_softlimit = cpu_to_be64(soft);
- if (id == 0) {
- q->qi_bhardlimit = hard;
- q->qi_bsoftlimit = soft;
- }
- } else {
- xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft);
- }
- hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?
- (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) :
- be64_to_cpu(ddq->d_rtb_hardlimit);
- soft = (newlim->d_fieldmask & FS_DQ_RTBSOFT) ?
- (xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_softlimit) :
- be64_to_cpu(ddq->d_rtb_softlimit);
- if (hard == 0 || hard >= soft) {
- ddq->d_rtb_hardlimit = cpu_to_be64(hard);
- ddq->d_rtb_softlimit = cpu_to_be64(soft);
- if (id == 0) {
- q->qi_rtbhardlimit = hard;
- q->qi_rtbsoftlimit = soft;
- }
- } else {
- xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft);
- }
-
- hard = (newlim->d_fieldmask & FS_DQ_IHARD) ?
- (xfs_qcnt_t) newlim->d_ino_hardlimit :
- be64_to_cpu(ddq->d_ino_hardlimit);
- soft = (newlim->d_fieldmask & FS_DQ_ISOFT) ?
- (xfs_qcnt_t) newlim->d_ino_softlimit :
- be64_to_cpu(ddq->d_ino_softlimit);
- if (hard == 0 || hard >= soft) {
- ddq->d_ino_hardlimit = cpu_to_be64(hard);
- ddq->d_ino_softlimit = cpu_to_be64(soft);
- if (id == 0) {
- q->qi_ihardlimit = hard;
- q->qi_isoftlimit = soft;
- }
- } else {
- xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft);
- }
-
- /*
- * Update warnings counter(s) if requested
- */
- if (newlim->d_fieldmask & FS_DQ_BWARNS)
- ddq->d_bwarns = cpu_to_be16(newlim->d_bwarns);
- if (newlim->d_fieldmask & FS_DQ_IWARNS)
- ddq->d_iwarns = cpu_to_be16(newlim->d_iwarns);
- if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
- ddq->d_rtbwarns = cpu_to_be16(newlim->d_rtbwarns);
-
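- /*
- * The id 0 dquot carries the filesystem-wide default limits,
- * which are mirrored into the quotainfo here so that newly
- * initialised dquots can pick them up.
- */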
- if (id == 0) {
- /*
- * Timelimits for the super user set the relative time
- * the other users can be over quota for this file system.
- * If it is zero, a default is used. Ditto for the default
- * soft and hard limit values (already done, above), and
- * for warnings.
- */
- if (newlim->d_fieldmask & FS_DQ_BTIMER) {
- q->qi_btimelimit = newlim->d_btimer;
- ddq->d_btimer = cpu_to_be32(newlim->d_btimer);
- }
- if (newlim->d_fieldmask & FS_DQ_ITIMER) {
- q->qi_itimelimit = newlim->d_itimer;
- ddq->d_itimer = cpu_to_be32(newlim->d_itimer);
- }
- if (newlim->d_fieldmask & FS_DQ_RTBTIMER) {
- q->qi_rtbtimelimit = newlim->d_rtbtimer;
- ddq->d_rtbtimer = cpu_to_be32(newlim->d_rtbtimer);
- }
- if (newlim->d_fieldmask & FS_DQ_BWARNS)
- q->qi_bwarnlimit = newlim->d_bwarns;
- if (newlim->d_fieldmask & FS_DQ_IWARNS)
- q->qi_iwarnlimit = newlim->d_iwarns;
- if (newlim->d_fieldmask & FS_DQ_RTBWARNS)
- q->qi_rtbwarnlimit = newlim->d_rtbwarns;
- } else {
- /*
- * If the user is now over quota, start the timelimit.
- * The user will not be 'warned'.
- * Note that we keep the timers ticking, whether enforcement
- * is on or off. We don't really want to bother with iterating
- * over all ondisk dquots and turning the timers on/off.
- */
- xfs_qm_adjust_dqtimers(mp, ddq);
- }
- dqp->dq_flags |= XFS_DQ_DIRTY;
- xfs_trans_log_dquot(tp, dqp);
-
- error = xfs_trans_commit(tp, 0);
- xfs_qm_dqprint(dqp);
- xfs_qm_dqrele(dqp);
-
- out_unlock:
- mutex_unlock(&q->qi_quotaofflock);
- return error;
-}
-
-int
-xfs_qm_scall_getquota(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- fs_disk_quota_t *out)
-{
- xfs_dquot_t *dqp;
- int error;
-
- /*
- * Try to get the dquot. We don't want it allocated on disk, so
- * we aren't passing the XFS_QMOPT_DQALLOC flag. If it doesn't
- * exist, we'll get ENOENT back.
- */
- if ((error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp))) {
- return (error);
- }
-
- /*
- * If everything's NULL, this dquot doesn't quite exist as far as
- * our utility programs are concerned.
- */
- if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
- xfs_qm_dqput(dqp);
- return XFS_ERROR(ENOENT);
- }
- /* xfs_qm_dqprint(dqp); */
- /*
- * Convert the disk dquot to the exportable format
- */
- xfs_qm_export_dquot(mp, &dqp->q_core, out);
- xfs_qm_dqput(dqp);
- return (error ? XFS_ERROR(EFAULT) : 0);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff_end(
- xfs_mount_t *mp,
- xfs_qoff_logitem_t *startqoff,
- uint flags)
-{
- xfs_trans_t *tp;
- int error;
- xfs_qoff_logitem_t *qoffi;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF_END);
-
- if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_qoff_logitem_t) * 2,
- 0, 0, XFS_DEFAULT_LOG_COUNT))) {
- xfs_trans_cancel(tp, 0);
- return (error);
- }
-
- qoffi = xfs_trans_get_qoff_item(tp, startqoff,
- flags & XFS_ALL_QUOTA_ACCT);
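- /*
- * Link the end logitem to the start item; the start logitem
- * won't get overwritten in the log until this end record is
- * safely on disk (see the quotaoff comments above).
- */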
- xfs_trans_log_quotaoff_item(tp, qoffi);
-
- /*
- * We have to make sure that the transaction is secure on disk before we
- * return and actually stop quota accounting. So, make it synchronous.
- * We don't care about quotaoff's performance.
- */
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
- return (error);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff(
- xfs_mount_t *mp,
- xfs_qoff_logitem_t **qoffstartp,
- uint flags)
-{
- xfs_trans_t *tp;
- int error;
- xfs_qoff_logitem_t *qoffi=NULL;
- uint oldsbqflag=0;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QUOTAOFF);
- if ((error = xfs_trans_reserve(tp, 0,
- sizeof(xfs_qoff_logitem_t) * 2 +
- mp->m_sb.sb_sectsize + 128,
- 0,
- 0,
- XFS_DEFAULT_LOG_COUNT))) {
- goto error0;
- }
-
- qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
- xfs_trans_log_quotaoff_item(tp, qoffi);
-
- spin_lock(&mp->m_sb_lock);
- oldsbqflag = mp->m_sb.sb_qflags;
- mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
- spin_unlock(&mp->m_sb_lock);
-
- xfs_mod_sb(tp, XFS_SB_QFLAGS);
-
- /*
- * We have to make sure that the transaction is secure on disk before we
- * return and actually stop quota accounting. So, make it synchronous.
- * We don't care about quotaoff's performance.
- */
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, 0);
-
-error0:
- if (error) {
- xfs_trans_cancel(tp, 0);
- /*
- * No one else is modifying sb_qflags, so this is OK.
- * We still hold the quotaofflock.
- */
- spin_lock(&mp->m_sb_lock);
- mp->m_sb.sb_qflags = oldsbqflag;
- spin_unlock(&mp->m_sb_lock);
- }
- *qoffstartp = qoffi;
- return (error);
-}
-
-
-/*
- * Translate an internal style on-disk-dquot to the exportable format.
- * The main differences are that the counters/limits are all in Basic
- * Blocks (BBs) instead of the internal FSBs, and all on-disk data has
- * to be converted to the native endianness.
- */
-STATIC void
-xfs_qm_export_dquot(
- xfs_mount_t *mp,
- xfs_disk_dquot_t *src,
- struct fs_disk_quota *dst)
-{
- memset(dst, 0, sizeof(*dst));
- dst->d_version = FS_DQUOT_VERSION; /* different from src->d_version */
- dst->d_flags = xfs_qm_export_qtype_flags(src->d_flags);
- dst->d_id = be32_to_cpu(src->d_id);
- dst->d_blk_hardlimit =
- XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_hardlimit));
- dst->d_blk_softlimit =
- XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_blk_softlimit));
- dst->d_ino_hardlimit = be64_to_cpu(src->d_ino_hardlimit);
- dst->d_ino_softlimit = be64_to_cpu(src->d_ino_softlimit);
- dst->d_bcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_bcount));
- dst->d_icount = be64_to_cpu(src->d_icount);
- dst->d_btimer = be32_to_cpu(src->d_btimer);
- dst->d_itimer = be32_to_cpu(src->d_itimer);
- dst->d_iwarns = be16_to_cpu(src->d_iwarns);
- dst->d_bwarns = be16_to_cpu(src->d_bwarns);
- dst->d_rtb_hardlimit =
- XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_hardlimit));
- dst->d_rtb_softlimit =
- XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtb_softlimit));
- dst->d_rtbcount = XFS_FSB_TO_BB(mp, be64_to_cpu(src->d_rtbcount));
- dst->d_rtbtimer = be32_to_cpu(src->d_rtbtimer);
- dst->d_rtbwarns = be16_to_cpu(src->d_rtbwarns);
-
- /*
- * Internally, we don't reset all the timers when quota enforcement
- * gets turned off. No need to confuse the user level code,
- * so return zeroes in that case.
- */
- if ((!XFS_IS_UQUOTA_ENFORCED(mp) && src->d_flags == XFS_DQ_USER) ||
- (!XFS_IS_OQUOTA_ENFORCED(mp) &&
- (src->d_flags & (XFS_DQ_PROJ | XFS_DQ_GROUP)))) {
- dst->d_btimer = 0;
- dst->d_itimer = 0;
- dst->d_rtbtimer = 0;
- }
-
-#ifdef DEBUG
- if (((XFS_IS_UQUOTA_ENFORCED(mp) && dst->d_flags == FS_USER_QUOTA) ||
- (XFS_IS_OQUOTA_ENFORCED(mp) &&
- (dst->d_flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)))) &&
- dst->d_id != 0) {
- if (((int) dst->d_bcount >= (int) dst->d_blk_softlimit) &&
- (dst->d_blk_softlimit > 0)) {
- ASSERT(dst->d_btimer != 0);
- }
- if (((int) dst->d_icount >= (int) dst->d_ino_softlimit) &&
- (dst->d_ino_softlimit > 0)) {
- ASSERT(dst->d_itimer != 0);
- }
- }
-#endif
-}
-
-STATIC uint
-xfs_qm_export_qtype_flags(
- uint flags)
-{
- /*
- * Exactly one quota type flag must be set: no more than one, and not none.
- */
- ASSERT((flags & (FS_PROJ_QUOTA | FS_USER_QUOTA)) !=
- (FS_PROJ_QUOTA | FS_USER_QUOTA));
- ASSERT((flags & (FS_PROJ_QUOTA | FS_GROUP_QUOTA)) !=
- (FS_PROJ_QUOTA | FS_GROUP_QUOTA));
- ASSERT((flags & (FS_USER_QUOTA | FS_GROUP_QUOTA)) !=
- (FS_USER_QUOTA | FS_GROUP_QUOTA));
- ASSERT((flags & (FS_PROJ_QUOTA|FS_USER_QUOTA|FS_GROUP_QUOTA)) != 0);
-
- return (flags & XFS_DQ_USER) ?
- FS_USER_QUOTA : (flags & XFS_DQ_PROJ) ?
- FS_PROJ_QUOTA : FS_GROUP_QUOTA;
-}
-
-STATIC uint
-xfs_qm_export_flags(
- uint flags)
-{
- uint uflags;
-
- uflags = 0;
- if (flags & XFS_UQUOTA_ACCT)
- uflags |= FS_QUOTA_UDQ_ACCT;
- if (flags & XFS_PQUOTA_ACCT)
- uflags |= FS_QUOTA_PDQ_ACCT;
- if (flags & XFS_GQUOTA_ACCT)
- uflags |= FS_QUOTA_GDQ_ACCT;
- if (flags & XFS_UQUOTA_ENFD)
- uflags |= FS_QUOTA_UDQ_ENFD;
- if (flags & (XFS_OQUOTA_ENFD)) {
- uflags |= (flags & XFS_GQUOTA_ACCT) ?
- FS_QUOTA_GDQ_ENFD : FS_QUOTA_PDQ_ENFD;
- }
- return (uflags);
-}
-
-
-STATIC int
-xfs_dqrele_inode(
- struct xfs_inode *ip,
- struct xfs_perag *pag,
- int flags)
-{
- /* skip quota inodes */
- if (ip == ip->i_mount->m_quotainfo->qi_uquotaip ||
- ip == ip->i_mount->m_quotainfo->qi_gquotaip) {
- ASSERT(ip->i_udquot == NULL);
- ASSERT(ip->i_gdquot == NULL);
- return 0;
- }
-
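- /*
- * i_udquot and i_gdquot are only stable under ILOCK_EXCL, so
- * take it before detaching and releasing the dquots.
- */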
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) {
- xfs_qm_dqrele(ip->i_udquot);
- ip->i_udquot = NULL;
- }
- if (flags & (XFS_PQUOTA_ACCT|XFS_GQUOTA_ACCT) && ip->i_gdquot) {
- xfs_qm_dqrele(ip->i_gdquot);
- ip->i_gdquot = NULL;
- }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return 0;
-}
-
-
-/*
- * Go thru all the inodes in the file system, releasing their dquots.
- *
- * Note that the mount structure gets modified to indicate that quotas are off
- * AFTER this, in the case of quotaoff.
- */
-void
-xfs_qm_dqrele_all_inodes(
- struct xfs_mount *mp,
- uint flags)
-{
- ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags);
-}
-
-/*------------------------------------------------------------------------*/
-#ifdef DEBUG
-/*
- * This contains all the test functions for XFS disk quotas.
- * Currently it does a quota accounting check, i.e. it walks through
- * all inodes in the file system, calculating the dquot accounting fields,
- * and prints out any inconsistencies.
- */
-xfs_dqhash_t *qmtest_udqtab;
-xfs_dqhash_t *qmtest_gdqtab;
-int qmtest_hashmask;
-int qmtest_nfails;
-struct mutex qcheck_lock;
-
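-/*
- * The test hash mirrors the real dquot hash: buckets are chosen by
- * <mp, id>, with user and group/project ids kept in separate tables.
- */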
-#define DQTEST_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
- (__psunsigned_t)(id)) & \
- (qmtest_hashmask - 1))
-
-#define DQTEST_HASH(mp, id, type) ((type & XFS_DQ_USER) ? \
- (qmtest_udqtab + \
- DQTEST_HASHVAL(mp, id)) : \
- (qmtest_gdqtab + \
- DQTEST_HASHVAL(mp, id)))
-
-#define DQTEST_LIST_PRINT(l, NXT, title) \
-{ \
- xfs_dqtest_t *dqp; int i = 0;\
- xfs_debug(NULL, "%s (#%d)", title, (int) (l)->qh_nelems); \
- for (dqp = (xfs_dqtest_t *)(l)->qh_next; dqp != NULL; \
- dqp = (xfs_dqtest_t *)dqp->NXT) { \
- xfs_debug(dqp->q_mount, \
- " %d. \"%d (%s)\" bcnt = %d, icnt = %d", \
- ++i, dqp->d_id, DQFLAGTO_TYPESTR(dqp), \
- dqp->d_bcount, dqp->d_icount); } \
-}
-
-typedef struct dqtest {
- uint dq_flags; /* various flags (XFS_DQ_*) */
- struct list_head q_hashlist;
- xfs_dqhash_t *q_hash; /* the hashchain header */
- xfs_mount_t *q_mount; /* filesystem this relates to */
- xfs_dqid_t d_id; /* user id or group id */
- xfs_qcnt_t d_bcount; /* # disk blocks owned by the user */
- xfs_qcnt_t d_icount; /* # inodes owned by the user */
-} xfs_dqtest_t;
-
-STATIC void
-xfs_qm_hashinsert(xfs_dqhash_t *h, xfs_dqtest_t *dqp)
-{
- list_add(&dqp->q_hashlist, &h->qh_list);
- h->qh_version++;
- h->qh_nelems++;
-}
-STATIC void
-xfs_qm_dqtest_print(
- struct xfs_mount *mp,
- struct dqtest *d)
-{
- xfs_debug(mp, "-----------DQTEST DQUOT----------------");
- xfs_debug(mp, "---- dquot ID = %d", d->d_id);
- xfs_debug(mp, "---- fs = 0x%p", d->q_mount);
- xfs_debug(mp, "---- bcount = %Lu (0x%x)",
- d->d_bcount, (int)d->d_bcount);
- xfs_debug(mp, "---- icount = %Lu (0x%x)",
- d->d_icount, (int)d->d_icount);
- xfs_debug(mp, "---------------------------");
-}
-
-STATIC void
-xfs_qm_dqtest_failed(
- xfs_dqtest_t *d,
- xfs_dquot_t *dqp,
- char *reason,
- xfs_qcnt_t a,
- xfs_qcnt_t b,
- int error)
-{
- qmtest_nfails++;
- if (error)
- xfs_debug(dqp->q_mount,
- "quotacheck failed id=%d, err=%d\nreason: %s",
- d->d_id, error, reason);
- else
- xfs_debug(dqp->q_mount,
- "quotacheck failed id=%d (%s) [%d != %d]",
- d->d_id, reason, (int)a, (int)b);
- xfs_qm_dqtest_print(dqp->q_mount, d);
- if (dqp)
- xfs_qm_dqprint(dqp);
-}
-
-STATIC int
-xfs_dqtest_cmp2(
- xfs_dqtest_t *d,
- xfs_dquot_t *dqp)
-{
- int err = 0;
- if (be64_to_cpu(dqp->q_core.d_icount) != d->d_icount) {
- xfs_qm_dqtest_failed(d, dqp, "icount mismatch",
- be64_to_cpu(dqp->q_core.d_icount),
- d->d_icount, 0);
- err++;
- }
- if (be64_to_cpu(dqp->q_core.d_bcount) != d->d_bcount) {
- xfs_qm_dqtest_failed(d, dqp, "bcount mismatch",
- be64_to_cpu(dqp->q_core.d_bcount),
- d->d_bcount, 0);
- err++;
- }
- if (dqp->q_core.d_blk_softlimit &&
- be64_to_cpu(dqp->q_core.d_bcount) >=
- be64_to_cpu(dqp->q_core.d_blk_softlimit)) {
- if (!dqp->q_core.d_btimer && dqp->q_core.d_id) {
- xfs_debug(dqp->q_mount,
- "%d [%s] BLK TIMER NOT STARTED",
- d->d_id, DQFLAGTO_TYPESTR(d));
- err++;
- }
- }
- if (dqp->q_core.d_ino_softlimit &&
- be64_to_cpu(dqp->q_core.d_icount) >=
- be64_to_cpu(dqp->q_core.d_ino_softlimit)) {
- if (!dqp->q_core.d_itimer && dqp->q_core.d_id) {
- xfs_debug(dqp->q_mount,
- "%d [%s] INO TIMER NOT STARTED",
- d->d_id, DQFLAGTO_TYPESTR(d));
- err++;
- }
- }
-#ifdef QUOTADEBUG
- if (!err) {
- xfs_debug(dqp->q_mount, "%d [%s] qchecked",
- d->d_id, DQFLAGTO_TYPESTR(d));
- }
-#endif
- return (err);
-}
-
-STATIC void
-xfs_dqtest_cmp(
- xfs_dqtest_t *d)
-{
- xfs_dquot_t *dqp;
- int error;
-
- /* xfs_qm_dqtest_print(d); */
- if ((error = xfs_qm_dqget(d->q_mount, NULL, d->d_id, d->dq_flags, 0,
- &dqp))) {
- xfs_qm_dqtest_failed(d, NULL, "dqget failed", 0, 0, error);
- return;
- }
- xfs_dqtest_cmp2(d, dqp);
- xfs_qm_dqput(dqp);
-}
-
-STATIC int
-xfs_qm_internalqcheck_dqget(
- xfs_mount_t *mp,
- xfs_dqid_t id,
- uint type,
- xfs_dqtest_t **O_dq)
-{
- xfs_dqtest_t *d;
- xfs_dqhash_t *h;
-
- h = DQTEST_HASH(mp, id, type);
- list_for_each_entry(d, &h->qh_list, q_hashlist) {
- if (d->d_id == id && mp == d->q_mount) {
- *O_dq = d;
- return (0);
- }
- }
- d = kmem_zalloc(sizeof(xfs_dqtest_t), KM_SLEEP);
- d->dq_flags = type;
- d->d_id = id;
- d->q_mount = mp;
- d->q_hash = h;
- INIT_LIST_HEAD(&d->q_hashlist);
- xfs_qm_hashinsert(h, d);
- *O_dq = d;
- return (0);
-}
-
-STATIC void
-xfs_qm_internalqcheck_get_dquots(
- xfs_mount_t *mp,
- xfs_dqid_t uid,
- xfs_dqid_t projid,
- xfs_dqid_t gid,
- xfs_dqtest_t **ud,
- xfs_dqtest_t **gd)
-{
- if (XFS_IS_UQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, uid, XFS_DQ_USER, ud);
- if (XFS_IS_GQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, gid, XFS_DQ_GROUP, gd);
- else if (XFS_IS_PQUOTA_ON(mp))
- xfs_qm_internalqcheck_dqget(mp, projid, XFS_DQ_PROJ, gd);
-}
-
-
-STATIC void
-xfs_qm_internalqcheck_dqadjust(
- xfs_inode_t *ip,
- xfs_dqtest_t *d)
-{
- d->d_icount++;
- d->d_bcount += (xfs_qcnt_t)ip->i_d.di_nblocks;
-}
-
-STATIC int
-xfs_qm_internalqcheck_adjust(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_ino_t ino, /* inode number to get data for */
- void __user *buffer, /* not used */
- int ubsize, /* not used */
- int *ubused, /* not used */
- int *res) /* bulkstat result code */
-{
- xfs_inode_t *ip;
- xfs_dqtest_t *ud, *gd;
- uint lock_flags;
- boolean_t ipreleased;
- int error;
-
- ASSERT(XFS_IS_QUOTA_RUNNING(mp));
-
- if (ino == mp->m_sb.sb_uquotino || ino == mp->m_sb.sb_gquotino) {
- *res = BULKSTAT_RV_NOTHING;
- xfs_debug(mp, "%s: ino=%llu, uqino=%llu, gqino=%llu\n",
- __func__, (unsigned long long) ino,
- (unsigned long long) mp->m_sb.sb_uquotino,
- (unsigned long long) mp->m_sb.sb_gquotino);
- return XFS_ERROR(EINVAL);
- }
- ipreleased = B_FALSE;
- again:
- lock_flags = XFS_ILOCK_SHARED;
- if ((error = xfs_iget(mp, NULL, ino, 0, lock_flags, &ip))) {
- *res = BULKSTAT_RV_NOTHING;
- return (error);
- }
-
- /*
- * This inode can have blocks after eof which can get released
- * when we send it to inactive. Since we don't check the dquot
- * until after all our calculations are done, we must get rid
- * of those now.
- */
- if (! ipreleased) {
- xfs_iunlock(ip, lock_flags);
- IRELE(ip);
- ipreleased = B_TRUE;
- goto again;
- }
- xfs_qm_internalqcheck_get_dquots(mp,
- (xfs_dqid_t) ip->i_d.di_uid,
- (xfs_dqid_t) xfs_get_projid(ip),
- (xfs_dqid_t) ip->i_d.di_gid,
- &ud, &gd);
- if (XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(ud);
- xfs_qm_internalqcheck_dqadjust(ip, ud);
- }
- if (XFS_IS_OQUOTA_ON(mp)) {
- ASSERT(gd);
- xfs_qm_internalqcheck_dqadjust(ip, gd);
- }
- xfs_iunlock(ip, lock_flags);
- IRELE(ip);
- *res = BULKSTAT_RV_DIDONE;
- return (0);
-}
-
-
-/* PRIVATE, debugging */
-int
-xfs_qm_internalqcheck(
- xfs_mount_t *mp)
-{
- xfs_ino_t lastino;
- int done, count;
- int i;
- int error;
-
- lastino = 0;
- qmtest_hashmask = 32;
- count = 5;
- done = 0;
- qmtest_nfails = 0;
-
- if (! XFS_IS_QUOTA_ON(mp))
- return XFS_ERROR(ESRCH);
-
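- /*
- * Push all dirty metadata to disk: force the log and flush
- * the data device, twice over, so the on-disk dquots are
- * stable before we compare against them.
- */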
- xfs_log_force(mp, XFS_LOG_SYNC);
- XFS_bflush(mp->m_ddev_targp);
- xfs_log_force(mp, XFS_LOG_SYNC);
- XFS_bflush(mp->m_ddev_targp);
-
- mutex_lock(&qcheck_lock);
- /*
- * There should be absolutely no quota activity while this
- * is going on.
- */
- qmtest_udqtab = kmem_zalloc(qmtest_hashmask *
- sizeof(xfs_dqhash_t), KM_SLEEP);
- qmtest_gdqtab = kmem_zalloc(qmtest_hashmask *
- sizeof(xfs_dqhash_t), KM_SLEEP);
- do {
- /*
- * Iterate thru all the inodes in the file system,
- * adjusting the corresponding dquot counters
- */
- error = xfs_bulkstat(mp, &lastino, &count,
- xfs_qm_internalqcheck_adjust,
- 0, NULL, &done);
- if (error) {
- xfs_debug(mp, "Bulkstat returned error 0x%x", error);
- break;
- }
- } while (!done);
-
- xfs_debug(mp, "Checking results against system dquots");
- for (i = 0; i < qmtest_hashmask; i++) {
- xfs_dqtest_t *d, *n;
- xfs_dqhash_t *h;
-
- h = &qmtest_udqtab[i];
- list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
- xfs_dqtest_cmp(d);
- kmem_free(d);
- }
- h = &qmtest_gdqtab[i];
- list_for_each_entry_safe(d, n, &h->qh_list, q_hashlist) {
- xfs_dqtest_cmp(d);
- kmem_free(d);
- }
- }
-
- if (qmtest_nfails) {
- xfs_debug(mp, "******** quotacheck failed ********");
- xfs_debug(mp, "failures = %d", qmtest_nfails);
- } else {
- xfs_debug(mp, "******** quotacheck successful! ********");
- }
- kmem_free(qmtest_udqtab);
- kmem_free(qmtest_gdqtab);
- mutex_unlock(&qcheck_lock);
- return (qmtest_nfails);
-}
-
-#endif /* DEBUG */
diff --git a/fs/xfs/quota/xfs_quota_priv.h b/fs/xfs/quota/xfs_quota_priv.h
deleted file mode 100644
index 94a3d92..0000000
--- a/fs/xfs/quota/xfs_quota_priv.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright (c) 2000-2003 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_QUOTA_PRIV_H__
-#define __XFS_QUOTA_PRIV_H__
-
-/*
- * Number of bmaps that we ask from bmapi when doing a quotacheck.
- * We make this restriction to keep the memory usage to a minimum.
- */
-#define XFS_DQITER_MAP_SIZE 10
-
-/*
- * Hash into a bucket in the dquot hash table, based on <mp, id>.
- */
-#define XFS_DQ_HASHVAL(mp, id) (((__psunsigned_t)(mp) + \
- (__psunsigned_t)(id)) & \
- (xfs_Gqm->qm_dqhashmask - 1))
-#define XFS_DQ_HASH(mp, id, type) (type == XFS_DQ_USER ? \
- (xfs_Gqm->qm_usr_dqhtable + \
- XFS_DQ_HASHVAL(mp, id)) : \
- (xfs_Gqm->qm_grp_dqhtable + \
- XFS_DQ_HASHVAL(mp, id)))
-#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \
- !dqp->q_core.d_blk_hardlimit && \
- !dqp->q_core.d_blk_softlimit && \
- !dqp->q_core.d_rtb_hardlimit && \
- !dqp->q_core.d_rtb_softlimit && \
- !dqp->q_core.d_ino_hardlimit && \
- !dqp->q_core.d_ino_softlimit && \
- !dqp->q_core.d_bcount && \
- !dqp->q_core.d_rtbcount && \
- !dqp->q_core.d_icount)
-
-#define DQFLAGTO_TYPESTR(d) (((d)->dq_flags & XFS_DQ_USER) ? "USR" : \
- (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \
- (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???")))
-
-#endif /* __XFS_QUOTA_PRIV_H__ */
diff --git a/fs/xfs/quota/xfs_trans_dquot.c b/fs/xfs/quota/xfs_trans_dquot.c
deleted file mode 100644
index 2a36487..0000000
--- a/fs/xfs/quota/xfs_trans_dquot.c
+++ /dev/null
@@ -1,895 +0,0 @@
-/*
- * Copyright (c) 2000-2002 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_sb.h"
-#include "xfs_ag.h"
-#include "xfs_alloc.h"
-#include "xfs_quota.h"
-#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_inode.h"
-#include "xfs_itable.h"
-#include "xfs_bmap.h"
-#include "xfs_rtalloc.h"
-#include "xfs_error.h"
-#include "xfs_attr.h"
-#include "xfs_buf_item.h"
-#include "xfs_trans_priv.h"
-#include "xfs_qm.h"
-
-STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *);
-
-/*
- * Add the locked dquot to the transaction.
- * The dquot must be locked, and it cannot be associated with any
- * transaction.
- */
-void
-xfs_trans_dqjoin(
- xfs_trans_t *tp,
- xfs_dquot_t *dqp)
-{
- ASSERT(dqp->q_transp != tp);
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(dqp->q_logitem.qli_dquot == dqp);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &dqp->q_logitem.qli_item);
-
- /*
- * Initialize q_transp so we can later determine if this dquot is
- * associated with this transaction.
- */
- dqp->q_transp = tp;
-}
-
-
-/*
- * This is called to mark the dquot as needing
- * to be logged when the transaction is committed. The dquot must
- * already be associated with the given transaction.
- * Note that it marks the entire transaction as dirty. In the ordinary
- * case, this gets called via xfs_trans_commit, after the transaction
- * is already dirty. However, there's nothing to stop this from getting
- * called directly, as done by xfs_qm_scall_setqlim. Hence, the TRANS_DIRTY
- * flag.
- */
-void
-xfs_trans_log_dquot(
- xfs_trans_t *tp,
- xfs_dquot_t *dqp)
-{
- ASSERT(dqp->q_transp == tp);
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
-
- tp->t_flags |= XFS_TRANS_DIRTY;
- dqp->q_logitem.qli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-/*
- * Carry forward whatever is left of the quota blk reservation to
- * the spanky new transaction
- */
-void
-xfs_trans_dup_dqinfo(
- xfs_trans_t *otp,
- xfs_trans_t *ntp)
-{
- xfs_dqtrx_t *oq, *nq;
- int i, j;
- xfs_dqtrx_t *oqa, *nqa;
-
- if (!otp->t_dqinfo)
- return;
-
- xfs_trans_alloc_dqinfo(ntp);
- oqa = otp->t_dqinfo->dqa_usrdquots;
- nqa = ntp->t_dqinfo->dqa_usrdquots;
-
- /*
- * Because the quota blk reservation is carried forward,
- * it is also necessary to carry forward the DQ_DIRTY flag.
- */
- if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
- ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
-
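- /*
- * Two passes: j == 0 walks the user dquots, j == 1 the
- * group/project dquots (oqa and nqa are advanced at the
- * bottom of the loop).
- */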
- for (j = 0; j < 2; j++) {
- for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
- if (oqa[i].qt_dquot == NULL)
- break;
- oq = &oqa[i];
- nq = &nqa[i];
-
- nq->qt_dquot = oq->qt_dquot;
- nq->qt_bcount_delta = nq->qt_icount_delta = 0;
- nq->qt_rtbcount_delta = 0;
-
- /*
- * Transfer whatever is left of the reservations.
- */
- nq->qt_blk_res = oq->qt_blk_res - oq->qt_blk_res_used;
- oq->qt_blk_res = oq->qt_blk_res_used;
-
- nq->qt_rtblk_res = oq->qt_rtblk_res -
- oq->qt_rtblk_res_used;
- oq->qt_rtblk_res = oq->qt_rtblk_res_used;
-
- nq->qt_ino_res = oq->qt_ino_res - oq->qt_ino_res_used;
- oq->qt_ino_res = oq->qt_ino_res_used;
-
- }
- oqa = otp->t_dqinfo->dqa_grpdquots;
- nqa = ntp->t_dqinfo->dqa_grpdquots;
- }
-}
-
-/*
- * Wrap around mod_dquot to account for both user and group quotas.
- */
-void
-xfs_trans_mod_dquot_byino(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- uint field,
- long delta)
-{
- xfs_mount_t *mp = tp->t_mountp;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) ||
- !XFS_IS_QUOTA_ON(mp) ||
- ip->i_ino == mp->m_sb.sb_uquotino ||
- ip->i_ino == mp->m_sb.sb_gquotino)
- return;
-
- if (tp->t_dqinfo == NULL)
- xfs_trans_alloc_dqinfo(tp);
-
- if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
- (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
- if (XFS_IS_OQUOTA_ON(mp) && ip->i_gdquot)
- (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
-}
-
-STATIC xfs_dqtrx_t *
-xfs_trans_get_dqtrx(
- xfs_trans_t *tp,
- xfs_dquot_t *dqp)
-{
- int i;
- xfs_dqtrx_t *qa;
-
- qa = XFS_QM_ISUDQ(dqp) ?
- tp->t_dqinfo->dqa_usrdquots : tp->t_dqinfo->dqa_grpdquots;
-
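- /*
- * Return the slot that already tracks this dquot if there is
- * one, else the first free slot; NULL if the array is full.
- */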
- for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
- if (qa[i].qt_dquot == NULL ||
- qa[i].qt_dquot == dqp)
- return &qa[i];
- }
-
- return NULL;
-}
-
-/*
- * Make the changes in the transaction structure.
- * The moral equivalent to xfs_trans_mod_sb().
- * We don't touch any fields in the dquot, so we don't care
- * if it's locked or not (most of the time it won't be).
- */
-void
-xfs_trans_mod_dquot(
- xfs_trans_t *tp,
- xfs_dquot_t *dqp,
- uint field,
- long delta)
-{
- xfs_dqtrx_t *qtrx;
-
- ASSERT(tp);
- ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
- qtrx = NULL;
-
- if (tp->t_dqinfo == NULL)
- xfs_trans_alloc_dqinfo(tp);
- /*
- * Find either the first free slot or the slot that belongs
- * to this dquot.
- */
- qtrx = xfs_trans_get_dqtrx(tp, dqp);
- ASSERT(qtrx);
- if (qtrx->qt_dquot == NULL)
- qtrx->qt_dquot = dqp;
-
- switch (field) {
-
- /*
- * regular disk blk reservation
- */
- case XFS_TRANS_DQ_RES_BLKS:
- qtrx->qt_blk_res += (ulong)delta;
- break;
-
- /*
- * inode reservation
- */
- case XFS_TRANS_DQ_RES_INOS:
- qtrx->qt_ino_res += (ulong)delta;
- break;
-
- /*
- * disk blocks used.
- */
- case XFS_TRANS_DQ_BCOUNT:
- if (qtrx->qt_blk_res && delta > 0) {
- qtrx->qt_blk_res_used += (ulong)delta;
- ASSERT(qtrx->qt_blk_res >= qtrx->qt_blk_res_used);
- }
- qtrx->qt_bcount_delta += delta;
- break;
-
- case XFS_TRANS_DQ_DELBCOUNT:
- qtrx->qt_delbcnt_delta += delta;
- break;
-
- /*
- * Inode Count
- */
- case XFS_TRANS_DQ_ICOUNT:
- if (qtrx->qt_ino_res && delta > 0) {
- qtrx->qt_ino_res_used += (ulong)delta;
- ASSERT(qtrx->qt_ino_res >= qtrx->qt_ino_res_used);
- }
- qtrx->qt_icount_delta += delta;
- break;
-
- /*
- * rtblk reservation
- */
- case XFS_TRANS_DQ_RES_RTBLKS:
- qtrx->qt_rtblk_res += (ulong)delta;
- break;
-
- /*
- * rtblk count
- */
- case XFS_TRANS_DQ_RTBCOUNT:
- if (qtrx->qt_rtblk_res && delta > 0) {
- qtrx->qt_rtblk_res_used += (ulong)delta;
- ASSERT(qtrx->qt_rtblk_res >= qtrx->qt_rtblk_res_used);
- }
- qtrx->qt_rtbcount_delta += delta;
- break;
-
- case XFS_TRANS_DQ_DELRTBCOUNT:
- qtrx->qt_delrtb_delta += delta;
- break;
-
- default:
- ASSERT(0);
- }
- tp->t_flags |= XFS_TRANS_DQ_DIRTY;
-}
-
-
-/*
- * Given an array of dqtrx structures, lock all the dquots associated
- * and join them to the transaction, provided they have been modified.
- * We know that the highest number of dquots (of one type - usr OR grp),
- * involved in a transaction is 2 and that both usr and grp combined - 3.
- * So, we don't attempt to make this very generic.
- */
-STATIC void
-xfs_trans_dqlockedjoin(
- xfs_trans_t *tp,
- xfs_dqtrx_t *q)
-{
- ASSERT(q[0].qt_dquot != NULL);
- if (q[1].qt_dquot == NULL) {
- xfs_dqlock(q[0].qt_dquot);
- xfs_trans_dqjoin(tp, q[0].qt_dquot);
- } else {
- ASSERT(XFS_QM_TRANS_MAXDQS == 2);
- xfs_dqlock2(q[0].qt_dquot, q[1].qt_dquot);
- xfs_trans_dqjoin(tp, q[0].qt_dquot);
- xfs_trans_dqjoin(tp, q[1].qt_dquot);
- }
-}
-
-
-/*
- * Called by xfs_trans_commit() and similar in spirit to
- * xfs_trans_apply_sb_deltas().
- * Go thru all the dquots belonging to this transaction and modify the
- * INCORE dquot to reflect the actual usages.
- * Unreserve just the reservations done by this transaction.
- * dquot is still left locked at exit.
- */
-void
-xfs_trans_apply_dquot_deltas(
- xfs_trans_t *tp)
-{
- int i, j;
- xfs_dquot_t *dqp;
- xfs_dqtrx_t *qtrx, *qa;
- xfs_disk_dquot_t *d;
- long totalbdelta;
- long totalrtbdelta;
-
- if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
- return;
-
- ASSERT(tp->t_dqinfo);
- qa = tp->t_dqinfo->dqa_usrdquots;
- for (j = 0; j < 2; j++) {
- if (qa[0].qt_dquot == NULL) {
- qa = tp->t_dqinfo->dqa_grpdquots;
- continue;
- }
-
- /*
- * Lock all of the dquots and join them to the transaction.
- */
- xfs_trans_dqlockedjoin(tp, qa);
-
- for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
- qtrx = &qa[i];
- /*
- * The array of dquots is filled
- * sequentially, not sparsely.
- */
- if ((dqp = qtrx->qt_dquot) == NULL)
- break;
-
- ASSERT(XFS_DQ_IS_LOCKED(dqp));
- ASSERT(dqp->q_transp == tp);
-
- /*
- * adjust the actual number of blocks used
- */
- d = &dqp->q_core;
-
- /*
- * The issue here is that sometimes we intentionally skip the
- * blkquota reservation to be fair to users (when the amount
- * is small). On the other hand, delayed allocs do make
- * reservations, but those happen outside of a transaction,
- * so we have no idea how much was really reserved.
- * So, here we've accumulated delayed allocation blks and
- * non-delay blks. The assumption is that the delayed ones
- * are always reserved (outside of a transaction), and the
- * others may or may not have quota reservations.
- */
- totalbdelta = qtrx->qt_bcount_delta +
- qtrx->qt_delbcnt_delta;
- totalrtbdelta = qtrx->qt_rtbcount_delta +
- qtrx->qt_delrtb_delta;
-#ifdef QUOTADEBUG
- if (totalbdelta < 0)
- ASSERT(be64_to_cpu(d->d_bcount) >=
- (xfs_qcnt_t) -totalbdelta);
-
- if (totalrtbdelta < 0)
- ASSERT(be64_to_cpu(d->d_rtbcount) >=
- (xfs_qcnt_t) -totalrtbdelta);
-
- if (qtrx->qt_icount_delta < 0)
- ASSERT(be64_to_cpu(d->d_icount) >=
- (xfs_qcnt_t) -qtrx->qt_icount_delta);
-#endif
- if (totalbdelta)
- be64_add_cpu(&d->d_bcount, (xfs_qcnt_t)totalbdelta);
-
- if (qtrx->qt_icount_delta)
- be64_add_cpu(&d->d_icount, (xfs_qcnt_t)qtrx->qt_icount_delta);
-
- if (totalrtbdelta)
- be64_add_cpu(&d->d_rtbcount, (xfs_qcnt_t)totalrtbdelta);
-
- /*
- * Get any default limits in use.
- * Start/reset the timer(s) if needed.
- */
- if (d->d_id) {
- xfs_qm_adjust_dqlimits(tp->t_mountp, d);
- xfs_qm_adjust_dqtimers(tp->t_mountp, d);
- }
-
- dqp->dq_flags |= XFS_DQ_DIRTY;
- /*
- * add this to the list of items to get logged
- */
- xfs_trans_log_dquot(tp, dqp);
- /*
- * Take off what's left of the original reservation.
- * In case of delayed allocations, there's no
- * reservation that a transaction structure knows of.
- */
- if (qtrx->qt_blk_res != 0) {
- if (qtrx->qt_blk_res != qtrx->qt_blk_res_used) {
- if (qtrx->qt_blk_res >
- qtrx->qt_blk_res_used)
- dqp->q_res_bcount -= (xfs_qcnt_t)
- (qtrx->qt_blk_res -
- qtrx->qt_blk_res_used);
- else
- dqp->q_res_bcount -= (xfs_qcnt_t)
- (qtrx->qt_blk_res_used -
- qtrx->qt_blk_res);
- }
- } else {
- /*
- * These blks were never reserved, either inside
- * a transaction or outside one (in a delayed
- * allocation). Also, this isn't always a
- * negative number since we sometimes
- * deliberately skip quota reservations.
- */
- if (qtrx->qt_bcount_delta) {
- dqp->q_res_bcount +=
- (xfs_qcnt_t)qtrx->qt_bcount_delta;
- }
- }
- /*
- * Adjust the RT reservation.
- */
- if (qtrx->qt_rtblk_res != 0) {
- if (qtrx->qt_rtblk_res != qtrx->qt_rtblk_res_used) {
- if (qtrx->qt_rtblk_res >
- qtrx->qt_rtblk_res_used)
- dqp->q_res_rtbcount -= (xfs_qcnt_t)
- (qtrx->qt_rtblk_res -
- qtrx->qt_rtblk_res_used);
- else
- dqp->q_res_rtbcount -= (xfs_qcnt_t)
- (qtrx->qt_rtblk_res_used -
- qtrx->qt_rtblk_res);
- }
- } else {
- if (qtrx->qt_rtbcount_delta)
- dqp->q_res_rtbcount +=
- (xfs_qcnt_t)qtrx->qt_rtbcount_delta;
- }
-
- /*
- * Adjust the inode reservation.
- */
- if (qtrx->qt_ino_res != 0) {
- ASSERT(qtrx->qt_ino_res >=
- qtrx->qt_ino_res_used);
- if (qtrx->qt_ino_res > qtrx->qt_ino_res_used)
- dqp->q_res_icount -= (xfs_qcnt_t)
- (qtrx->qt_ino_res -
- qtrx->qt_ino_res_used);
- } else {
- if (qtrx->qt_icount_delta)
- dqp->q_res_icount +=
- (xfs_qcnt_t)qtrx->qt_icount_delta;
- }
-
- ASSERT(dqp->q_res_bcount >=
- be64_to_cpu(dqp->q_core.d_bcount));
- ASSERT(dqp->q_res_icount >=
- be64_to_cpu(dqp->q_core.d_icount));
- ASSERT(dqp->q_res_rtbcount >=
- be64_to_cpu(dqp->q_core.d_rtbcount));
- }
- /*
- * Do the group quotas next
- */
- qa = tp->t_dqinfo->dqa_grpdquots;
- }
-}
-
-/*
- * Release the reservations, and adjust the dquots accordingly.
- * This is called only when the transaction is being aborted. If by
- * any chance we have done dquot modifications incore (i.e. deltas) already,
- * we simply throw those away, since that's the expected behavior
- * when a transaction is curtailed without a commit.
- */
-void
-xfs_trans_unreserve_and_mod_dquots(
- xfs_trans_t *tp)
-{
- int i, j;
- xfs_dquot_t *dqp;
- xfs_dqtrx_t *qtrx, *qa;
- boolean_t locked;
-
- if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
- return;
-
- qa = tp->t_dqinfo->dqa_usrdquots;
-
- for (j = 0; j < 2; j++) {
- for (i = 0; i < XFS_QM_TRANS_MAXDQS; i++) {
- qtrx = &qa[i];
- /*
- * We assume that the array of dquots is filled
- * sequentially, not sparsely.
- */
- if ((dqp = qtrx->qt_dquot) == NULL)
- break;
- /*
- * Unreserve the original reservation. We don't care
- * about the number of blocks used field, or deltas.
- * Also we don't bother to zero the fields.
- */
- locked = B_FALSE;
- if (qtrx->qt_blk_res) {
- xfs_dqlock(dqp);
- locked = B_TRUE;
- dqp->q_res_bcount -=
- (xfs_qcnt_t)qtrx->qt_blk_res;
- }
- if (qtrx->qt_ino_res) {
- if (!locked) {
- xfs_dqlock(dqp);
- locked = B_TRUE;
- }
- dqp->q_res_icount -=
- (xfs_qcnt_t)qtrx->qt_ino_res;
- }
-
- if (qtrx->qt_rtblk_res) {
- if (!locked) {
- xfs_dqlock(dqp);
- locked = B_TRUE;
- }
- dqp->q_res_rtbcount -=
- (xfs_qcnt_t)qtrx->qt_rtblk_res;
- }
- if (locked)
- xfs_dqunlock(dqp);
-
- }
- qa = tp->t_dqinfo->dqa_grpdquots;
- }
-}
-
-STATIC void
-xfs_quota_warn(
- struct xfs_mount *mp,
- struct xfs_dquot *dqp,
- int type)
-{
- /* no warnings for project quotas - we just return ENOSPC later */
- if (dqp->dq_flags & XFS_DQ_PROJ)
- return;
- quota_send_warning((dqp->dq_flags & XFS_DQ_USER) ? USRQUOTA : GRPQUOTA,
- be32_to_cpu(dqp->q_core.d_id), mp->m_super->s_dev,
- type);
-}
-
-/*
- * This reserves disk blocks and inodes against a dquot.
- * Flags indicate whether the blk reservation is for RT or regular
- * blocks.
- * Sending in the XFS_QMOPT_FORCE_RES flag skips the quota check.
- */
-STATIC int
-xfs_trans_dqresv(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_dquot_t *dqp,
- long nblks,
- long ninos,
- uint flags)
-{
- xfs_qcnt_t hardlimit;
- xfs_qcnt_t softlimit;
- time_t timer;
- xfs_qwarncnt_t warns;
- xfs_qwarncnt_t warnlimit;
- xfs_qcnt_t count;
- xfs_qcnt_t *resbcountp;
- xfs_quotainfo_t *q = mp->m_quotainfo;
-
-
- xfs_dqlock(dqp);
-
- if (flags & XFS_TRANS_DQ_RES_BLKS) {
- hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
- if (!hardlimit)
- hardlimit = q->qi_bhardlimit;
- softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
- if (!softlimit)
- softlimit = q->qi_bsoftlimit;
- timer = be32_to_cpu(dqp->q_core.d_btimer);
- warns = be16_to_cpu(dqp->q_core.d_bwarns);
- warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
- resbcountp = &dqp->q_res_bcount;
- } else {
- ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
- hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
- if (!hardlimit)
- hardlimit = q->qi_rtbhardlimit;
- softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
- if (!softlimit)
- softlimit = q->qi_rtbsoftlimit;
- timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
- warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
- warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
- resbcountp = &dqp->q_res_rtbcount;
- }
-
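- /*
- * From here on, hardlimit/softlimit/timer/warns describe
- * whichever block pool (regular or realtime) the caller asked
- * to reserve from; the inode limits are re-read separately in
- * the ninos > 0 case below.
- */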
- if ((flags & XFS_QMOPT_FORCE_RES) == 0 &&
- dqp->q_core.d_id &&
- ((XFS_IS_UQUOTA_ENFORCED(dqp->q_mount) && XFS_QM_ISUDQ(dqp)) ||
- (XFS_IS_OQUOTA_ENFORCED(dqp->q_mount) &&
- (XFS_QM_ISPDQ(dqp) || XFS_QM_ISGDQ(dqp))))) {
-#ifdef QUOTADEBUG
- xfs_debug(mp,
- "BLK Res: nblks=%ld + resbcount=%Ld > hardlimit=%Ld?",
- nblks, *resbcountp, hardlimit);
-#endif
- if (nblks > 0) {
- /*
- * dquot is locked already. See if we'd go over the
- * hardlimit or exceed the timelimit if we allocate
- * nblks.
- */
- if (hardlimit > 0ULL &&
- hardlimit <= nblks + *resbcountp) {
- xfs_quota_warn(mp, dqp, QUOTA_NL_BHARDWARN);
- goto error_return;
- }
- if (softlimit > 0ULL &&
- softlimit <= nblks + *resbcountp) {
- if ((timer != 0 && get_seconds() > timer) ||
- (warns != 0 && warns >= warnlimit)) {
- xfs_quota_warn(mp, dqp,
- QUOTA_NL_BSOFTLONGWARN);
- goto error_return;
- }
-
- xfs_quota_warn(mp, dqp, QUOTA_NL_BSOFTWARN);
- }
- }
- if (ninos > 0) {
- count = be64_to_cpu(dqp->q_core.d_icount);
- timer = be32_to_cpu(dqp->q_core.d_itimer);
- warns = be16_to_cpu(dqp->q_core.d_iwarns);
- warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
- hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
- if (!hardlimit)
- hardlimit = q->qi_ihardlimit;
- softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
- if (!softlimit)
- softlimit = q->qi_isoftlimit;
-
- if (hardlimit > 0ULL && count >= hardlimit) {
- xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
- goto error_return;
- }
- if (softlimit > 0ULL && count >= softlimit) {
- if ((timer != 0 && get_seconds() > timer) ||
- (warns != 0 && warns >= warnlimit)) {
- xfs_quota_warn(mp, dqp,
- QUOTA_NL_ISOFTLONGWARN);
- goto error_return;
- }
- xfs_quota_warn(mp, dqp, QUOTA_NL_ISOFTWARN);
- }
- }
- }
-
- /*
- * Change the reservation, but not the actual usage.
- * Note that q_res_bcount = q_core.d_bcount + resv
- */
- (*resbcountp) += (xfs_qcnt_t)nblks;
- if (ninos != 0)
- dqp->q_res_icount += (xfs_qcnt_t)ninos;
-
- /*
- * note the reservation amt in the trans struct too,
- * so that the transaction knows how much was reserved by
- * it against this particular dquot.
- * We don't do this when we are reserving for a delayed allocation,
- * because we don't have the luxury of a transaction envelope then.
- */
- if (tp) {
- ASSERT(tp->t_dqinfo);
- ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
- if (nblks != 0)
- xfs_trans_mod_dquot(tp, dqp,
- flags & XFS_QMOPT_RESBLK_MASK,
- nblks);
- if (ninos != 0)
- xfs_trans_mod_dquot(tp, dqp,
- XFS_TRANS_DQ_RES_INOS,
- ninos);
- }
- ASSERT(dqp->q_res_bcount >= be64_to_cpu(dqp->q_core.d_bcount));
- ASSERT(dqp->q_res_rtbcount >= be64_to_cpu(dqp->q_core.d_rtbcount));
- ASSERT(dqp->q_res_icount >= be64_to_cpu(dqp->q_core.d_icount));
-
- xfs_dqunlock(dqp);
- return 0;
-
-error_return:
- xfs_dqunlock(dqp);
- if (flags & XFS_QMOPT_ENOSPC)
- return ENOSPC;
- return EDQUOT;
-}
-
-
-/*
- * Given dquot(s), make disk block and/or inode reservations against them.
- * The fact that this does the reservation against both the usr and
- * grp/prj quotas is important, because this follows a both-or-nothing
- * approach.
- *
- * flags = XFS_QMOPT_FORCE_RES evades limit enforcement. Used by chown.
- * XFS_QMOPT_ENOSPC returns ENOSPC not EDQUOT. Used by pquota.
- * XFS_TRANS_DQ_RES_BLKS reserves regular disk blocks
- * XFS_TRANS_DQ_RES_RTBLKS reserves realtime disk blocks
- * dquots are unlocked on return, if they were not locked by caller.
- */
-int
-xfs_trans_reserve_quota_bydquots(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_dquot_t *udqp,
- xfs_dquot_t *gdqp,
- long nblks,
- long ninos,
- uint flags)
-{
- int resvd = 0, error;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
- return 0;
-
- if (tp && tp->t_dqinfo == NULL)
- xfs_trans_alloc_dqinfo(tp);
-
- ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-
- if (udqp) {
- error = xfs_trans_dqresv(tp, mp, udqp, nblks, ninos,
- (flags & ~XFS_QMOPT_ENOSPC));
- if (error)
- return error;
- resvd = 1;
- }
-
- if (gdqp) {
- error = xfs_trans_dqresv(tp, mp, gdqp, nblks, ninos, flags);
- if (error) {
- /*
- * can't do it, so backout previous reservation
- */
- if (resvd) {
- flags |= XFS_QMOPT_FORCE_RES;
- xfs_trans_dqresv(tp, mp, udqp,
- -nblks, -ninos, flags);
- }
- return error;
- }
- }
-
- /*
- * Didn't change anything critical, so no need to log.
- */
- return 0;
-}
-
-
-/*
- * Lock the dquot and change the reservation if we can.
- * This doesn't change the actual usage, just the reservation.
- * The inode sent in is locked.
- */
-int
-xfs_trans_reserve_quota_nblks(
- struct xfs_trans *tp,
- struct xfs_inode *ip,
- long nblks,
- long ninos,
- uint flags)
-{
- struct xfs_mount *mp = ip->i_mount;
-
- if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
- return 0;
- if (XFS_IS_PQUOTA_ON(mp))
- flags |= XFS_QMOPT_ENOSPC;
-
- ASSERT(ip->i_ino != mp->m_sb.sb_uquotino);
- ASSERT(ip->i_ino != mp->m_sb.sb_gquotino);
-
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT((flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
- XFS_TRANS_DQ_RES_RTBLKS ||
- (flags & ~(XFS_QMOPT_FORCE_RES | XFS_QMOPT_ENOSPC)) ==
- XFS_TRANS_DQ_RES_BLKS);
-
- /*
- * Reserve nblks against these dquots, with trans as the mediator.
- */
- return xfs_trans_reserve_quota_bydquots(tp, mp,
- ip->i_udquot, ip->i_gdquot,
- nblks, ninos, flags);
-}
-
-/*
- * This routine is called to allocate a quotaoff log item.
- */
-xfs_qoff_logitem_t *
-xfs_trans_get_qoff_item(
- xfs_trans_t *tp,
- xfs_qoff_logitem_t *startqoff,
- uint flags)
-{
- xfs_qoff_logitem_t *q;
-
- ASSERT(tp != NULL);
-
- q = xfs_qm_qoff_logitem_init(tp->t_mountp, startqoff, flags);
- ASSERT(q != NULL);
-
- /*
- * Get a log_item_desc to point at the new item.
- */
- xfs_trans_add_item(tp, &q->qql_item);
- return q;
-}
-
-
-/*
- * This is called to mark the quotaoff logitem as needing
- * to be logged when the transaction is committed. The logitem must
- * already be associated with the given transaction.
- */
-void
-xfs_trans_log_quotaoff_item(
- xfs_trans_t *tp,
- xfs_qoff_logitem_t *qlp)
-{
- tp->t_flags |= XFS_TRANS_DIRTY;
- qlp->qql_item.li_desc->lid_flags |= XFS_LID_DIRTY;
-}
-
-STATIC void
-xfs_trans_alloc_dqinfo(
- xfs_trans_t *tp)
-{
- tp->t_dqinfo = kmem_zone_zalloc(xfs_Gqm->qm_dqtrxzone, KM_SLEEP);
-}
-
-void
-xfs_trans_free_dqinfo(
- xfs_trans_t *tp)
-{
- if (!tp->t_dqinfo)
- return;
- kmem_zone_free(xfs_Gqm->qm_dqtrxzone, tp->t_dqinfo);
- tp->t_dqinfo = NULL;
-}
diff --git a/fs/xfs/support/uuid.c b/fs/xfs/support/uuid.c
deleted file mode 100644
index b83f76b..0000000
--- a/fs/xfs/support/uuid.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include <xfs.h>
-
-/* IRIX interpretation of a uuid_t */
-typedef struct {
- __be32 uu_timelow;
- __be16 uu_timemid;
- __be16 uu_timehi;
- __be16 uu_clockseq;
- __be16 uu_node[3];
-} xfs_uu_t;
-
-/*
- * uuid_getnodeuniq - obtain the node unique fields of a UUID.
- *
- * This is not in any way a standard or condoned UUID function;
- * it is just something that's needed for user-level file handles.
- */
-void
-uuid_getnodeuniq(uuid_t *uuid, int fsid [2])
-{
- xfs_uu_t *uup = (xfs_uu_t *)uuid;
-
- fsid[0] = (be16_to_cpu(uup->uu_clockseq) << 16) |
- be16_to_cpu(uup->uu_timemid);
- fsid[1] = be32_to_cpu(uup->uu_timelow);
-}
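/*
 * Worked example (editorial, not from the patch): for a UUID whose
 * uu_clockseq is 0x1234, uu_timemid is 0x5678 and uu_timelow is
 * 0x9ABCDEF0 on disk, uuid_getnodeuniq() above yields
 *
 *	fsid[0] = (0x1234 << 16) | 0x5678 = 0x12345678
 *	fsid[1] = 0x9ABCDEF0
 */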
-
-int
-uuid_is_nil(uuid_t *uuid)
-{
- int i;
- char *cp = (char *)uuid;
-
- if (uuid == NULL)
- return 0;
- /* implied check of version number here... */
- for (i = 0; i < sizeof *uuid; i++)
- if (*cp++)
- return 0; /* not nil */
- return 1; /* is nil */
-}
-
-int
-uuid_equal(uuid_t *uuid1, uuid_t *uuid2)
-{
- return memcmp(uuid1, uuid2, sizeof(uuid_t)) ? 0 : 1;
-}
diff --git a/fs/xfs/support/uuid.h b/fs/xfs/support/uuid.h
deleted file mode 100644
index 4732d71..0000000
--- a/fs/xfs/support/uuid.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_SUPPORT_UUID_H__
-#define __XFS_SUPPORT_UUID_H__
-
-typedef struct {
- unsigned char __u_bits[16];
-} uuid_t;
-
-extern int uuid_is_nil(uuid_t *uuid);
-extern int uuid_equal(uuid_t *uuid1, uuid_t *uuid2);
-extern void uuid_getnodeuniq(uuid_t *uuid, int fsid [2]);
-
-#endif /* __XFS_SUPPORT_UUID_H__ */
diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h
index 5ad8ad3..d8b11b7 100644
--- a/fs/xfs/xfs.h
+++ b/fs/xfs/xfs.h
@@ -22,8 +22,8 @@
#define STATIC
#define DEBUG 1
#define XFS_BUF_LOCK_TRACKING 1
-/* #define QUOTADEBUG 1 */
#endif
-#include <linux-2.6/xfs_linux.h>
+#include "xfs_linux.h"
+
#endif /* __XFS_H__ */
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 11dd720..39632d9 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -42,7 +42,6 @@ struct xfs_acl {
#define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1)
#ifdef CONFIG_XFS_POSIX_ACL
-extern int xfs_check_acl(struct inode *inode, int mask, unsigned int flags);
extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl);
extern int xfs_acl_chmod(struct inode *inode);
@@ -52,8 +51,10 @@ extern int posix_acl_default_exists(struct inode *inode);
extern const struct xattr_handler xfs_xattr_acl_access_handler;
extern const struct xattr_handler xfs_xattr_acl_default_handler;
#else
-# define xfs_check_acl NULL
-# define xfs_get_acl(inode, type) NULL
+static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)
+{
+ return NULL;
+}
# define xfs_inherit_acl(inode, default_acl) 0
# define xfs_acl_chmod(inode) 0
# define posix_acl_access_exists(inode) 0
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 6530769..4805f00 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -103,7 +103,7 @@ typedef struct xfs_agf {
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGF_DADDR(mp) ((xfs_daddr_t)(1 << (mp)->m_sectbb_log))
#define XFS_AGF_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGF_DADDR(mp))
-#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_AGF(bp) ((xfs_agf_t *)((bp)->b_addr))
extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
@@ -156,7 +156,7 @@ typedef struct xfs_agi {
/* disk block (xfs_daddr_t) in the AG */
#define XFS_AGI_DADDR(mp) ((xfs_daddr_t)(2 << (mp)->m_sectbb_log))
#define XFS_AGI_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGI_DADDR(mp))
-#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_AGI(bp) ((xfs_agi_t *)((bp)->b_addr))
extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, struct xfs_buf **bpp);
@@ -168,7 +168,7 @@ extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,
#define XFS_AGFL_DADDR(mp) ((xfs_daddr_t)(3 << (mp)->m_sectbb_log))
#define XFS_AGFL_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_AGFL_DADDR(mp))
#define XFS_AGFL_SIZE(mp) ((mp)->m_sb.sb_sectsize / sizeof(xfs_agblock_t))
-#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_AGFL(bp) ((xfs_agfl_t *)((bp)->b_addr))
typedef struct xfs_agfl {
__be32 agfl_bno[1]; /* actually XFS_AGFL_SIZE(mp) */
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 95862bb..896f1d9 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -451,9 +451,8 @@ xfs_alloc_read_agfl(
XFS_FSS_TO_BB(mp, 1), 0, &bp);
if (error)
return error;
- ASSERT(bp);
- ASSERT(!XFS_BUF_GETERROR(bp));
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
+ ASSERT(!xfs_buf_geterror(bp));
+ xfs_buf_set_ref(bp, XFS_AGFL_REF);
*bpp = bp;
return 0;
}
@@ -570,9 +569,7 @@ xfs_alloc_ag_vextent_exact(
xfs_agblock_t tbno; /* start block of trimmed extent */
xfs_extlen_t tlen; /* length of trimmed extent */
xfs_agblock_t tend; /* end block of trimmed extent */
- xfs_agblock_t end; /* end of allocated extent */
int i; /* success/failure of operation */
- xfs_extlen_t rlen; /* length of returned extent */
ASSERT(args->alignment == 1);
@@ -625,18 +622,16 @@ xfs_alloc_ag_vextent_exact(
*
* Fix the length according to mod and prod if given.
*/
- end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
- args->len = end - args->agbno;
+ args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
+ - args->agbno;
xfs_alloc_fix_len(args);
if (!xfs_alloc_fix_minleft(args))
goto not_found;
- rlen = args->len;
- ASSERT(args->agbno + rlen <= tend);
- end = args->agbno + rlen;
+ ASSERT(args->agbno + args->len <= tend);
/*
- * We are allocating agbno for rlen [agbno .. end]
+ * We are allocating agbno for args->len
* Allocate/initialize a cursor for the by-size btree.
*/
cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
@@ -1080,12 +1075,13 @@ restart:
* If we couldn't get anything, give up.
*/
if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+ xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+
if (!forced++) {
trace_xfs_alloc_near_busy(args);
xfs_log_force(args->mp, XFS_LOG_SYNC);
goto restart;
}
-
trace_xfs_alloc_size_neither(args);
args->agbno = NULLAGBLOCK;
return 0;
@@ -2120,14 +2116,14 @@ xfs_read_agf(
if (!*bpp)
return 0;
- ASSERT(!XFS_BUF_GETERROR(*bpp));
+ ASSERT(!(*bpp)->b_error);
agf = XFS_BUF_TO_AGF(*bpp);
/*
* Validate the magic number of the agf block.
*/
agf_ok =
- be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
+ agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) &&
XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
@@ -2144,7 +2140,7 @@ xfs_read_agf(
xfs_trans_brelse(tp, *bpp);
return XFS_ERROR(EFSCORRUPTED);
}
- XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
+ xfs_buf_set_ref(*bpp, XFS_AGF_REF);
return 0;
}
@@ -2172,7 +2168,7 @@ xfs_alloc_read_agf(
return error;
if (!*bpp)
return 0;
- ASSERT(!XFS_BUF_GETERROR(*bpp));
+ ASSERT(!(*bpp)->b_error);
agf = XFS_BUF_TO_AGF(*bpp);
pag = xfs_perag_get(mp, agno);
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index 2b35188..ffb3386 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -31,7 +31,6 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -311,72 +310,6 @@ xfs_allocbt_recs_inorder(
}
#endif /* DEBUG */
-#ifdef XFS_BTREE_TRACE
-ktrace_t *xfs_allocbt_trace_buf;
-
-STATIC void
-xfs_allocbt_trace_enter(
- struct xfs_btree_cur *cur,
- const char *func,
- char *s,
- int type,
- int line,
- __psunsigned_t a0,
- __psunsigned_t a1,
- __psunsigned_t a2,
- __psunsigned_t a3,
- __psunsigned_t a4,
- __psunsigned_t a5,
- __psunsigned_t a6,
- __psunsigned_t a7,
- __psunsigned_t a8,
- __psunsigned_t a9,
- __psunsigned_t a10)
-{
- ktrace_enter(xfs_allocbt_trace_buf, (void *)(__psint_t)type,
- (void *)func, (void *)s, NULL, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
-}
-
-STATIC void
-xfs_allocbt_trace_cursor(
- struct xfs_btree_cur *cur,
- __uint32_t *s0,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *s0 = cur->bc_private.a.agno;
- *l0 = cur->bc_rec.a.ar_startblock;
- *l1 = cur->bc_rec.a.ar_blockcount;
-}
-
-STATIC void
-xfs_allocbt_trace_key(
- struct xfs_btree_cur *cur,
- union xfs_btree_key *key,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *l0 = be32_to_cpu(key->alloc.ar_startblock);
- *l1 = be32_to_cpu(key->alloc.ar_blockcount);
-}
-
-STATIC void
-xfs_allocbt_trace_record(
- struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
- __uint64_t *l0,
- __uint64_t *l1,
- __uint64_t *l2)
-{
- *l0 = be32_to_cpu(rec->alloc.ar_startblock);
- *l1 = be32_to_cpu(rec->alloc.ar_blockcount);
- *l2 = 0;
-}
-#endif /* XFS_BTREE_TRACE */
-
static const struct xfs_btree_ops xfs_allocbt_ops = {
.rec_len = sizeof(xfs_alloc_rec_t),
.key_len = sizeof(xfs_alloc_key_t),
@@ -393,18 +326,10 @@ static const struct xfs_btree_ops xfs_allocbt_ops = {
.init_rec_from_cur = xfs_allocbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_allocbt_init_ptr_from_cur,
.key_diff = xfs_allocbt_key_diff,
-
#ifdef DEBUG
.keys_inorder = xfs_allocbt_keys_inorder,
.recs_inorder = xfs_allocbt_recs_inorder,
#endif
-
-#ifdef XFS_BTREE_TRACE
- .trace_enter = xfs_allocbt_trace_enter,
- .trace_cursor = xfs_allocbt_trace_cursor,
- .trace_key = xfs_allocbt_trace_key,
- .trace_record = xfs_allocbt_trace_record,
-#endif
};
/*
@@ -427,13 +352,16 @@ xfs_allocbt_init_cursor(
cur->bc_tp = tp;
cur->bc_mp = mp;
- cur->bc_nlevels = be32_to_cpu(agf->agf_levels[btnum]);
cur->bc_btnum = btnum;
cur->bc_blocklog = mp->m_sb.sb_blocklog;
-
cur->bc_ops = &xfs_allocbt_ops;
- if (btnum == XFS_BTNUM_CNT)
+
+ if (btnum == XFS_BTNUM_CNT) {
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]);
cur->bc_flags = XFS_BTREE_LASTREC_UPDATE;
+ } else {
+ cur->bc_nlevels = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]);
+ }
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
diff --git a/fs/xfs/xfs_arch.h b/fs/xfs/xfs_arch.h
deleted file mode 100644
index 0902249..0000000
--- a/fs/xfs/xfs_arch.h
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_ARCH_H__
-#define __XFS_ARCH_H__
-
-#ifndef XFS_BIG_INUMS
-# error XFS_BIG_INUMS must be defined true or false
-#endif
-
-#ifdef __KERNEL__
-
-#include <asm/byteorder.h>
-
-#ifdef __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
-#else /* __KERNEL__ */
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-#define XFS_NATIVE_HOST 1
-#else
-#undef XFS_NATIVE_HOST
-#endif
-
-#ifdef XFS_NATIVE_HOST
-#define cpu_to_be16(val) ((__force __be16)(__u16)(val))
-#define cpu_to_be32(val) ((__force __be32)(__u32)(val))
-#define cpu_to_be64(val) ((__force __be64)(__u64)(val))
-#define be16_to_cpu(val) ((__force __u16)(__be16)(val))
-#define be32_to_cpu(val) ((__force __u32)(__be32)(val))
-#define be64_to_cpu(val) ((__force __u64)(__be64)(val))
-#else
-#define cpu_to_be16(val) ((__force __be16)__swab16((__u16)(val)))
-#define cpu_to_be32(val) ((__force __be32)__swab32((__u32)(val)))
-#define cpu_to_be64(val) ((__force __be64)__swab64((__u64)(val)))
-#define be16_to_cpu(val) (__swab16((__force __u16)(__be16)(val)))
-#define be32_to_cpu(val) (__swab32((__force __u32)(__be32)(val)))
-#define be64_to_cpu(val) (__swab64((__force __u64)(__be64)(val)))
-#endif
-
-static inline void be16_add_cpu(__be16 *a, __s16 b)
-{
- *a = cpu_to_be16(be16_to_cpu(*a) + b);
-}
-
-static inline void be32_add_cpu(__be32 *a, __s32 b)
-{
- *a = cpu_to_be32(be32_to_cpu(*a) + b);
-}
-
-static inline void be64_add_cpu(__be64 *a, __s64 b)
-{
- *a = cpu_to_be64(be64_to_cpu(*a) + b);
-}
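/*
 * Usage example (editorial): the *_add_cpu() helpers above adjust an
 * on-disk big-endian value in place, e.g. decrementing a free-block
 * count:
 *
 *	__be32 freeblks = cpu_to_be32(100);
 *	be32_add_cpu(&freeblks, -1);	/- freeblks == cpu_to_be32(99) -/
 */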
-
-#endif /* __KERNEL__ */
-
-/*
- * get and set integers from potentially unaligned locations
- */
-
-#define INT_GET_UNALIGNED_16_BE(pointer) \
- ((__u16)((((__u8*)(pointer))[0] << 8) | (((__u8*)(pointer))[1])))
-#define INT_SET_UNALIGNED_16_BE(pointer,value) \
- { \
- ((__u8*)(pointer))[0] = (((value) >> 8) & 0xff); \
- ((__u8*)(pointer))[1] = (((value) ) & 0xff); \
- }
-
-/*
- * In directories, inode numbers are stored on disk as unaligned arrays
- * of unsigned 8-bit integers.
- *
- * For v1 directories, or v2 directories that contain inode numbers that
- * do not fit into 32 bits, the array has eight members, but the first
- * member is always zero:
- *
- * |unused|48-55|40-47|32-39|24-31|16-23| 8-15| 0- 7|
- *
- * For v2 directories that only contain entries with inode numbers that
- * fit into 32 bits, a four-member array is used:
- *
- * |24-31|16-23| 8-15| 0- 7|
- */
-
-#define XFS_GET_DIR_INO4(di) \
- (((__u32)(di).i[0] << 24) | ((di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
-
-#define XFS_PUT_DIR_INO4(from, di) \
-do { \
- (di).i[0] = (((from) & 0xff000000ULL) >> 24); \
- (di).i[1] = (((from) & 0x00ff0000ULL) >> 16); \
- (di).i[2] = (((from) & 0x0000ff00ULL) >> 8); \
- (di).i[3] = ((from) & 0x000000ffULL); \
-} while (0)
-
-#define XFS_DI_HI(di) \
- (((__u32)(di).i[1] << 16) | ((di).i[2] << 8) | ((di).i[3]))
-#define XFS_DI_LO(di) \
- (((__u32)(di).i[4] << 24) | ((di).i[5] << 16) | ((di).i[6] << 8) | ((di).i[7]))
-
-#define XFS_GET_DIR_INO8(di) \
- (((xfs_ino_t)XFS_DI_LO(di) & 0xffffffffULL) | \
- ((xfs_ino_t)XFS_DI_HI(di) << 32))
-
-#define XFS_PUT_DIR_INO8(from, di) \
-do { \
- (di).i[0] = 0; \
- (di).i[1] = (((from) & 0x00ff000000000000ULL) >> 48); \
- (di).i[2] = (((from) & 0x0000ff0000000000ULL) >> 40); \
- (di).i[3] = (((from) & 0x000000ff00000000ULL) >> 32); \
- (di).i[4] = (((from) & 0x00000000ff000000ULL) >> 24); \
- (di).i[5] = (((from) & 0x0000000000ff0000ULL) >> 16); \
- (di).i[6] = (((from) & 0x000000000000ff00ULL) >> 8); \
- (di).i[7] = ((from) & 0x00000000000000ffULL); \
-} while (0)
-
-#endif /* __XFS_ARCH_H__ */
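/*
 * Editorial sketch (not part of the patch): round-tripping an inode
 * number through the unaligned 4-byte array that XFS_PUT_DIR_INO4() and
 * XFS_GET_DIR_INO4() above operate on.  struct ino4 is a hypothetical
 * stand-in for the on-disk array type.
 */
struct ino4 { __u8 i[4]; };

static void dir_ino4_example(void)
{
	struct ino4 di;
	__u32 ino = 0x00a1b2c3;

	XFS_PUT_DIR_INO4(ino, di);	/* i[] = { 0x00, 0xa1, 0xb2, 0xc3 } */
	ASSERT(XFS_GET_DIR_INO4(di) == ino);
}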
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index 99d4011..1e5d97f 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -319,7 +319,7 @@ xfs_attr_set_int(
return (error);
}
- xfs_trans_ijoin(args.trans, dp);
+ xfs_trans_ijoin(args.trans, dp, 0);
/*
* If the attribute list is non-existent or a shortform list,
@@ -389,7 +389,7 @@ xfs_attr_set_int(
* a new one. We need the inode to be in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args.trans, dp);
+ xfs_trans_ijoin(args.trans, dp, 0);
/*
* Commit the leaf transformation. We'll need another (linked)
@@ -537,7 +537,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
* No need to make quota reservations here. We expect to release some
* blocks not allocate in the common case.
*/
- xfs_trans_ijoin(args.trans, dp);
+ xfs_trans_ijoin(args.trans, dp, 0);
/*
* Decide on what work routines to call based on the inode size.
@@ -809,7 +809,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
* No need to make quota reservations here. We expect to release some
* blocks, not allocate, in the common case.
*/
- xfs_trans_ijoin(trans, dp);
+ xfs_trans_ijoin(trans, dp, 0);
/*
* Decide on what work routines to call based on the inode size.
@@ -823,7 +823,7 @@ xfs_attr_inactive(xfs_inode_t *dp)
if (error)
goto out;
- error = xfs_itruncate_finish(&trans, dp, 0LL, XFS_ATTR_FORK, 0);
+ error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0);
if (error)
goto out;
@@ -961,7 +961,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the current trans (including the inode) and start
@@ -1063,7 +1063,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_buf_done(bp);
@@ -1137,7 +1137,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_buf_done(bp);
return(0);
@@ -1191,7 +1191,7 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
return XFS_ERROR(error);
ASSERT(bp != NULL);
leaf = bp->data;
- if (unlikely(be16_to_cpu(leaf->hdr.info.magic) != XFS_ATTR_LEAF_MAGIC)) {
+ if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
xfs_da_brelse(NULL, bp);
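/*
 * Editorial note on the recurring conversion above (rationale inferred
 * from the pattern of the patch, not stated in it): comparing the
 * on-disk field against cpu_to_be16(CONSTANT), rather than byte-swapping
 * the field with be16_to_cpu(), lets the compiler swap the constant at
 * build time, so the check needs no runtime swap on little-endian
 * hosts.  Minimal shape, with a hypothetical header struct:
 */
struct disk_hdr { __be16 magic; };

static bool magic_ok(const struct disk_hdr *h)
{
	return h->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
}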
@@ -1291,7 +1291,7 @@ restart:
* in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the node conversion and start the next
@@ -1328,7 +1328,7 @@ restart:
* a new one. We need the inode to be in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
} else {
/*
* Addition succeeded, update Btree hashvals.
@@ -1440,7 +1440,7 @@ restart:
* in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
}
/*
@@ -1572,7 +1572,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
/*
* Commit the Btree join operation and start a new trans.
@@ -1598,9 +1598,8 @@ xfs_attr_node_removename(xfs_da_args_t *args)
XFS_ATTR_FORK);
if (error)
goto out;
- ASSERT(be16_to_cpu(((xfs_attr_leafblock_t *)
- bp->data)->hdr.info.magic)
- == XFS_ATTR_LEAF_MAGIC);
+ ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) ==
+ cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
xfs_bmap_init(args->flist, args->firstblock);
@@ -1624,7 +1623,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
* in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
} else
xfs_da_brelse(args->trans, bp);
}
@@ -1865,11 +1864,11 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
return(XFS_ERROR(EFSCORRUPTED));
}
node = bp->data;
- if (be16_to_cpu(node->hdr.info.magic)
- == XFS_ATTR_LEAF_MAGIC)
+ if (node->hdr.info.magic ==
+ cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
break;
- if (unlikely(be16_to_cpu(node->hdr.info.magic)
- != XFS_DA_NODE_MAGIC)) {
+ if (unlikely(node->hdr.info.magic !=
+ cpu_to_be16(XFS_DA_NODE_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_node_list(3)",
XFS_ERRLEVEL_LOW,
context->dp->i_mount,
@@ -1904,8 +1903,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
*/
for (;;) {
leaf = bp->data;
- if (unlikely(be16_to_cpu(leaf->hdr.info.magic)
- != XFS_ATTR_LEAF_MAGIC)) {
+ if (unlikely(leaf->hdr.info.magic !=
+ cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
@@ -1964,10 +1963,9 @@ xfs_attr_rmtval_get(xfs_da_args_t *args)
lblkno = args->rmtblkno;
while (valuelen > 0) {
nmap = ATTR_RMTVALUE_MAPSIZE;
- error = xfs_bmapi(args->trans, args->dp, (xfs_fileoff_t)lblkno,
- args->rmtblkcnt,
- XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- NULL, 0, map, &nmap, NULL);
+ error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
+ args->rmtblkcnt, map, &nmap,
+ XFS_BMAPI_ATTRFORK);
if (error)
return(error);
ASSERT(nmap >= 1);
@@ -2041,10 +2039,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
- error = xfs_bmapi(args->trans, dp, (xfs_fileoff_t)lblkno,
+ error = xfs_bmapi_write(args->trans, dp, (xfs_fileoff_t)lblkno,
blkcnt,
- XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA |
- XFS_BMAPI_WRITE,
+ XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
args->firstblock, args->total, &map, &nmap,
args->flist);
if (!error) {
@@ -2063,7 +2060,7 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, dp);
+ xfs_trans_ijoin(args->trans, dp, 0);
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
@@ -2093,14 +2090,11 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
*/
xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
- error = xfs_bmapi(NULL, dp, (xfs_fileoff_t)lblkno,
- args->rmtblkcnt,
- XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- args->firstblock, 0, &map, &nmap,
- NULL);
- if (error) {
+ error = xfs_bmapi_read(dp, (xfs_fileoff_t)lblkno,
+ args->rmtblkcnt, &map, &nmap,
+ XFS_BMAPI_ATTRFORK);
+ if (error)
return(error);
- }
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -2110,17 +2104,17 @@ xfs_attr_rmtval_set(xfs_da_args_t *args)
bp = xfs_buf_get(mp->m_ddev_targp, dblkno, blkcnt,
XBF_LOCK | XBF_DONT_BLOCK);
- ASSERT(bp);
- ASSERT(!XFS_BUF_GETERROR(bp));
-
+ if (!bp)
+ return ENOMEM;
tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen :
XFS_BUF_SIZE(bp);
xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE);
if (tmp < XFS_BUF_SIZE(bp))
xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp);
- if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */
- return (error);
- }
+ error = xfs_bwrite(bp); /* GROT: NOTE: synchronous write */
+ xfs_buf_relse(bp);
+ if (error)
+ return error;
src += tmp;
valuelen -= tmp;
@@ -2156,16 +2150,12 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
/*
* Try to remember where we decided to put the value.
*/
- xfs_bmap_init(args->flist, args->firstblock);
nmap = 1;
- error = xfs_bmapi(NULL, args->dp, (xfs_fileoff_t)lblkno,
- args->rmtblkcnt,
- XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- args->firstblock, 0, &map, &nmap,
- args->flist);
- if (error) {
+ error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno,
+ args->rmtblkcnt, &map, &nmap,
+ XFS_BMAPI_ATTRFORK);
+ if (error)
return(error);
- }
ASSERT(nmap == 1);
ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
(map.br_startblock != HOLESTARTBLOCK));
@@ -2178,8 +2168,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
*/
bp = xfs_incore(mp->m_ddev_targp, dblkno, blkcnt, XBF_TRYLOCK);
if (bp) {
- XFS_BUF_STALE(bp);
- XFS_BUF_UNDELAYWRITE(bp);
+ xfs_buf_stale(bp);
xfs_buf_relse(bp);
bp = NULL;
}
@@ -2217,7 +2206,7 @@ xfs_attr_rmtval_remove(xfs_da_args_t *args)
* a new one. We need the inode to be in all transactions.
*/
if (committed)
- xfs_trans_ijoin(args->trans, args->dp);
+ xfs_trans_ijoin(args->trans, args->dp, 0);
/*
* Close out trans and start the next one in the chain.
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index f49ecf2..c1b55e5 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -745,7 +745,7 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
int bytes, i;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
entry = &leaf->entries[0];
bytes = sizeof(struct xfs_attr_sf_hdr);
@@ -791,7 +791,7 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
ASSERT(bp != NULL);
memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
leaf = (xfs_attr_leafblock_t *)tmpbuffer;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
/*
@@ -886,7 +886,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
goto out;
node = bp1->data;
leaf = bp2->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
/* both on-disk, don't endian-flip twice */
node->btree[0].hashval =
leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
@@ -1011,7 +1011,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
int tablesize, entsize, sum, tmp, i;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT((args->index >= 0)
&& (args->index <= be16_to_cpu(leaf->hdr.count)));
hdr = &leaf->hdr;
@@ -1084,7 +1084,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
int tmp, i;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr = &leaf->hdr;
ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
ASSERT((args->index >= 0) && (args->index <= be16_to_cpu(hdr->count)));
@@ -1270,8 +1270,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
leaf1 = blk1->bp->data;
leaf2 = blk2->bp->data;
- ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
- ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
args = state->args;
/*
@@ -1547,7 +1547,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
*/
blk = &state->path.blk[ state->path.active-1 ];
info = blk->bp->data;
- ASSERT(be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
leaf = (xfs_attr_leafblock_t *)info;
count = be16_to_cpu(leaf->hdr.count);
bytes = sizeof(xfs_attr_leaf_hdr_t) +
@@ -1610,7 +1610,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
bytes = state->blocksize - (state->blocksize>>2);
bytes -= be16_to_cpu(leaf->hdr.usedbytes);
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
count += be16_to_cpu(leaf->hdr.count);
bytes -= be16_to_cpu(leaf->hdr.usedbytes);
bytes -= count * sizeof(xfs_attr_leaf_entry_t);
@@ -1664,7 +1664,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
xfs_mount_t *mp;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr = &leaf->hdr;
mp = args->trans->t_mountp;
ASSERT((be16_to_cpu(hdr->count) > 0)
@@ -1827,8 +1827,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
drop_leaf = drop_blk->bp->data;
save_leaf = save_blk->bp->data;
- ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
- ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
drop_hdr = &drop_leaf->hdr;
save_hdr = &save_leaf->hdr;
@@ -1929,7 +1929,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
xfs_dahash_t hashval;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count)
< (XFS_LBSIZE(args->dp->i_mount)/8));
@@ -2033,7 +2033,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
xfs_attr_leaf_name_remote_t *name_rmt;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count)
< (XFS_LBSIZE(args->dp->i_mount)/8));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
@@ -2101,8 +2101,8 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
/*
* Set up environment.
*/
- ASSERT(be16_to_cpu(leaf_s->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
- ASSERT(be16_to_cpu(leaf_d->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf_s->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ ASSERT(leaf_d->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr_s = &leaf_s->hdr;
hdr_d = &leaf_d->hdr;
ASSERT((be16_to_cpu(hdr_s->count) > 0) &&
@@ -2236,8 +2236,8 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
leaf1 = leaf1_bp->data;
leaf2 = leaf2_bp->data;
- ASSERT((be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC) &&
- (be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC));
+ ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
+ (leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
(be16_to_cpu(leaf2->hdr.count) > 0) &&
((be32_to_cpu(leaf2->entries[0].hashval) <
@@ -2260,7 +2260,7 @@ xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
xfs_attr_leafblock_t *leaf;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if (count)
*count = be16_to_cpu(leaf->hdr.count);
if (!leaf->hdr.count)
@@ -2279,7 +2279,7 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)
xfs_attr_leaf_name_remote_t *name_rmt;
int size;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if (leaf->entries[index].flags & XFS_ATTR_LOCAL) {
name_loc = xfs_attr_leaf_name_local(leaf, index);
size = xfs_attr_leaf_entsize_local(name_loc->namelen,
@@ -2465,7 +2465,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
ASSERT(bp != NULL);
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
ASSERT(args->index >= 0);
entry = &leaf->entries[ args->index ];
@@ -2529,7 +2529,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
ASSERT(bp != NULL);
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
ASSERT(args->index >= 0);
entry = &leaf->entries[ args->index ];
@@ -2599,13 +2599,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
}
leaf1 = bp1->data;
- ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
ASSERT(args->index >= 0);
entry1 = &leaf1->entries[ args->index ];
leaf2 = bp2->data;
- ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
ASSERT(args->index2 >= 0);
entry2 = &leaf2->entries[ args->index2 ];
@@ -2703,9 +2703,9 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
* This is a depth-first traversal!
*/
info = bp->data;
- if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) {
+ if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
error = xfs_attr_node_inactive(trans, dp, bp, 1);
- } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) {
+ } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
error = xfs_attr_leaf_inactive(trans, dp, bp);
} else {
error = XFS_ERROR(EIO);
@@ -2753,7 +2753,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
}
node = bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
parent_blkno = xfs_da_blkno(bp); /* save for re-read later */
count = be16_to_cpu(node->hdr.count);
if (!count) {
@@ -2787,10 +2787,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
* Invalidate the subtree, however we have to.
*/
info = child_bp->data;
- if (be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC) {
+ if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
error = xfs_attr_node_inactive(trans, dp,
child_bp, level+1);
- } else if (be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC) {
+ } else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
error = xfs_attr_leaf_inactive(trans, dp,
child_bp);
} else {
@@ -2850,7 +2850,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
int error, count, size, tmp, i;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
/*
* Count the number of "remote" value extents.
@@ -2940,9 +2940,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
* Try to remember where we decided to put the value.
*/
nmap = 1;
- error = xfs_bmapi(*trans, dp, (xfs_fileoff_t)tblkno, tblkcnt,
- XFS_BMAPI_ATTRFORK | XFS_BMAPI_METADATA,
- NULL, 0, &map, &nmap, NULL);
+ error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
+ &map, &nmap, XFS_BMAPI_ATTRFORK);
if (error) {
return(error);
}
@@ -2962,6 +2961,8 @@ xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
bp = xfs_trans_get_buf(*trans,
dp->i_mount->m_ddev_targp,
dblkno, dblkcnt, XBF_LOCK);
+ if (!bp)
+ return ENOMEM;
xfs_trans_binval(*trans, bp);
/*
* Roll to next transaction.
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 9c7d22f..c782906 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -111,8 +111,15 @@ typedef struct xfs_attr_leaf_name_remote {
typedef struct xfs_attr_leafblock {
xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
- xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */
- xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */
+ /*
+ * The rest of the block contains the following structures after the
+ * leaf entries, growing from the bottom up. The variables are never
+ * referenced and defining them can actually make gcc optimize away
+ * accesses to the 'entries' array above index 0, so don't do that.
+ *
+ * xfs_attr_leaf_name_local_t namelist;
+ * xfs_attr_leaf_name_remote_t valuelist;
+ */
} xfs_attr_leafblock_t;
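/*
 * Editorial sketch: with the trailing members gone, name/value data is
 * reached by byte offset from the start of the block, as the existing
 * xfs_attr_leaf_name_local()/..._remote() helpers do elsewhere in this
 * patch.  Approximate shape (assumption based on those helpers; nameidx
 * is the entry's name offset from the start of the block):
 */
static inline char *
attr_leaf_name(xfs_attr_leafblock_t *leafp, int idx)
{
	return &((char *)leafp)[be16_to_cpu(leafp->entries[idx].nameidx)];
}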
/*
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index a175933..d0ab788 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -29,15 +29,11 @@
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_mount.h"
#include "xfs_itable.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
#include "xfs_inode_item.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
@@ -54,17 +50,22 @@
#include "xfs_trace.h"
-#ifdef DEBUG
-STATIC void
-xfs_bmap_check_leaf_extents(xfs_btree_cur_t *cur, xfs_inode_t *ip, int whichfork);
-#endif
-
kmem_zone_t *xfs_bmap_free_item_zone;
/*
* Prototypes for internal bmap routines.
*/
+#ifdef DEBUG
+STATIC void
+xfs_bmap_check_leaf_extents(
+ struct xfs_btree_cur *cur,
+ struct xfs_inode *ip,
+ int whichfork);
+#else
+#define xfs_bmap_check_leaf_extents(cur, ip, whichfork) do { } while (0)
+#endif
+
/*
* Called from xfs_bmap_add_attrfork to handle extents format files.
@@ -89,57 +90,6 @@ xfs_bmap_add_attrfork_local(
int *flags); /* inode logging flags */
/*
- * Called by xfs_bmap_add_extent to handle cases converting a delayed
- * allocation to a real allocation.
- */
-STATIC int /* error */
-xfs_bmap_add_extent_delay_real(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_extnum_t *idx, /* extent number to update/insert */
- xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
- xfs_bmbt_irec_t *new, /* new data to add to file extents */
- xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */
- xfs_fsblock_t *first, /* pointer to firstblock variable */
- xfs_bmap_free_t *flist, /* list of extents to be freed */
- int *logflagsp); /* inode logging flags */
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting a hole
- * to a delayed allocation.
- */
-STATIC int /* error */
-xfs_bmap_add_extent_hole_delay(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_extnum_t *idx, /* extent number to update/insert */
- xfs_bmbt_irec_t *new, /* new data to add to file extents */
- int *logflagsp); /* inode logging flags */
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting a hole
- * to a real allocation.
- */
-STATIC int /* error */
-xfs_bmap_add_extent_hole_real(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_extnum_t *idx, /* extent number to update/insert */
- xfs_btree_cur_t *cur, /* if null, not a btree */
- xfs_bmbt_irec_t *new, /* new data to add to file extents */
- int *logflagsp, /* inode logging flags */
- int whichfork); /* data or attr fork */
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting an unwritten
- * allocation to a real allocation or vice versa.
- */
-STATIC int /* error */
-xfs_bmap_add_extent_unwritten_real(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_extnum_t *idx, /* extent number to update/insert */
- xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
- xfs_bmbt_irec_t *new, /* new data to add to file extents */
- int *logflagsp); /* inode logging flags */
-
-/*
* xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
* It figures out where to ask the underlying allocator to put the new extent.
*/
@@ -218,19 +168,6 @@ xfs_bmap_search_extents(
xfs_bmbt_irec_t *prevp); /* out: previous extent entry found */
/*
- * Check the last inode extent to determine whether this allocation will result
- * in blocks being allocated at the end of the file. When we allocate new data
- * blocks at the end of the file which do not start at the previous data block,
- * we will try to align the new blocks at stripe unit boundaries.
- */
-STATIC int /* error */
-xfs_bmap_isaeof(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_fileoff_t off, /* file offset in fsblocks */
- int whichfork, /* data or attribute fork */
- char *aeof); /* return value */
-
-/*
* Compute the worst-case number of indirect blocks that will be used
* for ip's delayed extent of length "len".
*/
@@ -417,7 +354,7 @@ xfs_bmap_add_attrfork_local(
if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
return 0;
- if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(ip->i_d.di_mode)) {
mp = ip->i_mount;
memset(&dargs, 0, sizeof(dargs));
dargs.dp = ip;
@@ -434,186 +371,13 @@ xfs_bmap_add_attrfork_local(
}
/*
- * Called by xfs_bmapi to update file extent records and the btree
- * after allocating space (or doing a delayed allocation).
- */
-STATIC int /* error */
-xfs_bmap_add_extent(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_extnum_t *idx, /* extent number to update/insert */
- xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
- xfs_bmbt_irec_t *new, /* new data to add to file extents */
- xfs_fsblock_t *first, /* pointer to firstblock variable */
- xfs_bmap_free_t *flist, /* list of extents to be freed */
- int *logflagsp, /* inode logging flags */
- int whichfork) /* data or attr fork */
-{
- xfs_btree_cur_t *cur; /* btree cursor or null */
- xfs_filblks_t da_new; /* new count del alloc blocks used */
- xfs_filblks_t da_old; /* old count del alloc blocks used */
- int error; /* error return value */
- xfs_ifork_t *ifp; /* inode fork ptr */
- int logflags; /* returned value */
- xfs_extnum_t nextents; /* number of extents in file now */
-
- XFS_STATS_INC(xs_add_exlist);
-
- cur = *curp;
- ifp = XFS_IFORK_PTR(ip, whichfork);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- da_old = da_new = 0;
- error = 0;
-
- ASSERT(*idx >= 0);
- ASSERT(*idx <= nextents);
-
- /*
- * This is the first extent added to a new/empty file.
- * Special case this one, so other routines get to assume there are
- * already extents in the list.
- */
- if (nextents == 0) {
- xfs_iext_insert(ip, *idx, 1, new,
- whichfork == XFS_ATTR_FORK ? BMAP_ATTRFORK : 0);
-
- ASSERT(cur == NULL);
-
- if (!isnullstartblock(new->br_startblock)) {
- XFS_IFORK_NEXT_SET(ip, whichfork, 1);
- logflags = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
- } else
- logflags = 0;
- }
- /*
- * Any kind of new delayed allocation goes here.
- */
- else if (isnullstartblock(new->br_startblock)) {
- if (cur)
- ASSERT((cur->bc_private.b.flags &
- XFS_BTCUR_BPRV_WASDEL) == 0);
- error = xfs_bmap_add_extent_hole_delay(ip, idx, new,
- &logflags);
- }
- /*
- * Real allocation off the end of the file.
- */
- else if (*idx == nextents) {
- if (cur)
- ASSERT((cur->bc_private.b.flags &
- XFS_BTCUR_BPRV_WASDEL) == 0);
- error = xfs_bmap_add_extent_hole_real(ip, idx, cur, new,
- &logflags, whichfork);
- } else {
- xfs_bmbt_irec_t prev; /* old extent at offset idx */
-
- /*
- * Get the record referred to by idx.
- */
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &prev);
- /*
- * If it's a real allocation record, and the new allocation ends
- * after the start of the referred to record, then we're filling
- * in a delayed or unwritten allocation with a real one, or
- * converting real back to unwritten.
- */
- if (!isnullstartblock(new->br_startblock) &&
- new->br_startoff + new->br_blockcount > prev.br_startoff) {
- if (prev.br_state != XFS_EXT_UNWRITTEN &&
- isnullstartblock(prev.br_startblock)) {
- da_old = startblockval(prev.br_startblock);
- if (cur)
- ASSERT(cur->bc_private.b.flags &
- XFS_BTCUR_BPRV_WASDEL);
- error = xfs_bmap_add_extent_delay_real(ip,
- idx, &cur, new, &da_new,
- first, flist, &logflags);
- } else {
- ASSERT(new->br_state == XFS_EXT_NORM ||
- new->br_state == XFS_EXT_UNWRITTEN);
-
- error = xfs_bmap_add_extent_unwritten_real(ip,
- idx, &cur, new, &logflags);
- if (error)
- goto done;
- }
- }
- /*
- * Otherwise we're filling in a hole with an allocation.
- */
- else {
- if (cur)
- ASSERT((cur->bc_private.b.flags &
- XFS_BTCUR_BPRV_WASDEL) == 0);
- error = xfs_bmap_add_extent_hole_real(ip, idx, cur,
- new, &logflags, whichfork);
- }
- }
-
- if (error)
- goto done;
- ASSERT(*curp == cur || *curp == NULL);
-
- /*
- * Convert to a btree if necessary.
- */
- if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, whichfork) > ifp->if_ext_max) {
- int tmp_logflags; /* partial log flag return val */
-
- ASSERT(cur == NULL);
- error = xfs_bmap_extents_to_btree(ip->i_transp, ip, first,
- flist, &cur, da_old > 0, &tmp_logflags, whichfork);
- logflags |= tmp_logflags;
- if (error)
- goto done;
- }
- /*
- * Adjust for changes in reserved delayed indirect blocks.
- * Nothing to do for disk quotas here.
- */
- if (da_old || da_new) {
- xfs_filblks_t nblks;
-
- nblks = da_new;
- if (cur)
- nblks += cur->bc_private.b.allocated;
- ASSERT(nblks <= da_old);
- if (nblks < da_old)
- xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
- (int64_t)(da_old - nblks), 0);
- }
- /*
- * Clear out the allocated field, done with it now in any case.
- */
- if (cur) {
- cur->bc_private.b.allocated = 0;
- *curp = cur;
- }
-done:
-#ifdef DEBUG
- if (!error)
- xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
-#endif
- *logflagsp = logflags;
- return error;
-}
-
-/*
- * Called by xfs_bmap_add_extent to handle cases converting a delayed
- * allocation to a real allocation.
+ * Convert a delayed allocation to a real allocation.
*/
STATIC int /* error */
xfs_bmap_add_extent_delay_real(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_extnum_t *idx, /* extent number to update/insert */
- xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
- xfs_bmbt_irec_t *new, /* new data to add to file extents */
- xfs_filblks_t *dnew, /* new delayed-alloc indirect blocks */
- xfs_fsblock_t *first, /* pointer to firstblock variable */
- xfs_bmap_free_t *flist, /* list of extents to be freed */
- int *logflagsp) /* inode logging flags */
+ struct xfs_bmalloca *bma)
{
- xfs_btree_cur_t *cur; /* btree cursor */
+ struct xfs_bmbt_irec *new = &bma->got;
int diff; /* temp value */
xfs_bmbt_rec_host_t *ep; /* extent entry for idx */
int error; /* error return value */
@@ -624,10 +388,22 @@ xfs_bmap_add_extent_delay_real(
/* left is 0, right is 1, prev is 2 */
int rval=0; /* return value (logging flags) */
int state = 0;/* state bits, accessed thru macros */
- xfs_filblks_t temp=0; /* value for dnew calculations */
- xfs_filblks_t temp2=0;/* value for dnew calculations */
+ xfs_filblks_t da_new; /* new count del alloc blocks used */
+ xfs_filblks_t da_old; /* old count del alloc blocks used */
+ xfs_filblks_t temp=0; /* value for da_new calculations */
+ xfs_filblks_t temp2=0;/* value for da_new calculations */
int tmp_rval; /* partial logging flags */
+ ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
+
+ ASSERT(bma->idx >= 0);
+ ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(!isnullstartblock(new->br_startblock));
+ ASSERT(!bma->cur ||
+ (bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+
+ XFS_STATS_INC(xs_add_exlist);
+
#define LEFT r[0]
#define RIGHT r[1]
#define PREV r[2]
@@ -635,14 +411,15 @@ xfs_bmap_add_extent_delay_real(
/*
* Set up a bunch of variables to make the tests simpler.
*/
- cur = *curp;
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
- ep = xfs_iext_get_ext(ifp, *idx);
+ ep = xfs_iext_get_ext(ifp, bma->idx);
xfs_bmbt_get_all(ep, &PREV);
new_endoff = new->br_startoff + new->br_blockcount;
ASSERT(PREV.br_startoff <= new->br_startoff);
ASSERT(PREV.br_startoff + PREV.br_blockcount >= new_endoff);
+ da_old = startblockval(PREV.br_startblock);
+ da_new = 0;
+
/*
* Set flags determining what part of the previous delayed allocation
* extent is being replaced by a real allocation.
@@ -656,9 +433,9 @@ xfs_bmap_add_extent_delay_real(
* Check and set flags if this segment has a left neighbor.
* Don't set contiguous if the combined extent would be too large.
*/
- if (*idx > 0) {
+ if (bma->idx > 0) {
state |= BMAP_LEFT_VALID;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &LEFT);
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &LEFT);
if (isnullstartblock(LEFT.br_startblock))
state |= BMAP_LEFT_DELAY;
@@ -676,9 +453,9 @@ xfs_bmap_add_extent_delay_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (*idx < ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+ if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
state |= BMAP_RIGHT_VALID;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
if (isnullstartblock(RIGHT.br_startblock))
state |= BMAP_RIGHT_DELAY;
@@ -709,38 +486,41 @@ xfs_bmap_add_extent_delay_real(
* Filling in all of a previously delayed allocation extent.
* The left and right neighbors are both contiguous with new.
*/
- --*idx;
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
+ bma->idx--;
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
LEFT.br_blockcount + PREV.br_blockcount +
RIGHT.br_blockcount);
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- xfs_iext_remove(ip, *idx + 1, 2, state);
- ip->i_d.di_nextents--;
- if (cur == NULL)
+ xfs_iext_remove(bma->ip, bma->idx + 1, 2, state);
+ bma->ip->i_d.di_nextents--;
+ if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
- if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
RIGHT.br_startblock,
- RIGHT.br_blockcount, &i)))
+ RIGHT.br_blockcount, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_btree_delete(cur, &i)))
+ error = xfs_btree_delete(bma->cur, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_btree_decrement(cur, 0, &i)))
+ error = xfs_btree_decrement(bma->cur, 0, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+ error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
PREV.br_blockcount +
- RIGHT.br_blockcount, LEFT.br_state)))
+ RIGHT.br_blockcount, LEFT.br_state);
+ if (error)
goto done;
}
- *dnew = 0;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_LEFT_CONTIG:
@@ -748,30 +528,31 @@ xfs_bmap_add_extent_delay_real(
* Filling in all of a previously delayed allocation extent.
* The left neighbor is contiguous, the right is not.
*/
- --*idx;
+ bma->idx--;
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
LEFT.br_blockcount + PREV.br_blockcount);
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- xfs_iext_remove(ip, *idx + 1, 1, state);
- if (cur == NULL)
+ xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+ if (bma->cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
- if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
LEFT.br_startblock, LEFT.br_blockcount,
- &i)))
+ &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+ error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
- PREV.br_blockcount, LEFT.br_state)))
+ PREV.br_blockcount, LEFT.br_state);
+ if (error)
goto done;
}
- *dnew = 0;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -779,30 +560,30 @@ xfs_bmap_add_extent_delay_real(
* Filling in all of a previously delayed allocation extent.
* The right neighbor is contiguous, the left is not.
*/
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_startblock(ep, new->br_startblock);
xfs_bmbt_set_blockcount(ep,
PREV.br_blockcount + RIGHT.br_blockcount);
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- xfs_iext_remove(ip, *idx + 1, 1, state);
- if (cur == NULL)
+ xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
+ if (bma->cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
- if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
RIGHT.br_startblock,
- RIGHT.br_blockcount, &i)))
+ RIGHT.br_blockcount, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_update(cur, PREV.br_startoff,
+ error = xfs_bmbt_update(bma->cur, PREV.br_startoff,
new->br_startblock,
PREV.br_blockcount +
- RIGHT.br_blockcount, PREV.br_state)))
+ RIGHT.br_blockcount, PREV.br_state);
+ if (error)
goto done;
}
-
- *dnew = 0;
break;
case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING:
@@ -811,27 +592,27 @@ xfs_bmap_add_extent_delay_real(
* Neither the left nor right neighbors are contiguous with
* the new one.
*/
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_startblock(ep, new->br_startblock);
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- ip->i_d.di_nextents++;
- if (cur == NULL)
+ bma->ip->i_d.di_nextents++;
+ if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
- if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
new->br_startblock, new->br_blockcount,
- &i)))
+ &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
- cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_btree_insert(cur, &i)))
+ bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+ error = xfs_btree_insert(bma->cur, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
-
- *dnew = 0;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG:
@@ -839,39 +620,40 @@ xfs_bmap_add_extent_delay_real(
* Filling in the first part of a previous delayed allocation.
* The left neighbor is contiguous.
*/
- trace_xfs_bmap_pre_update(ip, *idx - 1, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx - 1),
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx - 1),
LEFT.br_blockcount + new->br_blockcount);
xfs_bmbt_set_startoff(ep,
PREV.br_startoff + new->br_blockcount);
- trace_xfs_bmap_post_update(ip, *idx - 1, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx - 1, state, _THIS_IP_);
temp = PREV.br_blockcount - new->br_blockcount;
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_blockcount(ep, temp);
- if (cur == NULL)
+ if (bma->cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
- if ((error = xfs_bmbt_lookup_eq(cur, LEFT.br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, LEFT.br_startoff,
LEFT.br_startblock, LEFT.br_blockcount,
- &i)))
+ &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_update(cur, LEFT.br_startoff,
+ error = xfs_bmbt_update(bma->cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount +
new->br_blockcount,
- LEFT.br_state)))
+ LEFT.br_state);
+ if (error)
goto done;
}
- temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock));
- xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- --*idx;
- *dnew = temp;
+ bma->idx--;
break;
case BMAP_LEFT_FILLING:
@@ -879,43 +661,43 @@ xfs_bmap_add_extent_delay_real(
* Filling in the first part of a previous delayed allocation.
* The left neighbor is not contiguous.
*/
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_startoff(ep, new_endoff);
temp = PREV.br_blockcount - new->br_blockcount;
xfs_bmbt_set_blockcount(ep, temp);
- xfs_iext_insert(ip, *idx, 1, new, state);
- ip->i_d.di_nextents++;
- if (cur == NULL)
+ xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
+ bma->ip->i_d.di_nextents++;
+ if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
- if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
new->br_startblock, new->br_blockcount,
- &i)))
+ &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
- cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_btree_insert(cur, &i)))
+ bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+ error = xfs_btree_insert(bma->cur, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_nextents > ip->i_df.if_ext_max) {
- error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
- first, flist, &cur, 1, &tmp_rval,
- XFS_DATA_FORK);
+ if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) {
+ error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+ bma->firstblock, bma->flist,
+ &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
rval |= tmp_rval;
if (error)
goto done;
}
- temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock) -
- (cur ? cur->bc_private.b.allocated : 0));
- ep = xfs_iext_get_ext(ifp, *idx + 1);
- xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_);
-
- *dnew = temp;
+ (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+ ep = xfs_iext_get_ext(ifp, bma->idx + 1);
+ xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+ trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
break;
case BMAP_RIGHT_FILLING | BMAP_RIGHT_CONTIG:
@@ -924,38 +706,39 @@ xfs_bmap_add_extent_delay_real(
* The right neighbor is contiguous with the new allocation.
*/
temp = PREV.br_blockcount - new->br_blockcount;
- trace_xfs_bmap_pre_update(ip, *idx + 1, state, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
xfs_bmbt_set_blockcount(ep, temp);
- xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx + 1),
+ xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx + 1),
new->br_startoff, new->br_startblock,
new->br_blockcount + RIGHT.br_blockcount,
RIGHT.br_state);
- trace_xfs_bmap_post_update(ip, *idx + 1, state, _THIS_IP_);
- if (cur == NULL)
+ trace_xfs_bmap_post_update(bma->ip, bma->idx + 1, state, _THIS_IP_);
+ if (bma->cur == NULL)
rval = XFS_ILOG_DEXT;
else {
rval = 0;
- if ((error = xfs_bmbt_lookup_eq(cur, RIGHT.br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, RIGHT.br_startoff,
RIGHT.br_startblock,
- RIGHT.br_blockcount, &i)))
+ RIGHT.br_blockcount, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_update(cur, new->br_startoff,
+ error = xfs_bmbt_update(bma->cur, new->br_startoff,
new->br_startblock,
new->br_blockcount +
RIGHT.br_blockcount,
- RIGHT.br_state)))
+ RIGHT.br_state);
+ if (error)
goto done;
}
- temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock));
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
- xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+ xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- ++*idx;
- *dnew = temp;
+ bma->idx++;
break;
case BMAP_RIGHT_FILLING:
@@ -964,42 +747,43 @@ xfs_bmap_add_extent_delay_real(
* The right neighbor is not contiguous.
*/
temp = PREV.br_blockcount - new->br_blockcount;
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
xfs_bmbt_set_blockcount(ep, temp);
- xfs_iext_insert(ip, *idx + 1, 1, new, state);
- ip->i_d.di_nextents++;
- if (cur == NULL)
+ xfs_iext_insert(bma->ip, bma->idx + 1, 1, new, state);
+ bma->ip->i_d.di_nextents++;
+ if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
- if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
new->br_startblock, new->br_blockcount,
- &i)))
+ &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
- cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_btree_insert(cur, &i)))
+ bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+ error = xfs_btree_insert(bma->cur, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_nextents > ip->i_df.if_ext_max) {
- error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
- first, flist, &cur, 1, &tmp_rval,
- XFS_DATA_FORK);
+ if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) {
+ error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+ bma->firstblock, bma->flist, &bma->cur, 1,
+ &tmp_rval, XFS_DATA_FORK);
rval |= tmp_rval;
if (error)
goto done;
}
- temp = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(ip, temp),
+ da_new = XFS_FILBLKS_MIN(xfs_bmap_worst_indlen(bma->ip, temp),
startblockval(PREV.br_startblock) -
- (cur ? cur->bc_private.b.allocated : 0));
- ep = xfs_iext_get_ext(ifp, *idx);
- xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+ ep = xfs_iext_get_ext(ifp, bma->idx);
+ xfs_bmbt_set_startblock(ep, nullstartblock(da_new));
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- ++*idx;
- *dnew = temp;
+ bma->idx++;
break;
case 0:
@@ -1025,82 +809,65 @@ xfs_bmap_add_extent_delay_real(
*/
temp = new->br_startoff - PREV.br_startoff;
temp2 = PREV.br_startoff + PREV.br_blockcount - new_endoff;
- trace_xfs_bmap_pre_update(ip, *idx, 0, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, 0, _THIS_IP_);
xfs_bmbt_set_blockcount(ep, temp); /* truncate PREV */
LEFT = *new;
RIGHT.br_state = PREV.br_state;
RIGHT.br_startblock = nullstartblock(
- (int)xfs_bmap_worst_indlen(ip, temp2));
+ (int)xfs_bmap_worst_indlen(bma->ip, temp2));
RIGHT.br_startoff = new_endoff;
RIGHT.br_blockcount = temp2;
/* insert LEFT (r[0]) and RIGHT (r[1]) at the same time */
- xfs_iext_insert(ip, *idx + 1, 2, &LEFT, state);
- ip->i_d.di_nextents++;
- if (cur == NULL)
+ xfs_iext_insert(bma->ip, bma->idx + 1, 2, &LEFT, state);
+ bma->ip->i_d.di_nextents++;
+ if (bma->cur == NULL)
rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
else {
rval = XFS_ILOG_CORE;
- if ((error = xfs_bmbt_lookup_eq(cur, new->br_startoff,
+ error = xfs_bmbt_lookup_eq(bma->cur, new->br_startoff,
new->br_startblock, new->br_blockcount,
- &i)))
+ &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
- cur->bc_rec.b.br_state = XFS_EXT_NORM;
- if ((error = xfs_btree_insert(cur, &i)))
+ bma->cur->bc_rec.b.br_state = XFS_EXT_NORM;
+ error = xfs_btree_insert(bma->cur, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
- if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
- ip->i_d.di_nextents > ip->i_df.if_ext_max) {
- error = xfs_bmap_extents_to_btree(ip->i_transp, ip,
- first, flist, &cur, 1, &tmp_rval,
- XFS_DATA_FORK);
+ if (bma->ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ bma->ip->i_d.di_nextents > bma->ip->i_df.if_ext_max) {
+ error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+ bma->firstblock, bma->flist, &bma->cur,
+ 1, &tmp_rval, XFS_DATA_FORK);
rval |= tmp_rval;
if (error)
goto done;
}
- temp = xfs_bmap_worst_indlen(ip, temp);
- temp2 = xfs_bmap_worst_indlen(ip, temp2);
+ temp = xfs_bmap_worst_indlen(bma->ip, temp);
+ temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
- (cur ? cur->bc_private.b.allocated : 0));
- if (diff > 0 &&
- xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS,
- -((int64_t)diff), 0)) {
- /*
- * Ick gross gag me with a spoon.
- */
- ASSERT(0); /* want to see if this ever happens! */
- while (diff > 0) {
- if (temp) {
- temp--;
- diff--;
- if (!diff ||
- !xfs_icsb_modify_counters(ip->i_mount,
- XFS_SBS_FDBLOCKS,
- -((int64_t)diff), 0))
- break;
- }
- if (temp2) {
- temp2--;
- diff--;
- if (!diff ||
- !xfs_icsb_modify_counters(ip->i_mount,
- XFS_SBS_FDBLOCKS,
- -((int64_t)diff), 0))
- break;
- }
- }
+ (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+ if (diff > 0) {
+ error = xfs_icsb_modify_counters(bma->ip->i_mount,
+ XFS_SBS_FDBLOCKS,
+ -((int64_t)diff), 0);
+ ASSERT(!error);
+ if (error)
+ goto done;
}
- ep = xfs_iext_get_ext(ifp, *idx);
+
+ ep = xfs_iext_get_ext(ifp, bma->idx);
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
- trace_xfs_bmap_pre_update(ip, *idx + 2, state, _THIS_IP_);
- xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, *idx + 2),
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
+ xfs_bmbt_set_startblock(xfs_iext_get_ext(ifp, bma->idx + 2),
nullstartblock((int)temp2));
- trace_xfs_bmap_post_update(ip, *idx + 2, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx + 2, state, _THIS_IP_);
- ++*idx;
- *dnew = temp + temp2;
+ bma->idx++;
+ da_new = temp + temp2;
break;
case BMAP_LEFT_FILLING | BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
@@ -1115,9 +882,40 @@ xfs_bmap_add_extent_delay_real(
*/
ASSERT(0);
}
- *curp = cur;
+
+ /* convert to a btree if necessary */
+ if (XFS_IFORK_FORMAT(bma->ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(bma->ip, XFS_DATA_FORK) > ifp->if_ext_max) {
+ int tmp_logflags; /* partial log flag return val */
+
+ ASSERT(bma->cur == NULL);
+ error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+ bma->firstblock, bma->flist, &bma->cur,
+ da_old > 0, &tmp_logflags, XFS_DATA_FORK);
+ bma->logflags |= tmp_logflags;
+ if (error)
+ goto done;
+ }
+
+ /* adjust for changes in reserved delayed indirect blocks */
+ if (da_old || da_new) {
+ temp = da_new;
+ if (bma->cur)
+ temp += bma->cur->bc_private.b.allocated;
+ ASSERT(temp <= da_old);
+ if (temp < da_old)
+ xfs_icsb_modify_counters(bma->ip->i_mount,
+ XFS_SBS_FDBLOCKS,
+ (int64_t)(da_old - temp), 0);
+ }
+
+ /* clear out the allocated field, done with it now in any case. */
+ if (bma->cur)
+ bma->cur->bc_private.b.allocated = 0;
+
+ xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
done:
- *logflagsp = rval;
+ bma->logflags |= rval;
return error;
#undef LEFT
#undef RIGHT
@@ -1125,15 +923,17 @@ done:
}
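The switch above keys off a bitmask built from four conditions: whether the new allocation fills the start (BMAP_LEFT_FILLING) or end (BMAP_RIGHT_FILLING) of the delayed extent, and whether it can merge with the left or right neighbor (BMAP_LEFT_CONTIG/BMAP_RIGHT_CONTIG). A minimal standalone sketch of how those bits compose into the case labels; the flag names follow the surrounding code, but the values here are stand-ins, not the kernel's definitions:

#include <stdio.h>

/* Stand-in values for the BMAP_* state bits used by the switch. */
#define BMAP_LEFT_FILLING   (1 << 0)  /* new extent starts where PREV starts */
#define BMAP_RIGHT_FILLING  (1 << 1)  /* new extent ends where PREV ends */
#define BMAP_LEFT_CONTIG    (1 << 2)  /* can merge with the left neighbor */
#define BMAP_RIGHT_CONTIG   (1 << 3)  /* can merge with the right neighbor */

int main(void)
{
	int state = 0;

	/* New allocation covers the whole delayed extent and both
	 * neighbors abut it: all three records collapse into one. */
	state |= BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING;
	state |= BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG;

	switch (state & (BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING |
			 BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG)) {
	case BMAP_LEFT_FILLING | BMAP_RIGHT_FILLING |
	     BMAP_LEFT_CONTIG | BMAP_RIGHT_CONTIG:
		printf("merge PREV with both neighbors\n");
		break;
	default:
		printf("one of the other combinations\n");
	}
	return 0;
}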
/*
- * Called by xfs_bmap_add_extent to handle cases converting an unwritten
- * allocation to a real allocation or vice versa.
+ * Convert an unwritten allocation to a real allocation or vice versa.
*/
STATIC int /* error */
xfs_bmap_add_extent_unwritten_real(
+ struct xfs_trans *tp,
xfs_inode_t *ip, /* incore inode pointer */
xfs_extnum_t *idx, /* extent number to update/insert */
xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
xfs_bmbt_irec_t *new, /* new data to add to file extents */
+ xfs_fsblock_t *first, /* pointer to firstblock variable */
+ xfs_bmap_free_t *flist, /* list of extents to be freed */
int *logflagsp) /* inode logging flags */
{
xfs_btree_cur_t *cur; /* btree cursor */
@@ -1149,15 +949,25 @@ xfs_bmap_add_extent_unwritten_real(
int rval=0; /* return value (logging flags) */
int state = 0;/* state bits, accessed thru macros */
+ *logflagsp = 0;
+
+ cur = *curp;
+ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+
+ ASSERT(*idx >= 0);
+ ASSERT(*idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(!isnullstartblock(new->br_startblock));
+
+ XFS_STATS_INC(xs_add_exlist);
+
#define LEFT r[0]
#define RIGHT r[1]
#define PREV r[2]
+
/*
* Set up a bunch of variables to make the tests simpler.
*/
error = 0;
- cur = *curp;
- ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
ep = xfs_iext_get_ext(ifp, *idx);
xfs_bmbt_get_all(ep, &PREV);
newext = new->br_state;
@@ -1407,10 +1217,11 @@ xfs_bmap_add_extent_unwritten_real(
goto done;
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
- if (xfs_bmbt_update(cur, LEFT.br_startoff,
+ error = xfs_bmbt_update(cur, LEFT.br_startoff,
LEFT.br_startblock,
LEFT.br_blockcount + new->br_blockcount,
- LEFT.br_state))
+ LEFT.br_state);
+ if (error)
goto done;
}
break;
@@ -1608,9 +1419,29 @@ xfs_bmap_add_extent_unwritten_real(
*/
ASSERT(0);
}
- *curp = cur;
+
+ /* convert to a btree if necessary */
+ if (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > ifp->if_ext_max) {
+ int tmp_logflags; /* partial log flag return val */
+
+ ASSERT(cur == NULL);
+ error = xfs_bmap_extents_to_btree(tp, ip, first, flist, &cur,
+ 0, &tmp_logflags, XFS_DATA_FORK);
+ *logflagsp |= tmp_logflags;
+ if (error)
+ goto done;
+ }
+
+ /* clear out the allocated field, done with it now in any case. */
+ if (cur) {
+ cur->bc_private.b.allocated = 0;
+ *curp = cur;
+ }
+
+ xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK);
done:
- *logflagsp = rval;
+ *logflagsp |= rval;
return error;
#undef LEFT
#undef RIGHT
@@ -1618,16 +1449,13 @@ done:
}
/*
- * Called by xfs_bmap_add_extent to handle cases converting a hole
- * to a delayed allocation.
+ * Convert a hole to a delayed allocation.
*/
-/*ARGSUSED*/
-STATIC int /* error */
+STATIC void
xfs_bmap_add_extent_hole_delay(
xfs_inode_t *ip, /* incore inode pointer */
xfs_extnum_t *idx, /* extent number to update/insert */
- xfs_bmbt_irec_t *new, /* new data to add to file extents */
- int *logflagsp) /* inode logging flags */
+ xfs_bmbt_irec_t *new) /* new data to add to file extents */
{
xfs_ifork_t *ifp; /* inode fork pointer */
xfs_bmbt_irec_t left; /* left neighbor extent entry */
@@ -1762,23 +1590,17 @@ xfs_bmap_add_extent_hole_delay(
* Nothing to do for disk quota accounting here.
*/
}
- *logflagsp = 0;
- return 0;
}
/*
- * Called by xfs_bmap_add_extent to handle cases converting a hole
- * to a real allocation.
+ * Convert a hole to a real allocation.
*/
STATIC int /* error */
xfs_bmap_add_extent_hole_real(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_extnum_t *idx, /* extent number to update/insert */
- xfs_btree_cur_t *cur, /* if null, not a btree */
- xfs_bmbt_irec_t *new, /* new data to add to file extents */
- int *logflagsp, /* inode logging flags */
- int whichfork) /* data or attr fork */
+ struct xfs_bmalloca *bma,
+ int whichfork)
{
+ struct xfs_bmbt_irec *new = &bma->got;
int error; /* error return value */
int i; /* temp state */
xfs_ifork_t *ifp; /* inode fork pointer */
@@ -1787,19 +1609,26 @@ xfs_bmap_add_extent_hole_real(
int rval=0; /* return value (logging flags) */
int state; /* state bits, accessed thru macros */
- ifp = XFS_IFORK_PTR(ip, whichfork);
- ASSERT(*idx <= ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t));
- state = 0;
+ ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+
+ ASSERT(bma->idx >= 0);
+ ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
+ ASSERT(!isnullstartblock(new->br_startblock));
+ ASSERT(!bma->cur ||
+ !(bma->cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL));
+
+ XFS_STATS_INC(xs_add_exlist);
+ state = 0;
if (whichfork == XFS_ATTR_FORK)
state |= BMAP_ATTRFORK;
/*
* Check and set flags if this segment has a left neighbor.
*/
- if (*idx > 0) {
+ if (bma->idx > 0) {
state |= BMAP_LEFT_VALID;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx - 1), &left);
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1), &left);
if (isnullstartblock(left.br_startblock))
state |= BMAP_LEFT_DELAY;
}
@@ -1808,9 +1637,9 @@ xfs_bmap_add_extent_hole_real(
* Check and set flags if this segment has a current value.
* Not true if we're inserting into the "hole" at eof.
*/
- if (*idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
+ if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t)) {
state |= BMAP_RIGHT_VALID;
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx), &right);
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &right);
if (isnullstartblock(right.br_startblock))
state |= BMAP_RIGHT_DELAY;
}
@@ -1847,39 +1676,42 @@ xfs_bmap_add_extent_hole_real(
* left and on the right.
* Merge all three into a single extent record.
*/
- --*idx;
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
+ --bma->idx;
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
left.br_blockcount + new->br_blockcount +
right.br_blockcount);
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- xfs_iext_remove(ip, *idx + 1, 1, state);
+ xfs_iext_remove(bma->ip, bma->idx + 1, 1, state);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
- if (cur == NULL) {
+ XFS_IFORK_NEXT_SET(bma->ip, whichfork,
+ XFS_IFORK_NEXTENTS(bma->ip, whichfork) - 1);
+ if (bma->cur == NULL) {
rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else {
rval = XFS_ILOG_CORE;
- if ((error = xfs_bmbt_lookup_eq(cur,
- right.br_startoff,
- right.br_startblock,
- right.br_blockcount, &i)))
+ error = xfs_bmbt_lookup_eq(bma->cur, right.br_startoff,
+ right.br_startblock, right.br_blockcount,
+ &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_btree_delete(cur, &i)))
+ error = xfs_btree_delete(bma->cur, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_btree_decrement(cur, 0, &i)))
+ error = xfs_btree_decrement(bma->cur, 0, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_update(cur, left.br_startoff,
+ error = xfs_bmbt_update(bma->cur, left.br_startoff,
left.br_startblock,
left.br_blockcount +
new->br_blockcount +
right.br_blockcount,
- left.br_state)))
+ left.br_state);
+ if (error)
goto done;
}
break;
@@ -1890,27 +1722,28 @@ xfs_bmap_add_extent_hole_real(
* on the left.
* Merge the new allocation with the left neighbor.
*/
- --*idx;
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
- xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, *idx),
+ --bma->idx;
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+ xfs_bmbt_set_blockcount(xfs_iext_get_ext(ifp, bma->idx),
left.br_blockcount + new->br_blockcount);
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- if (cur == NULL) {
+ if (bma->cur == NULL) {
rval = xfs_ilog_fext(whichfork);
} else {
rval = 0;
- if ((error = xfs_bmbt_lookup_eq(cur,
- left.br_startoff,
- left.br_startblock,
- left.br_blockcount, &i)))
+ error = xfs_bmbt_lookup_eq(bma->cur, left.br_startoff,
+ left.br_startblock, left.br_blockcount,
+ &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_update(cur, left.br_startoff,
+ error = xfs_bmbt_update(bma->cur, left.br_startoff,
left.br_startblock,
left.br_blockcount +
new->br_blockcount,
- left.br_state)))
+ left.br_state);
+ if (error)
goto done;
}
break;
@@ -1921,28 +1754,30 @@ xfs_bmap_add_extent_hole_real(
* on the right.
* Merge the new allocation with the right neighbor.
*/
- trace_xfs_bmap_pre_update(ip, *idx, state, _THIS_IP_);
- xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, *idx),
+ trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
+ xfs_bmbt_set_allf(xfs_iext_get_ext(ifp, bma->idx),
new->br_startoff, new->br_startblock,
new->br_blockcount + right.br_blockcount,
right.br_state);
- trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
+ trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
- if (cur == NULL) {
+ if (bma->cur == NULL) {
rval = xfs_ilog_fext(whichfork);
} else {
rval = 0;
- if ((error = xfs_bmbt_lookup_eq(cur,
+ error = xfs_bmbt_lookup_eq(bma->cur,
right.br_startoff,
right.br_startblock,
- right.br_blockcount, &i)))
+ right.br_blockcount, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
- if ((error = xfs_bmbt_update(cur, new->br_startoff,
+ error = xfs_bmbt_update(bma->cur, new->br_startoff,
new->br_startblock,
new->br_blockcount +
right.br_blockcount,
- right.br_state)))
+ right.br_state);
+ if (error)
goto done;
}
break;
@@ -1953,28 +1788,50 @@ xfs_bmap_add_extent_hole_real(
* real allocation.
* Insert a new entry.
*/
- xfs_iext_insert(ip, *idx, 1, new, state);
- XFS_IFORK_NEXT_SET(ip, whichfork,
- XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
- if (cur == NULL) {
+ xfs_iext_insert(bma->ip, bma->idx, 1, new, state);
+ XFS_IFORK_NEXT_SET(bma->ip, whichfork,
+ XFS_IFORK_NEXTENTS(bma->ip, whichfork) + 1);
+ if (bma->cur == NULL) {
rval = XFS_ILOG_CORE | xfs_ilog_fext(whichfork);
} else {
rval = XFS_ILOG_CORE;
- if ((error = xfs_bmbt_lookup_eq(cur,
+ error = xfs_bmbt_lookup_eq(bma->cur,
new->br_startoff,
new->br_startblock,
- new->br_blockcount, &i)))
+ new->br_blockcount, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 0, done);
- cur->bc_rec.b.br_state = new->br_state;
- if ((error = xfs_btree_insert(cur, &i)))
+ bma->cur->bc_rec.b.br_state = new->br_state;
+ error = xfs_btree_insert(bma->cur, &i);
+ if (error)
goto done;
XFS_WANT_CORRUPTED_GOTO(i == 1, done);
}
break;
}
+
+ /* convert to a btree if necessary */
+ if (XFS_IFORK_FORMAT(bma->ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(bma->ip, whichfork) > ifp->if_ext_max) {
+ int tmp_logflags; /* partial log flag return val */
+
+ ASSERT(bma->cur == NULL);
+ error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
+ bma->firstblock, bma->flist, &bma->cur,
+ 0, &tmp_logflags, whichfork);
+ bma->logflags |= tmp_logflags;
+ if (error)
+ goto done;
+ }
+
+ /* clear out the allocated field, done with it now in any case. */
+ if (bma->cur)
+ bma->cur->bc_private.b.allocated = 0;
+
+ xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
- *logflagsp = rval;
+ bma->logflags |= rval;
return error;
}
@@ -2161,26 +2018,26 @@ xfs_bmap_adjacent(
XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks)
mp = ap->ip->i_mount;
- nullfb = ap->firstblock == NULLFSBLOCK;
+ nullfb = *ap->firstblock == NULLFSBLOCK;
rt = XFS_IS_REALTIME_INODE(ap->ip) && ap->userdata;
- fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
+ fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
/*
* If allocating at eof, and there's a previous real block,
* try to use its last block as our starting point.
*/
- if (ap->eof && ap->prevp->br_startoff != NULLFILEOFF &&
- !isnullstartblock(ap->prevp->br_startblock) &&
- ISVALID(ap->prevp->br_startblock + ap->prevp->br_blockcount,
- ap->prevp->br_startblock)) {
- ap->rval = ap->prevp->br_startblock + ap->prevp->br_blockcount;
+ if (ap->eof && ap->prev.br_startoff != NULLFILEOFF &&
+ !isnullstartblock(ap->prev.br_startblock) &&
+ ISVALID(ap->prev.br_startblock + ap->prev.br_blockcount,
+ ap->prev.br_startblock)) {
+ ap->blkno = ap->prev.br_startblock + ap->prev.br_blockcount;
/*
* Adjust for the gap between prevp and us.
*/
- adjust = ap->off -
- (ap->prevp->br_startoff + ap->prevp->br_blockcount);
+ adjust = ap->offset -
+ (ap->prev.br_startoff + ap->prev.br_blockcount);
if (adjust &&
- ISVALID(ap->rval + adjust, ap->prevp->br_startblock))
- ap->rval += adjust;
+ ISVALID(ap->blkno + adjust, ap->prev.br_startblock))
+ ap->blkno += adjust;
}
/*
* If not at eof, then compare the two neighbor blocks.
@@ -2197,17 +2054,17 @@ xfs_bmap_adjacent(
* If there's a previous (left) block, select a requested
* start block based on it.
*/
- if (ap->prevp->br_startoff != NULLFILEOFF &&
- !isnullstartblock(ap->prevp->br_startblock) &&
- (prevbno = ap->prevp->br_startblock +
- ap->prevp->br_blockcount) &&
- ISVALID(prevbno, ap->prevp->br_startblock)) {
+ if (ap->prev.br_startoff != NULLFILEOFF &&
+ !isnullstartblock(ap->prev.br_startblock) &&
+ (prevbno = ap->prev.br_startblock +
+ ap->prev.br_blockcount) &&
+ ISVALID(prevbno, ap->prev.br_startblock)) {
/*
* Calculate gap to end of previous block.
*/
- adjust = prevdiff = ap->off -
- (ap->prevp->br_startoff +
- ap->prevp->br_blockcount);
+ adjust = prevdiff = ap->offset -
+ (ap->prev.br_startoff +
+ ap->prev.br_blockcount);
/*
* Figure the startblock based on the previous block's
* end and the gap size.
@@ -2216,9 +2073,9 @@ xfs_bmap_adjacent(
* allocating, or using it gives us an invalid block
* number, then just use the end of the previous block.
*/
- if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
+ if (prevdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
ISVALID(prevbno + prevdiff,
- ap->prevp->br_startblock))
+ ap->prev.br_startblock))
prevbno += adjust;
else
prevdiff += adjust;
@@ -2239,16 +2096,16 @@ xfs_bmap_adjacent(
* If there's a following (right) block, select a requested
* start block based on it.
*/
- if (!isnullstartblock(ap->gotp->br_startblock)) {
+ if (!isnullstartblock(ap->got.br_startblock)) {
/*
* Calculate gap to start of next block.
*/
- adjust = gotdiff = ap->gotp->br_startoff - ap->off;
+ adjust = gotdiff = ap->got.br_startoff - ap->offset;
/*
* Figure the startblock based on the next block's
* start and the gap size.
*/
- gotbno = ap->gotp->br_startblock;
+ gotbno = ap->got.br_startblock;
/*
* Heuristic!
* If the gap is large relative to the piece we're
@@ -2256,12 +2113,12 @@ xfs_bmap_adjacent(
* number, then just use the start of the next block
* offset by our length.
*/
- if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->alen &&
+ if (gotdiff <= XFS_ALLOC_GAP_UNITS * ap->length &&
ISVALID(gotbno - gotdiff, gotbno))
gotbno -= adjust;
- else if (ISVALID(gotbno - ap->alen, gotbno)) {
- gotbno -= ap->alen;
- gotdiff += adjust - ap->alen;
+ else if (ISVALID(gotbno - ap->length, gotbno)) {
+ gotbno -= ap->length;
+ gotdiff += adjust - ap->length;
} else
gotdiff += adjust;
/*
@@ -2279,14 +2136,14 @@ xfs_bmap_adjacent(
gotbno = NULLFSBLOCK;
/*
* If both valid, pick the better one, else the only good
- * one, else ap->rval is already set (to 0 or the inode block).
+ * one, else ap->blkno is already set (to 0 or the inode block).
*/
if (prevbno != NULLFSBLOCK && gotbno != NULLFSBLOCK)
- ap->rval = prevdiff <= gotdiff ? prevbno : gotbno;
+ ap->blkno = prevdiff <= gotdiff ? prevbno : gotbno;
else if (prevbno != NULLFSBLOCK)
- ap->rval = prevbno;
+ ap->blkno = prevbno;
else if (gotbno != NULLFSBLOCK)
- ap->rval = gotbno;
+ ap->blkno = gotbno;
}
#undef ISVALID
}
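The xfs_bmap_adjacent() heuristic is easiest to see with numbers. If the previous extent maps file offset 100 to disk block 5000 for 8 blocks, and we are now allocating at file offset 112, the code requests disk block 5012, leaving a 4-block gap on disk that matches the 4-block gap in the file. A standalone sketch with those invented values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Previous extent: file offset 100 -> disk block 5000, 8 blocks. */
	uint64_t prev_startoff = 100, prev_startblock = 5000, prev_count = 8;
	uint64_t offset = 112;		/* file offset being allocated */

	/* Gap between the end of prev and the new offset, in blocks. */
	uint64_t adjust = offset - (prev_startoff + prev_count);	/* 4 */

	/* Request a block that leaves room for the gap, so a later write
	 * into the gap can still be allocated contiguously on disk. */
	uint64_t blkno = prev_startblock + prev_count + adjust;		/* 5012 */

	printf("requested blkno = %llu\n", (unsigned long long)blkno);
	return 0;
}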
@@ -2306,24 +2163,24 @@ xfs_bmap_rtalloc(
mp = ap->ip->i_mount;
align = xfs_get_extsz_hint(ap->ip);
prod = align / mp->m_sb.sb_rextsize;
- error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+ error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
align, 1, ap->eof, 0,
- ap->conv, &ap->off, &ap->alen);
+ ap->conv, &ap->offset, &ap->length);
if (error)
return error;
- ASSERT(ap->alen);
- ASSERT(ap->alen % mp->m_sb.sb_rextsize == 0);
+ ASSERT(ap->length);
+ ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);
/*
* If the offset & length are not perfectly aligned
* then kill prod, it will just get us in trouble.
*/
- if (do_mod(ap->off, align) || ap->alen % align)
+ if (do_mod(ap->offset, align) || ap->length % align)
prod = 1;
/*
* Set ralen to be the actual requested length in rtextents.
*/
- ralen = ap->alen / mp->m_sb.sb_rextsize;
+ ralen = ap->length / mp->m_sb.sb_rextsize;
/*
* If the old value was close enough to MAXEXTLEN that
* we rounded up to it, cut it back so it's valid again.
@@ -2338,21 +2195,21 @@ xfs_bmap_rtalloc(
* Lock out other modifications to the RT bitmap inode.
*/
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
* If it's an allocation to an empty file at offset 0,
* pick an extent that will space things out in the rt area.
*/
- if (ap->eof && ap->off == 0) {
+ if (ap->eof && ap->offset == 0) {
xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */
error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
if (error)
return error;
- ap->rval = rtx * mp->m_sb.sb_rextsize;
+ ap->blkno = rtx * mp->m_sb.sb_rextsize;
} else {
- ap->rval = 0;
+ ap->blkno = 0;
}
xfs_bmap_adjacent(ap);
@@ -2360,23 +2217,23 @@ xfs_bmap_rtalloc(
/*
* Realtime allocation, done through xfs_rtallocate_extent.
*/
- atype = ap->rval == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
- do_div(ap->rval, mp->m_sb.sb_rextsize);
- rtb = ap->rval;
- ap->alen = ralen;
- if ((error = xfs_rtallocate_extent(ap->tp, ap->rval, 1, ap->alen,
+ atype = ap->blkno == 0 ? XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
+ do_div(ap->blkno, mp->m_sb.sb_rextsize);
+ rtb = ap->blkno;
+ ap->length = ralen;
+ if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
&ralen, atype, ap->wasdel, prod, &rtb)))
return error;
if (rtb == NULLFSBLOCK && prod > 1 &&
- (error = xfs_rtallocate_extent(ap->tp, ap->rval, 1,
- ap->alen, &ralen, atype,
+ (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
+ ap->length, &ralen, atype,
ap->wasdel, 1, &rtb)))
return error;
- ap->rval = rtb;
- if (ap->rval != NULLFSBLOCK) {
- ap->rval *= mp->m_sb.sb_rextsize;
+ ap->blkno = rtb;
+ if (ap->blkno != NULLFSBLOCK) {
+ ap->blkno *= mp->m_sb.sb_rextsize;
ralen *= mp->m_sb.sb_rextsize;
- ap->alen = ralen;
+ ap->length = ralen;
ap->ip->i_d.di_nblocks += ralen;
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
if (ap->wasdel)
@@ -2389,7 +2246,7 @@ xfs_bmap_rtalloc(
ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
} else {
- ap->alen = 0;
+ ap->length = 0;
}
return 0;
}
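xfs_bmap_rtalloc() works in realtime-extent units: ap->blkno is divided by sb_rextsize before calling xfs_rtallocate_extent(), and the result is multiplied back into filesystem blocks on success. A standalone sketch of that conversion, with invented sizes:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t rextsize = 16;		/* blocks per realtime extent */
	uint64_t blkno = 4096;		/* candidate block number */

	uint64_t rtx = blkno / rextsize;	/* 256: unit the rt allocator uses */
	/* ...assume the allocator returns the next rtextent, rtx + 1... */
	uint64_t got = (rtx + 1) * rextsize;	/* back to filesystem blocks */

	printf("rtx=%llu got=%llu\n", (unsigned long long)rtx,
	       (unsigned long long)got);
	return 0;
}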
@@ -2504,7 +2361,7 @@ xfs_bmap_btalloc_nullfb(
* AG as the stream may have moved.
*/
if (xfs_inode_is_filestream(ap->ip))
- ap->rval = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
+ ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);
return 0;
}
@@ -2526,55 +2383,57 @@ xfs_bmap_btalloc(
int tryagain;
int error;
+ ASSERT(ap->length);
+
mp = ap->ip->i_mount;
align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;
if (unlikely(align)) {
- error = xfs_bmap_extsize_align(mp, ap->gotp, ap->prevp,
+ error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
align, 0, ap->eof, 0, ap->conv,
- &ap->off, &ap->alen);
+ &ap->offset, &ap->length);
ASSERT(!error);
- ASSERT(ap->alen);
+ ASSERT(ap->length);
}
- nullfb = ap->firstblock == NULLFSBLOCK;
- fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, ap->firstblock);
+ nullfb = *ap->firstblock == NULLFSBLOCK;
+ fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);
if (nullfb) {
if (ap->userdata && xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
- ap->rval = XFS_AGB_TO_FSB(mp, ag, 0);
+ ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0);
} else {
- ap->rval = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
+ ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino);
}
} else
- ap->rval = ap->firstblock;
+ ap->blkno = *ap->firstblock;
xfs_bmap_adjacent(ap);
/*
- * If allowed, use ap->rval; otherwise must use firstblock since
+ * If allowed, use ap->blkno; otherwise must use firstblock since
* it's in the right allocation group.
*/
- if (nullfb || XFS_FSB_TO_AGNO(mp, ap->rval) == fb_agno)
+ if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno)
;
else
- ap->rval = ap->firstblock;
+ ap->blkno = *ap->firstblock;
/*
* Normal allocation, done through xfs_alloc_vextent.
*/
tryagain = isaligned = 0;
args.tp = ap->tp;
args.mp = mp;
- args.fsbno = ap->rval;
+ args.fsbno = ap->blkno;
/* Trim the allocation back to the maximum an AG can fit. */
- args.maxlen = MIN(ap->alen, XFS_ALLOC_AG_MAX_USABLE(mp));
- args.firstblock = ap->firstblock;
+ args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp));
+ args.firstblock = *ap->firstblock;
blen = 0;
if (nullfb) {
error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);
if (error)
return error;
- } else if (ap->low) {
+ } else if (ap->flist->xbf_low) {
if (xfs_inode_is_filestream(ap->ip))
args.type = XFS_ALLOCTYPE_FIRST_AG;
else
@@ -2588,14 +2447,14 @@ xfs_bmap_btalloc(
/* apply extent size hints if obtained earlier */
if (unlikely(align)) {
args.prod = align;
- if ((args.mod = (xfs_extlen_t)do_mod(ap->off, args.prod)))
+ if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
args.mod = (xfs_extlen_t)(args.prod - args.mod);
} else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
args.prod = 1;
args.mod = 0;
} else {
args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
- if ((args.mod = (xfs_extlen_t)(do_mod(ap->off, args.prod))))
+ if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
args.mod = (xfs_extlen_t)(args.prod - args.mod);
}
/*
@@ -2607,8 +2466,8 @@ xfs_bmap_btalloc(
* is >= the stripe unit and the allocation offset is
* at the end of file.
*/
- if (!ap->low && ap->aeof) {
- if (!ap->off) {
+ if (!ap->flist->xbf_low && ap->aeof) {
+ if (!ap->offset) {
args.alignment = mp->m_dalign;
atype = args.type;
isaligned = 1;
@@ -2661,7 +2520,7 @@ xfs_bmap_btalloc(
* turned on.
*/
args.type = atype;
- args.fsbno = ap->rval;
+ args.fsbno = ap->blkno;
args.alignment = mp->m_dalign;
args.minlen = nextminlen;
args.minalignslop = 0;
@@ -2675,7 +2534,7 @@ xfs_bmap_btalloc(
* try again.
*/
args.type = atype;
- args.fsbno = ap->rval;
+ args.fsbno = ap->blkno;
args.alignment = 0;
if ((error = xfs_alloc_vextent(&args)))
return error;
@@ -2684,7 +2543,7 @@ xfs_bmap_btalloc(
args.minlen > ap->minlen) {
args.minlen = ap->minlen;
args.type = XFS_ALLOCTYPE_START_BNO;
- args.fsbno = ap->rval;
+ args.fsbno = ap->blkno;
if ((error = xfs_alloc_vextent(&args)))
return error;
}
@@ -2695,13 +2554,26 @@ xfs_bmap_btalloc(
args.minleft = 0;
if ((error = xfs_alloc_vextent(&args)))
return error;
- ap->low = 1;
+ ap->flist->xbf_low = 1;
}
if (args.fsbno != NULLFSBLOCK) {
- ap->firstblock = ap->rval = args.fsbno;
+ /*
+ * check the allocation happened at the same or higher AG than
+ * the first block that was allocated.
+ */
+ ASSERT(*ap->firstblock == NULLFSBLOCK ||
+ XFS_FSB_TO_AGNO(mp, *ap->firstblock) ==
+ XFS_FSB_TO_AGNO(mp, args.fsbno) ||
+ (ap->flist->xbf_low &&
+ XFS_FSB_TO_AGNO(mp, *ap->firstblock) <
+ XFS_FSB_TO_AGNO(mp, args.fsbno)));
+
+ ap->blkno = args.fsbno;
+ if (*ap->firstblock == NULLFSBLOCK)
+ *ap->firstblock = args.fsbno;
ASSERT(nullfb || fb_agno == args.agno ||
- (ap->low && fb_agno < args.agno));
- ap->alen = args.len;
+ (ap->flist->xbf_low && fb_agno < args.agno));
+ ap->length = args.len;
ap->ip->i_d.di_nblocks += args.len;
xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
if (ap->wasdel)
@@ -2715,8 +2587,8 @@ xfs_bmap_btalloc(
XFS_TRANS_DQ_BCOUNT,
(long) args.len);
} else {
- ap->rval = NULLFSBLOCK;
- ap->alen = 0;
+ ap->blkno = NULLFSBLOCK;
+ ap->length = 0;
}
return 0;
}
@@ -2871,8 +2743,8 @@ xfs_bmap_del_extent(
len = del->br_blockcount;
do_div(bno, mp->m_sb.sb_rextsize);
do_div(len, mp->m_sb.sb_rextsize);
- if ((error = xfs_rtfree_extent(ip->i_transp, bno,
- (xfs_extlen_t)len)))
+ error = xfs_rtfree_extent(tp, bno, (xfs_extlen_t)len);
+ if (error)
goto done;
do_fx = 0;
nblks = len * mp->m_sb.sb_rextsize;
@@ -3345,8 +3217,7 @@ xfs_bmap_local_to_extents(
* We don't want to deal with the case of keeping inode data inline yet.
* So sending the data fork of a regular inode is invalid.
*/
- ASSERT(!((ip->i_d.di_mode & S_IFMT) == S_IFREG &&
- whichfork == XFS_DATA_FORK));
+ ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
flags = 0;
@@ -3385,8 +3256,7 @@ xfs_bmap_local_to_extents(
ASSERT(args.len == 1);
*firstblock = args.fsbno;
bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno, 0);
- memcpy((char *)XFS_BUF_PTR(bp), ifp->if_u1.if_data,
- ifp->if_bytes);
+ memcpy(bp->b_addr, ifp->if_u1.if_data, ifp->if_bytes);
xfs_trans_log_buf(tp, bp, 0, ifp->if_bytes - 1);
xfs_bmap_forkoff_reset(args.mp, ip, whichfork);
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
@@ -3592,7 +3462,7 @@ xfs_bmap_add_attrfork(
}
ASSERT(ip->i_d.di_anextents == 0);
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
switch (ip->i_d.di_format) {
@@ -3989,42 +3859,122 @@ xfs_bmap_last_before(
return 0;
}
+STATIC int
+xfs_bmap_last_extent(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ int whichfork,
+ struct xfs_bmbt_irec *rec,
+ int *is_empty)
+{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ int error;
+ int nextents;
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ nextents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t);
+ if (nextents == 0) {
+ *is_empty = 1;
+ return 0;
+ }
+
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, nextents - 1), rec);
+ *is_empty = 0;
+ return 0;
+}
+
+/*
+ * Check the last inode extent to determine whether this allocation will result
+ * in blocks being allocated at the end of the file. When we allocate new data
+ * blocks at the end of the file which do not start at the previous data block,
+ * we will try to align the new blocks at stripe unit boundaries.
+ *
+ * Returns 0 in bma->aeof if the file (fork) is empty, as any new write
+ * will be at, or past, the EOF.
+ */
+STATIC int
+xfs_bmap_isaeof(
+ struct xfs_bmalloca *bma,
+ int whichfork)
+{
+ struct xfs_bmbt_irec rec;
+ int is_empty;
+ int error;
+
+ bma->aeof = 0;
+ error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,
+ &is_empty);
+ if (error || is_empty)
+ return error;
+
+ /*
+ * Check if we are allocating at or past the last extent, or at least into
+ * the last delayed allocated extent.
+ */
+ bma->aeof = bma->offset >= rec.br_startoff + rec.br_blockcount ||
+ (bma->offset >= rec.br_startoff &&
+ isnullstartblock(rec.br_startblock));
+ return 0;
+}
+
+/*
+ * Check if the endoff is outside the last extent. If so the caller will grow
+ * the allocation to a stripe unit boundary. All offsets are considered outside
+ * the end of file for an empty fork, so 1 is returned in *eof in that case.
+ */
+int
+xfs_bmap_eof(
+ struct xfs_inode *ip,
+ xfs_fileoff_t endoff,
+ int whichfork,
+ int *eof)
+{
+ struct xfs_bmbt_irec rec;
+ int error;
+
+ error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
+ if (error || *eof)
+ return error;
+
+ *eof = endoff >= rec.br_startoff + rec.br_blockcount;
+ return 0;
+}
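A schematic caller of the new xfs_bmap_eof() helper; everything other than xfs_bmap_eof() itself (the rounding helper, the stripe-unit field, and the surrounding variables) is illustrative rather than lifted from a real call site:

	int eof;

	/* Schematic only: decide whether an allocation ending at "endoff"
	 * lands beyond the last extent, and if so round the request up to
	 * a stripe unit (roundup_64()/m_dalign usage is illustrative). */
	error = xfs_bmap_eof(ip, endoff, XFS_DATA_FORK, &eof);
	if (error)
		return error;
	if (eof)
		len = roundup_64(len, mp->m_dalign);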
+
/*
* Returns the file-relative block number of the first block past eof in
* the file. This is not based on i_size, it is based on the extent records.
* Returns 0 for local files, as they do not have extent records.
*/
-int /* error */
+int
xfs_bmap_last_offset(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_inode_t *ip, /* incore inode */
- xfs_fileoff_t *last_block, /* last block */
- int whichfork) /* data or attr fork */
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ xfs_fileoff_t *last_block,
+ int whichfork)
{
- xfs_bmbt_rec_host_t *ep; /* pointer to last extent */
- int error; /* error return value */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_extnum_t nextents; /* number of extent entries */
+ struct xfs_bmbt_irec rec;
+ int is_empty;
+ int error;
+
+ *last_block = 0;
+
+ if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
+ return 0;
if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL)
+ XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
return XFS_ERROR(EIO);
- if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
- *last_block = 0;
- return 0;
- }
- ifp = XFS_IFORK_PTR(ip, whichfork);
- if (!(ifp->if_flags & XFS_IFEXTENTS) &&
- (error = xfs_iread_extents(tp, ip, whichfork)))
+
+ error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
+ if (error || is_empty)
return error;
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- if (!nextents) {
- *last_block = 0;
- return 0;
- }
- ep = xfs_iext_get_ext(ifp, nextents - 1);
- *last_block = xfs_bmbt_get_startoff(ep) + xfs_bmbt_get_blockcount(ep);
+
+ *last_block = rec.br_startoff + rec.br_blockcount;
return 0;
}
@@ -4045,7 +3995,7 @@ xfs_bmap_one_block(
#ifndef DEBUG
if (whichfork == XFS_DATA_FORK) {
- return ((ip->i_d.di_mode & S_IFMT) == S_IFREG) ?
+ return S_ISREG(ip->i_d.di_mode) ?
(ip->i_size == ip->i_mount->m_sb.sb_blocksize) :
(ip->i_d.di_size == ip->i_mount->m_sb.sb_blocksize);
}
@@ -4072,7 +4022,7 @@ xfs_bmap_sanity_check(
{
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- if (be32_to_cpu(block->bb_magic) != XFS_BMAP_MAGIC ||
+ if (block->bb_magic != cpu_to_be32(XFS_BMAP_MAGIC) ||
be16_to_cpu(block->bb_level) != level ||
be16_to_cpu(block->bb_numrecs) == 0 ||
be16_to_cpu(block->bb_numrecs) > mp->m_bmap_dmxr[level != 0])
@@ -4154,7 +4104,6 @@ xfs_bmap_read_extents(
xfs_extnum_t num_recs;
xfs_extnum_t start;
-
num_recs = xfs_btree_get_numrecs(block);
if (unlikely(i + num_recs > room)) {
ASSERT(i + num_recs <= room);
@@ -4277,9 +4226,8 @@ xfs_bmap_validate_ret(
ASSERT(i == 0 ||
mval[i - 1].br_startoff + mval[i - 1].br_blockcount ==
mval[i].br_startoff);
- if ((flags & XFS_BMAPI_WRITE) && !(flags & XFS_BMAPI_DELAY))
- ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
- mval[i].br_startblock != HOLESTARTBLOCK);
+ ASSERT(mval[i].br_startblock != DELAYSTARTBLOCK &&
+ mval[i].br_startblock != HOLESTARTBLOCK);
ASSERT(mval[i].br_state == XFS_EXT_NORM ||
mval[i].br_state == XFS_EXT_UNWRITTEN);
}
@@ -4288,66 +4236,611 @@ xfs_bmap_validate_ret(
/*
- * Map file blocks to filesystem blocks.
- * File range is given by the bno/len pair.
- * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
- * into a hole or past eof.
- * Only allocates blocks from a single allocation group,
- * to avoid locking problems.
+ * Trim the returned map to the required bounds
+ */
+STATIC void
+xfs_bmapi_trim_map(
+ struct xfs_bmbt_irec *mval,
+ struct xfs_bmbt_irec *got,
+ xfs_fileoff_t *bno,
+ xfs_filblks_t len,
+ xfs_fileoff_t obno,
+ xfs_fileoff_t end,
+ int n,
+ int flags)
+{
+ if ((flags & XFS_BMAPI_ENTIRE) ||
+ got->br_startoff + got->br_blockcount <= obno) {
+ *mval = *got;
+ if (isnullstartblock(got->br_startblock))
+ mval->br_startblock = DELAYSTARTBLOCK;
+ return;
+ }
+
+ if (obno > *bno)
+ *bno = obno;
+ ASSERT((*bno >= obno) || (n == 0));
+ ASSERT(*bno < end);
+ mval->br_startoff = *bno;
+ if (isnullstartblock(got->br_startblock))
+ mval->br_startblock = DELAYSTARTBLOCK;
+ else
+ mval->br_startblock = got->br_startblock +
+ (*bno - got->br_startoff);
+ /*
+ * Return the minimum of what we got and what we asked for as the
+ * length. We can use the len variable here because it is modified
+ * below, and we may already have been through this path if the first
+ * part of the allocation didn't overlap what was asked for.
+ */
+ mval->br_blockcount = XFS_FILBLKS_MIN(end - *bno,
+ got->br_blockcount - (*bno - got->br_startoff));
+ mval->br_state = got->br_state;
+ ASSERT(mval->br_blockcount <= len);
+ return;
+}
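The trimming logic reduces to simple interval arithmetic. If got maps file offsets 10..29 starting at disk block 500, and the caller asked for 15..24, the trimmed mapping starts at offset 15, disk block 505, for 10 blocks. A standalone sketch with those invented values, using a local struct that mirrors xfs_bmbt_irec:

#include <stdint.h>
#include <stdio.h>

struct irec { uint64_t startoff, startblock, blockcount; };

int main(void)
{
	struct irec got = { 10, 500, 20 };	/* extent: file offsets 10..29 */
	uint64_t bno = 15, end = 25;		/* caller asked for 15..24 */
	struct irec mval;

	mval.startoff = bno;
	mval.startblock = got.startblock + (bno - got.startoff);

	/* min(what was asked for, what is left of the extent) */
	uint64_t asked = end - bno;					/* 10 */
	uint64_t left = got.blockcount - (bno - got.startoff);		/* 15 */
	mval.blockcount = asked < left ? asked : left;

	/* -> { startoff 15, startblock 505, blockcount 10 } */
	printf("%llu %llu %llu\n",
	       (unsigned long long)mval.startoff,
	       (unsigned long long)mval.startblock,
	       (unsigned long long)mval.blockcount);
	return 0;
}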
+
+/*
+ * Update and validate the extent map to return
+ */
+STATIC void
+xfs_bmapi_update_map(
+ struct xfs_bmbt_irec **map,
+ xfs_fileoff_t *bno,
+ xfs_filblks_t *len,
+ xfs_fileoff_t obno,
+ xfs_fileoff_t end,
+ int *n,
+ int flags)
+{
+ xfs_bmbt_irec_t *mval = *map;
+
+ ASSERT((flags & XFS_BMAPI_ENTIRE) ||
+ ((mval->br_startoff + mval->br_blockcount) <= end));
+ ASSERT((flags & XFS_BMAPI_ENTIRE) || (mval->br_blockcount <= *len) ||
+ (mval->br_startoff < obno));
+
+ *bno = mval->br_startoff + mval->br_blockcount;
+ *len = end - *bno;
+ if (*n > 0 && mval->br_startoff == mval[-1].br_startoff) {
+ /* update previous map with new information */
+ ASSERT(mval->br_startblock == mval[-1].br_startblock);
+ ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
+ ASSERT(mval->br_state == mval[-1].br_state);
+ mval[-1].br_blockcount = mval->br_blockcount;
+ mval[-1].br_state = mval->br_state;
+ } else if (*n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
+ mval[-1].br_startblock != DELAYSTARTBLOCK &&
+ mval[-1].br_startblock != HOLESTARTBLOCK &&
+ mval->br_startblock == mval[-1].br_startblock +
+ mval[-1].br_blockcount &&
+ ((flags & XFS_BMAPI_IGSTATE) ||
+ mval[-1].br_state == mval->br_state)) {
+ ASSERT(mval->br_startoff ==
+ mval[-1].br_startoff + mval[-1].br_blockcount);
+ mval[-1].br_blockcount += mval->br_blockcount;
+ } else if (*n > 0 &&
+ mval->br_startblock == DELAYSTARTBLOCK &&
+ mval[-1].br_startblock == DELAYSTARTBLOCK &&
+ mval->br_startoff ==
+ mval[-1].br_startoff + mval[-1].br_blockcount) {
+ mval[-1].br_blockcount += mval->br_blockcount;
+ mval[-1].br_state = mval->br_state;
+ } else if (!((*n == 0) &&
+ ((mval->br_startoff + mval->br_blockcount) <=
+ obno))) {
+ mval++;
+ (*n)++;
+ }
+ *map = mval;
+}
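The contiguity merge in the second branch is the interesting case: when the new mapping is physically adjacent to the previous one and the states match, the previous entry is extended and *n is left alone, so the caller sees a single combined mapping. A standalone sketch with invented numbers:

#include <stdint.h>
#include <stdio.h>

struct irec { uint64_t startoff, startblock, blockcount; };

int main(void)
{
	struct irec map[2] = { { 0, 100, 4 }, { 4, 104, 4 } };
	int n = 1;	/* one mapping already emitted */

	/* Physically and logically contiguous: extend the previous
	 * mapping instead of emitting a new one. */
	if (map[1].startblock == map[0].startblock + map[0].blockcount &&
	    map[1].startoff == map[0].startoff + map[0].blockcount)
		map[0].blockcount += map[1].blockcount;		/* -> 8 */
	else
		n++;

	printf("n=%d count=%llu\n", n, (unsigned long long)map[0].blockcount);
	return 0;
}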
+
+/*
+ * Map file blocks to filesystem blocks without allocation.
+ */
+int
+xfs_bmapi_read(
+ struct xfs_inode *ip,
+ xfs_fileoff_t bno,
+ xfs_filblks_t len,
+ struct xfs_bmbt_irec *mval,
+ int *nmap,
+ int flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ struct xfs_bmbt_irec got;
+ struct xfs_bmbt_irec prev;
+ xfs_fileoff_t obno;
+ xfs_fileoff_t end;
+ xfs_extnum_t lastx;
+ int error;
+ int eof;
+ int n = 0;
+ int whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+
+ ASSERT(*nmap >= 1);
+ ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|
+ XFS_BMAPI_IGSTATE)));
+
+ if (unlikely(XFS_TEST_ERROR(
+ (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
+ mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ XFS_STATS_INC(xs_blk_mapr);
+
+ ifp = XFS_IFORK_PTR(ip, whichfork);
+ ASSERT(ifp->if_ext_max ==
+ XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, ip, whichfork);
+ if (error)
+ return error;
+ }
+
+ xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got, &prev);
+ end = bno + len;
+ obno = bno;
+
+ while (bno < end && n < *nmap) {
+ /* Reading past eof, act as though there's a hole up to end. */
+ if (eof)
+ got.br_startoff = end;
+ if (got.br_startoff > bno) {
+ /* Reading in a hole. */
+ mval->br_startoff = bno;
+ mval->br_startblock = HOLESTARTBLOCK;
+ mval->br_blockcount =
+ XFS_FILBLKS_MIN(len, got.br_startoff - bno);
+ mval->br_state = XFS_EXT_NORM;
+ bno += mval->br_blockcount;
+ len -= mval->br_blockcount;
+ mval++;
+ n++;
+ continue;
+ }
+
+ /* set up the extent map to return. */
+ xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
+ xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
+ /* If we're done, stop now. */
+ if (bno >= end || n >= *nmap)
+ break;
+
+ /* Else go on to the next record. */
+ if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
+ else
+ eof = 1;
+ }
+ *nmap = n;
+ return 0;
+}
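A schematic caller of xfs_bmapi_read(), roughly in the shape of the read-mapping paths that will use it; locking and most error handling are omitted, and the surrounding variables are assumed:

	struct xfs_bmbt_irec map[4];
	int nmaps = 4;

	/* Schematic only: map a file range without allocating anything. */
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
			       map, &nmaps, 0);
	if (error)
		return error;
	for (i = 0; i < nmaps; i++) {
		if (map[i].br_startblock == HOLESTARTBLOCK)
			continue;	/* nothing on disk for this range */
		if (map[i].br_startblock == DELAYSTARTBLOCK)
			continue;	/* delalloc: not on disk yet */
		/* otherwise br_startblock is a real disk address */
	}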
+
+STATIC int
+xfs_bmapi_reserve_delalloc(
+ struct xfs_inode *ip,
+ xfs_fileoff_t aoff,
+ xfs_filblks_t len,
+ struct xfs_bmbt_irec *got,
+ struct xfs_bmbt_irec *prev,
+ xfs_extnum_t *lastx,
+ int eof)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ xfs_extlen_t alen;
+ xfs_extlen_t indlen;
+ char rt = XFS_IS_REALTIME_INODE(ip);
+ xfs_extlen_t extsz;
+ int error;
+
+ alen = XFS_FILBLKS_MIN(len, MAXEXTLEN);
+ if (!eof)
+ alen = XFS_FILBLKS_MIN(alen, got->br_startoff - aoff);
+
+ /* Figure out the extent size, adjust alen */
+ extsz = xfs_get_extsz_hint(ip);
+ if (extsz) {
+ /*
+ * Make sure we don't exceed a single extent length when we align the
+ * extent: reduce the length we are going to allocate by the maximum
+ * amount that extent size alignment may require.
+ */
+ alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1));
+ error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof,
+ 1, 0, &aoff, &alen);
+ ASSERT(!error);
+ }
+
+ if (rt)
+ extsz = alen / mp->m_sb.sb_rextsize;
+
+ /*
+ * Make a transaction-less quota reservation for delayed allocation
+ * blocks. This number gets adjusted later. We return if we haven't
+ * allocated blocks already inside this loop.
+ */
+ error = xfs_trans_reserve_quota_nblks(NULL, ip, (long)alen, 0,
+ rt ? XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ return error;
+
+ /*
+ * Split the superblock counter updates for alen and indlen, since
+ * alen may come from the realtime extent counter while indlen always
+ * comes from the free block counter.
+ */
+ indlen = (xfs_extlen_t)xfs_bmap_worst_indlen(ip, alen);
+ ASSERT(indlen > 0);
+
+ if (rt) {
+ error = xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS,
+ -((int64_t)extsz), 0);
+ } else {
+ error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+ -((int64_t)alen), 0);
+ }
+
+ if (error)
+ goto out_unreserve_quota;
+
+ error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS,
+ -((int64_t)indlen), 0);
+ if (error)
+ goto out_unreserve_blocks;
+
+ ip->i_delayed_blks += alen;
+
+ got->br_startoff = aoff;
+ got->br_startblock = nullstartblock(indlen);
+ got->br_blockcount = alen;
+ got->br_state = XFS_EXT_NORM;
+ xfs_bmap_add_extent_hole_delay(ip, lastx, got);
+
+ /*
+ * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay
+ * might have merged it into one of the neighbouring ones.
+ */
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got);
+
+ ASSERT(got->br_startoff <= aoff);
+ ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen);
+ ASSERT(isnullstartblock(got->br_startblock));
+ ASSERT(got->br_state == XFS_EXT_NORM);
+ return 0;
+
+out_unreserve_blocks:
+ if (rt)
+ xfs_mod_incore_sb(mp, XFS_SBS_FREXTENTS, extsz, 0);
+ else
+ xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, alen, 0);
+out_unreserve_quota:
+ if (XFS_IS_QUOTA_ON(mp))
+ xfs_trans_unreserve_quota_nblks(NULL, ip, alen, 0, rt ?
+ XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS);
+ return error;
+}
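The error unwinding above follows the usual reverse-order pattern: quota is reserved first, then the data blocks, then the worst-case indirect blocks, and a failure at any step releases only what was already taken. A minimal standalone sketch of the same goto structure, with invented resource names:

#include <stdio.h>

static int reserve(const char *what) { printf("reserve %s\n", what); return 0; }
static void unreserve(const char *what) { printf("unreserve %s\n", what); }

int main(void)
{
	if (reserve("quota"))
		return 1;
	if (reserve("data blocks"))
		goto out_unreserve_quota;
	if (reserve("indirect blocks"))
		goto out_unreserve_blocks;
	return 0;		/* all three reservations held */

out_unreserve_blocks:
	unreserve("data blocks");
out_unreserve_quota:
	unreserve("quota");
	return 1;
}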
+
+/*
+ * Map file blocks to filesystem blocks, adding delayed allocations as needed.
+ */
+int
+xfs_bmapi_delay(
+ struct xfs_inode *ip, /* incore inode */
+ xfs_fileoff_t bno, /* starting file offs. mapped */
+ xfs_filblks_t len, /* length to map in file */
+ struct xfs_bmbt_irec *mval, /* output: map values */
+ int *nmap, /* i/o: mval size/count */
+ int flags) /* XFS_BMAPI_... */
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ struct xfs_bmbt_irec got; /* current file extent record */
+ struct xfs_bmbt_irec prev; /* previous file extent record */
+ xfs_fileoff_t obno; /* old block number (offset) */
+ xfs_fileoff_t end; /* end of mapped file region */
+ xfs_extnum_t lastx; /* last useful extent number */
+ int eof; /* we've hit the end of extents */
+ int n = 0; /* current extent index */
+ int error = 0;
+
+ ASSERT(*nmap >= 1);
+ ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
+ ASSERT(!(flags & ~XFS_BMAPI_ENTIRE));
+
+ if (unlikely(XFS_TEST_ERROR(
+ (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
+ mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
+ XFS_ERROR_REPORT("xfs_bmapi_delay", XFS_ERRLEVEL_LOW, mp);
+ return XFS_ERROR(EFSCORRUPTED);
+ }
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return XFS_ERROR(EIO);
+
+ XFS_STATS_INC(xs_blk_mapw);
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+ if (error)
+ return error;
+ }
+
+ xfs_bmap_search_extents(ip, bno, XFS_DATA_FORK, &eof, &lastx, &got, &prev);
+ end = bno + len;
+ obno = bno;
+
+ while (bno < end && n < *nmap) {
+ if (eof || got.br_startoff > bno) {
+ error = xfs_bmapi_reserve_delalloc(ip, bno, len, &got,
+ &prev, &lastx, eof);
+ if (error) {
+ if (n == 0) {
+ *nmap = 0;
+ return error;
+ }
+ break;
+ }
+ }
+
+ /* set up the extent map to return. */
+ xfs_bmapi_trim_map(mval, &got, &bno, len, obno, end, n, flags);
+ xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
+ /* If we're done, stop now. */
+ if (bno >= end || n >= *nmap)
+ break;
+
+ /* Else go on to the next record. */
+ prev = got;
+ if (++lastx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, lastx), &got);
+ else
+ eof = 1;
+ }
+
+ *nmap = n;
+ return 0;
+}
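A schematic caller of xfs_bmapi_delay(), in the shape of a buffered-write delalloc path; the surrounding variables are assumed:

	struct xfs_bmbt_irec imap;
	int nimaps = 1;

	/* Schematic only: reserve delalloc space for a buffered write.
	 * The ILOCK must be held; no transaction is needed. */
	error = xfs_bmapi_delay(ip, offset_fsb, count_fsb,
				&imap, &nimaps, XFS_BMAPI_ENTIRE);
	if (error)
		return error;
	/* imap.br_startblock is now a nullstartblock() cookie holding the
	 * indirect-block reservation, not a real disk address. */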
+
+STATIC int
+xfs_bmapi_allocate(
+ struct xfs_bmalloca *bma,
+ int flags)
+{
+ struct xfs_mount *mp = bma->ip->i_mount;
+ int whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+ int tmp_logflags = 0;
+ int error;
+ int rt;
+
+ ASSERT(bma->length > 0);
+
+ rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(bma->ip);
+
+ /*
+ * For the wasdelay case, we could also just allocate the stuff asked
+ * for in this bmap call but that wouldn't be as good.
+ */
+ if (bma->wasdel) {
+ bma->length = (xfs_extlen_t)bma->got.br_blockcount;
+ bma->offset = bma->got.br_startoff;
+ if (bma->idx != NULLEXTNUM && bma->idx) {
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx - 1),
+ &bma->prev);
+ }
+ } else {
+ bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
+ if (!bma->eof)
+ bma->length = XFS_FILBLKS_MIN(bma->length,
+ bma->got.br_startoff - bma->offset);
+ }
+
+ /*
+ * Indicate if this is the first user data in the file, or just any
+ * user data.
+ */
+ if (!(flags & XFS_BMAPI_METADATA)) {
+ bma->userdata = (bma->offset == 0) ?
+ XFS_ALLOC_INITIAL_USER_DATA : XFS_ALLOC_USERDATA;
+ }
+
+ bma->minlen = (flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+
+ /*
+ * Only want to do the alignment at the eof if it is userdata and
+ * allocation length is larger than a stripe unit.
+ */
+ if (mp->m_dalign && bma->length >= mp->m_dalign &&
+ !(flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
+ error = xfs_bmap_isaeof(bma, whichfork);
+ if (error)
+ return error;
+ }
+
+ error = xfs_bmap_alloc(bma);
+ if (error)
+ return error;
+
+ if (bma->flist->xbf_low)
+ bma->minleft = 0;
+ if (bma->cur)
+ bma->cur->bc_private.b.firstblock = *bma->firstblock;
+ if (bma->blkno == NULLFSBLOCK)
+ return 0;
+ if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
+ bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
+ bma->cur->bc_private.b.firstblock = *bma->firstblock;
+ bma->cur->bc_private.b.flist = bma->flist;
+ }
+ /*
+ * Bump the number of extents we've allocated
+ * in this call.
+ */
+ bma->nallocs++;
+
+ if (bma->cur)
+ bma->cur->bc_private.b.flags =
+ bma->wasdel ? XFS_BTCUR_BPRV_WASDEL : 0;
+
+ bma->got.br_startoff = bma->offset;
+ bma->got.br_startblock = bma->blkno;
+ bma->got.br_blockcount = bma->length;
+ bma->got.br_state = XFS_EXT_NORM;
+
+ /*
+ * A wasdelay extent has been initialized, so shouldn't be flagged
+ * as unwritten.
+ */
+ if (!bma->wasdel && (flags & XFS_BMAPI_PREALLOC) &&
+ xfs_sb_version_hasextflgbit(&mp->m_sb))
+ bma->got.br_state = XFS_EXT_UNWRITTEN;
+
+ if (bma->wasdel)
+ error = xfs_bmap_add_extent_delay_real(bma);
+ else
+ error = xfs_bmap_add_extent_hole_real(bma, whichfork);
+
+ bma->logflags |= tmp_logflags;
+ if (error)
+ return error;
+
+ /*
+ * Update our extent pointer, given that xfs_bmap_add_extent_delay_real
+ * or xfs_bmap_add_extent_hole_real might have merged it into one of
+ * the neighbouring ones.
+ */
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+
+ ASSERT(bma->got.br_startoff <= bma->offset);
+ ASSERT(bma->got.br_startoff + bma->got.br_blockcount >=
+ bma->offset + bma->length);
+ ASSERT(bma->got.br_state == XFS_EXT_NORM ||
+ bma->got.br_state == XFS_EXT_UNWRITTEN);
+ return 0;
+}
+
+STATIC int
+xfs_bmapi_convert_unwritten(
+ struct xfs_bmalloca *bma,
+ struct xfs_bmbt_irec *mval,
+ xfs_filblks_t len,
+ int flags)
+{
+ int whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
+ XFS_ATTR_FORK : XFS_DATA_FORK;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(bma->ip, whichfork);
+ int tmp_logflags = 0;
+ int error;
+
+ /* check if we need to do unwritten->real conversion */
+ if (mval->br_state == XFS_EXT_UNWRITTEN &&
+ (flags & XFS_BMAPI_PREALLOC))
+ return 0;
+
+ /* check if we need to do real->unwritten conversion */
+ if (mval->br_state == XFS_EXT_NORM &&
+ (flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) !=
+ (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
+ return 0;
+
+ /*
+ * Toggle the extent state between written and unwritten.
+ */
+ ASSERT(mval->br_blockcount <= len);
+ if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur) {
+ bma->cur = xfs_bmbt_init_cursor(bma->ip->i_mount, bma->tp,
+ bma->ip, whichfork);
+ bma->cur->bc_private.b.firstblock = *bma->firstblock;
+ bma->cur->bc_private.b.flist = bma->flist;
+ }
+ mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
+ ? XFS_EXT_NORM : XFS_EXT_UNWRITTEN;
+
+ error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx,
+ &bma->cur, mval, bma->firstblock, bma->flist,
+ &tmp_logflags);
+ bma->logflags |= tmp_logflags;
+ if (error)
+ return error;
+
+ /*
+ * Update our extent pointer, given that
+ * xfs_bmap_add_extent_unwritten_real might have merged it into one
+ * of the neighbouring ones.
+ */
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx), &bma->got);
+
+ /*
+ * We may have combined previously unwritten space with written space,
+ * so generate another request.
+ */
+ if (mval->br_blockcount < len)
+ return EAGAIN;
+ return 0;
+}
+
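
A note on the EAGAIN return just above: it is a control signal rather than a
failure. It tells the mapping loop that only part of the requested range was
converted and the same map entry needs another pass. A minimal sketch of the
caller-side convention (mirroring the loop in xfs_bmapi_write() below):

	error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
	if (error == EAGAIN)
		continue;	/* partial conversion: retry this entry */
	if (error)
		goto error0;
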
+/*
+ * Map file blocks to filesystem blocks, and allocate blocks or convert the
+ * extent state if necessary. Detailed behaviour is controlled by the flags
+ * parameter. Only allocates blocks from a single allocation group, to avoid
+ * locking problems.
+ *
* The returned value in "firstblock" from the first call in a transaction
* must be remembered and presented to subsequent calls in "firstblock".
* An upper bound for the number of blocks to be allocated is supplied to
* the first call in "total"; if no allocation group has that many free
* blocks then the call will fail (return NULLFSBLOCK in "firstblock").
*/
-int /* error */
-xfs_bmapi(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_inode_t *ip, /* incore inode */
- xfs_fileoff_t bno, /* starting file offs. mapped */
- xfs_filblks_t len, /* length to map in file */
- int flags, /* XFS_BMAPI_... */
- xfs_fsblock_t *firstblock, /* first allocated block
- controls a.g. for allocs */
- xfs_extlen_t total, /* total blocks needed */
- xfs_bmbt_irec_t *mval, /* output: map values */
- int *nmap, /* i/o: mval size/count */
- xfs_bmap_free_t *flist) /* i/o: list extents to free */
+int
+xfs_bmapi_write(
+ struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_inode *ip, /* incore inode */
+ xfs_fileoff_t bno, /* starting file offs. mapped */
+ xfs_filblks_t len, /* length to map in file */
+ int flags, /* XFS_BMAPI_... */
+ xfs_fsblock_t *firstblock, /* first allocated block
+ controls a.g. for allocs */
+ xfs_extlen_t total, /* total blocks needed */
+ struct xfs_bmbt_irec *mval, /* output: map values */
+ int *nmap, /* i/o: mval size/count */
+ struct xfs_bmap_free *flist) /* i/o: list extents to free */
{
- xfs_fsblock_t abno; /* allocated block number */
- xfs_extlen_t alen; /* allocated extent length */
- xfs_fileoff_t aoff; /* allocated file offset */
- xfs_bmalloca_t bma = { 0 }; /* args for xfs_bmap_alloc */
- xfs_btree_cur_t *cur; /* bmap btree cursor */
- xfs_fileoff_t end; /* end of mapped file region */
- int eof; /* we've hit the end of extents */
- xfs_bmbt_rec_host_t *ep; /* extent record pointer */
- int error; /* error return */
- xfs_bmbt_irec_t got; /* current file extent record */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_extlen_t indlen; /* indirect blocks length */
- xfs_extnum_t lastx; /* last useful extent number */
- int logflags; /* flags for transaction logging */
- xfs_extlen_t minleft; /* min blocks left after allocation */
- xfs_extlen_t minlen; /* min allocation size */
- xfs_mount_t *mp; /* xfs mount structure */
- int n; /* current extent index */
- int nallocs; /* number of extents alloc'd */
- xfs_extnum_t nextents; /* number of extents in file */
- xfs_fileoff_t obno; /* old block number (offset) */
- xfs_bmbt_irec_t prev; /* previous file extent record */
- int tmp_logflags; /* temp flags holder */
- int whichfork; /* data or attr fork */
- char inhole; /* current location is hole in file */
- char wasdelay; /* old extent was delayed */
- char wr; /* this is a write request */
- char rt; /* this is a realtime file */
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp;
+ struct xfs_bmalloca bma = { 0 }; /* args for xfs_bmap_alloc */
+ xfs_fileoff_t end; /* end of mapped file region */
+ int eof; /* after the end of extents */
+ int error; /* error return */
+ int n; /* current extent index */
+ xfs_fileoff_t obno; /* old block number (offset) */
+ int whichfork; /* data or attr fork */
+ char inhole; /* current location is hole in file */
+ char wasdelay; /* old extent was delayed */
+
#ifdef DEBUG
- xfs_fileoff_t orig_bno; /* original block number value */
- int orig_flags; /* original flags arg value */
- xfs_filblks_t orig_len; /* original value of len arg */
- xfs_bmbt_irec_t *orig_mval; /* original value of mval */
- int orig_nmap; /* original value of *nmap */
+ xfs_fileoff_t orig_bno; /* original block number value */
+ int orig_flags; /* original flags arg value */
+ xfs_filblks_t orig_len; /* original value of len arg */
+ struct xfs_bmbt_irec *orig_mval; /* original value of mval */
+ int orig_nmap; /* original value of *nmap */
orig_bno = bno;
orig_len = len;
@@ -4355,488 +4848,147 @@ xfs_bmapi(
orig_mval = mval;
orig_nmap = *nmap;
#endif
+
ASSERT(*nmap >= 1);
- ASSERT(*nmap <= XFS_BMAP_MAX_NMAP || !(flags & XFS_BMAPI_WRITE));
+ ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
+ ASSERT(!(flags & XFS_BMAPI_IGSTATE));
+ ASSERT(tp != NULL);
+ ASSERT(len > 0);
+
whichfork = (flags & XFS_BMAPI_ATTRFORK) ?
XFS_ATTR_FORK : XFS_DATA_FORK;
- mp = ip->i_mount;
+
if (unlikely(XFS_TEST_ERROR(
(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL),
mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) {
- XFS_ERROR_REPORT("xfs_bmapi", XFS_ERRLEVEL_LOW, mp);
+ XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
return XFS_ERROR(EFSCORRUPTED);
}
+
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
- rt = (whichfork == XFS_DATA_FORK) && XFS_IS_REALTIME_INODE(ip);
+
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(ifp->if_ext_max ==
XFS_IFORK_SIZE(ip, whichfork) / (uint)sizeof(xfs_bmbt_rec_t));
- if ((wr = (flags & XFS_BMAPI_WRITE)) != 0)
- XFS_STATS_INC(xs_blk_mapw);
- else
- XFS_STATS_INC(xs_blk_mapr);
- /*
- * IGSTATE flag is used to combine extents which
- * differ only due to the state of the extents.
- * This technique is used from xfs_getbmap()
- * when the caller does not wish to see the
- * separation (which is the default).
- *
- * This technique is also used when writing a
- * buffer which has been partially written,
- * (usually by being flushed during a chunkread),
- * to ensure one write takes place. This also
- * prevents a change in the xfs inode extents at
- * this time, intentionally. This change occurs
- * on completion of the write operation, in
- * xfs_strat_comp(), where the xfs_bmapi() call
- * is transactioned, and the extents combined.
- */
- if ((flags & XFS_BMAPI_IGSTATE) && wr) /* if writing unwritten space */
- wr = 0; /* no allocations are allowed */
- ASSERT(wr || !(flags & XFS_BMAPI_DELAY));
- logflags = 0;
- nallocs = 0;
- cur = NULL;
+
+ XFS_STATS_INC(xs_blk_mapw);
+
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
- ASSERT(wr && tp);
- if ((error = xfs_bmap_local_to_extents(tp, ip,
- firstblock, total, &logflags, whichfork)))
+ error = xfs_bmap_local_to_extents(tp, ip, firstblock, total,
+ &bma.logflags, whichfork);
+ if (error)
goto error0;
}
- if (wr && *firstblock == NULLFSBLOCK) {
+
+ if (*firstblock == NULLFSBLOCK) {
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE)
- minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
+ bma.minleft = be16_to_cpu(ifp->if_broot->bb_level) + 1;
else
- minleft = 1;
- } else
- minleft = 0;
- if (!(ifp->if_flags & XFS_IFEXTENTS) &&
- (error = xfs_iread_extents(tp, ip, whichfork)))
- goto error0;
- ep = xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
- &prev);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ bma.minleft = 1;
+ } else {
+ bma.minleft = 0;
+ }
+
+ if (!(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(tp, ip, whichfork);
+ if (error)
+ goto error0;
+ }
+
+ xfs_bmap_search_extents(ip, bno, whichfork, &eof, &bma.idx, &bma.got,
+ &bma.prev);
n = 0;
end = bno + len;
obno = bno;
- bma.ip = NULL;
+
+ bma.tp = tp;
+ bma.ip = ip;
+ bma.total = total;
+ bma.userdata = 0;
+ bma.flist = flist;
+ bma.firstblock = firstblock;
while (bno < end && n < *nmap) {
- /*
- * Reading past eof, act as though there's a hole
- * up to end.
- */
- if (eof && !wr)
- got.br_startoff = end;
- inhole = eof || got.br_startoff > bno;
- wasdelay = wr && !inhole && !(flags & XFS_BMAPI_DELAY) &&
- isnullstartblock(got.br_startblock);
+ inhole = eof || bma.got.br_startoff > bno;
+ wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
+
/*
* First, deal with the hole before the allocated space
* that we found, if any.
*/
- if (wr && (inhole || wasdelay)) {
- /*
- * For the wasdelay case, we could also just
- * allocate the stuff asked for in this bmap call
- * but that wouldn't be as good.
- */
- if (wasdelay) {
- alen = (xfs_extlen_t)got.br_blockcount;
- aoff = got.br_startoff;
- if (lastx != NULLEXTNUM && lastx) {
- ep = xfs_iext_get_ext(ifp, lastx - 1);
- xfs_bmbt_get_all(ep, &prev);
- }
- } else {
- alen = (xfs_extlen_t)
- XFS_FILBLKS_MIN(len, MAXEXTLEN);
- if (!eof)
- alen = (xfs_extlen_t)
- XFS_FILBLKS_MIN(alen,
- got.br_startoff - bno);
- aoff = bno;
- }
- minlen = (flags & XFS_BMAPI_CONTIG) ? alen : 1;
- if (flags & XFS_BMAPI_DELAY) {
- xfs_extlen_t extsz;
-
- /* Figure out the extent size, adjust alen */
- extsz = xfs_get_extsz_hint(ip);
- if (extsz) {
- /*
- * make sure we don't exceed a single
- * extent length when we align the
- * extent by reducing length we are
- * going to allocate by the maximum
-					 * amount extent size alignment may
- * require.
- */
- alen = XFS_FILBLKS_MIN(len,
- MAXEXTLEN - (2 * extsz - 1));
- error = xfs_bmap_extsize_align(mp,
- &got, &prev, extsz,
- rt, eof,
- flags&XFS_BMAPI_DELAY,
- flags&XFS_BMAPI_CONVERT,
- &aoff, &alen);
- ASSERT(!error);
- }
-
- if (rt)
- extsz = alen / mp->m_sb.sb_rextsize;
-
- /*
- * Make a transaction-less quota reservation for
- * delayed allocation blocks. This number gets
- * adjusted later. We return if we haven't
- * allocated blocks already inside this loop.
- */
- error = xfs_trans_reserve_quota_nblks(
- NULL, ip, (long)alen, 0,
- rt ? XFS_QMOPT_RES_RTBLKS :
- XFS_QMOPT_RES_REGBLKS);
- if (error) {
- if (n == 0) {
- *nmap = 0;
- ASSERT(cur == NULL);
- return error;
- }
- break;
- }
+ if (inhole || wasdelay) {
+ bma.eof = eof;
+ bma.conv = !!(flags & XFS_BMAPI_CONVERT);
+ bma.wasdel = wasdelay;
+ bma.offset = bno;
- /*
- * Split changing sb for alen and indlen since
- * they could be coming from different places.
- */
- indlen = (xfs_extlen_t)
- xfs_bmap_worst_indlen(ip, alen);
- ASSERT(indlen > 0);
-
- if (rt) {
- error = xfs_mod_incore_sb(mp,
- XFS_SBS_FREXTENTS,
- -((int64_t)extsz), 0);
- } else {
- error = xfs_icsb_modify_counters(mp,
- XFS_SBS_FDBLOCKS,
- -((int64_t)alen), 0);
- }
- if (!error) {
- error = xfs_icsb_modify_counters(mp,
- XFS_SBS_FDBLOCKS,
- -((int64_t)indlen), 0);
- if (error && rt)
- xfs_mod_incore_sb(mp,
- XFS_SBS_FREXTENTS,
- (int64_t)extsz, 0);
- else if (error)
- xfs_icsb_modify_counters(mp,
- XFS_SBS_FDBLOCKS,
- (int64_t)alen, 0);
- }
-
- if (error) {
- if (XFS_IS_QUOTA_ON(mp))
- /* unreserve the blocks now */
- (void)
- xfs_trans_unreserve_quota_nblks(
- NULL, ip,
- (long)alen, 0, rt ?
- XFS_QMOPT_RES_RTBLKS :
- XFS_QMOPT_RES_REGBLKS);
- break;
- }
-
- ip->i_delayed_blks += alen;
- abno = nullstartblock(indlen);
- } else {
- /*
- * If first time, allocate and fill in
- * once-only bma fields.
- */
- if (bma.ip == NULL) {
- bma.tp = tp;
- bma.ip = ip;
- bma.prevp = &prev;
- bma.gotp = &got;
- bma.total = total;
- bma.userdata = 0;
- }
- /* Indicate if this is the first user data
- * in the file, or just any user data.
- */
- if (!(flags & XFS_BMAPI_METADATA)) {
- bma.userdata = (aoff == 0) ?
- XFS_ALLOC_INITIAL_USER_DATA :
- XFS_ALLOC_USERDATA;
- }
- /*
- * Fill in changeable bma fields.
- */
- bma.eof = eof;
- bma.firstblock = *firstblock;
- bma.alen = alen;
- bma.off = aoff;
- bma.conv = !!(flags & XFS_BMAPI_CONVERT);
- bma.wasdel = wasdelay;
- bma.minlen = minlen;
- bma.low = flist->xbf_low;
- bma.minleft = minleft;
- /*
- * Only want to do the alignment at the
- * eof if it is userdata and allocation length
- * is larger than a stripe unit.
- */
- if (mp->m_dalign && alen >= mp->m_dalign &&
- (!(flags & XFS_BMAPI_METADATA)) &&
- (whichfork == XFS_DATA_FORK)) {
- if ((error = xfs_bmap_isaeof(ip, aoff,
- whichfork, &bma.aeof)))
- goto error0;
- } else
- bma.aeof = 0;
- /*
- * Call allocator.
- */
- if ((error = xfs_bmap_alloc(&bma)))
- goto error0;
- /*
- * Copy out result fields.
- */
- abno = bma.rval;
- if ((flist->xbf_low = bma.low))
- minleft = 0;
- alen = bma.alen;
- aoff = bma.off;
- ASSERT(*firstblock == NULLFSBLOCK ||
- XFS_FSB_TO_AGNO(mp, *firstblock) ==
- XFS_FSB_TO_AGNO(mp, bma.firstblock) ||
- (flist->xbf_low &&
- XFS_FSB_TO_AGNO(mp, *firstblock) <
- XFS_FSB_TO_AGNO(mp, bma.firstblock)));
- *firstblock = bma.firstblock;
- if (cur)
- cur->bc_private.b.firstblock =
- *firstblock;
- if (abno == NULLFSBLOCK)
- break;
- if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
- cur = xfs_bmbt_init_cursor(mp, tp,
- ip, whichfork);
- cur->bc_private.b.firstblock =
- *firstblock;
- cur->bc_private.b.flist = flist;
- }
- /*
- * Bump the number of extents we've allocated
- * in this call.
- */
- nallocs++;
- }
- if (cur)
- cur->bc_private.b.flags =
- wasdelay ? XFS_BTCUR_BPRV_WASDEL : 0;
- got.br_startoff = aoff;
- got.br_startblock = abno;
- got.br_blockcount = alen;
- got.br_state = XFS_EXT_NORM; /* assume normal */
- /*
- * Determine state of extent, and the filesystem.
- * A wasdelay extent has been initialized, so
- * shouldn't be flagged as unwritten.
- */
- if (wr && xfs_sb_version_hasextflgbit(&mp->m_sb)) {
- if (!wasdelay && (flags & XFS_BMAPI_PREALLOC))
- got.br_state = XFS_EXT_UNWRITTEN;
- }
- error = xfs_bmap_add_extent(ip, &lastx, &cur, &got,
- firstblock, flist, &tmp_logflags,
- whichfork);
- logflags |= tmp_logflags;
- if (error)
- goto error0;
- ep = xfs_iext_get_ext(ifp, lastx);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- xfs_bmbt_get_all(ep, &got);
- ASSERT(got.br_startoff <= aoff);
- ASSERT(got.br_startoff + got.br_blockcount >=
- aoff + alen);
-#ifdef DEBUG
- if (flags & XFS_BMAPI_DELAY) {
- ASSERT(isnullstartblock(got.br_startblock));
- ASSERT(startblockval(got.br_startblock) > 0);
- }
- ASSERT(got.br_state == XFS_EXT_NORM ||
- got.br_state == XFS_EXT_UNWRITTEN);
-#endif
- /*
- * Fall down into the found allocated space case.
- */
- } else if (inhole) {
- /*
- * Reading in a hole.
- */
- mval->br_startoff = bno;
- mval->br_startblock = HOLESTARTBLOCK;
- mval->br_blockcount =
- XFS_FILBLKS_MIN(len, got.br_startoff - bno);
- mval->br_state = XFS_EXT_NORM;
- bno += mval->br_blockcount;
- len -= mval->br_blockcount;
- mval++;
- n++;
- continue;
- }
- /*
- * Then deal with the allocated space we found.
- */
- ASSERT(ep != NULL);
- if (!(flags & XFS_BMAPI_ENTIRE) &&
- (got.br_startoff + got.br_blockcount > obno)) {
- if (obno > bno)
- bno = obno;
- ASSERT((bno >= obno) || (n == 0));
- ASSERT(bno < end);
- mval->br_startoff = bno;
- if (isnullstartblock(got.br_startblock)) {
- ASSERT(!wr || (flags & XFS_BMAPI_DELAY));
- mval->br_startblock = DELAYSTARTBLOCK;
- } else
- mval->br_startblock =
- got.br_startblock +
- (bno - got.br_startoff);
/*
- * Return the minimum of what we got and what we
- * asked for for the length. We can use the len
- * variable here because it is modified below
- * and we could have been there before coming
- * here if the first part of the allocation
- * didn't overlap what was asked for.
+ * There's a 32/64 bit type mismatch between the
+ * allocation length request (which can be 64 bits in
+ * length) and the bma length request, which is
+ * xfs_extlen_t and therefore 32 bits. Hence we have to
+ * check for 32-bit overflows and handle them here.
*/
- mval->br_blockcount =
- XFS_FILBLKS_MIN(end - bno, got.br_blockcount -
- (bno - got.br_startoff));
- mval->br_state = got.br_state;
- ASSERT(mval->br_blockcount <= len);
- } else {
- *mval = got;
- if (isnullstartblock(mval->br_startblock)) {
- ASSERT(!wr || (flags & XFS_BMAPI_DELAY));
- mval->br_startblock = DELAYSTARTBLOCK;
- }
- }
+ if (len > (xfs_filblks_t)MAXEXTLEN)
+ bma.length = MAXEXTLEN;
+ else
+ bma.length = len;
- /*
- * Check if writing previously allocated but
- * unwritten extents.
- */
- if (wr &&
- ((mval->br_state == XFS_EXT_UNWRITTEN &&
- ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) ||
- (mval->br_state == XFS_EXT_NORM &&
- ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) ==
- (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) {
- /*
- * Modify (by adding) the state flag, if writing.
- */
- ASSERT(mval->br_blockcount <= len);
- if ((ifp->if_flags & XFS_IFBROOT) && !cur) {
- cur = xfs_bmbt_init_cursor(mp,
- tp, ip, whichfork);
- cur->bc_private.b.firstblock =
- *firstblock;
- cur->bc_private.b.flist = flist;
- }
- mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN)
- ? XFS_EXT_NORM
- : XFS_EXT_UNWRITTEN;
- error = xfs_bmap_add_extent(ip, &lastx, &cur, mval,
- firstblock, flist, &tmp_logflags,
- whichfork);
- logflags |= tmp_logflags;
+ ASSERT(len > 0);
+ ASSERT(bma.length > 0);
+ error = xfs_bmapi_allocate(&bma, flags);
if (error)
goto error0;
- ep = xfs_iext_get_ext(ifp, lastx);
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- xfs_bmbt_get_all(ep, &got);
- /*
- * We may have combined previously unwritten
- * space with written space, so generate
- * another request.
- */
- if (mval->br_blockcount < len)
- continue;
+ if (bma.blkno == NULLFSBLOCK)
+ break;
}
- ASSERT((flags & XFS_BMAPI_ENTIRE) ||
- ((mval->br_startoff + mval->br_blockcount) <= end));
- ASSERT((flags & XFS_BMAPI_ENTIRE) ||
- (mval->br_blockcount <= len) ||
- (mval->br_startoff < obno));
- bno = mval->br_startoff + mval->br_blockcount;
- len = end - bno;
- if (n > 0 && mval->br_startoff == mval[-1].br_startoff) {
- ASSERT(mval->br_startblock == mval[-1].br_startblock);
- ASSERT(mval->br_blockcount > mval[-1].br_blockcount);
- ASSERT(mval->br_state == mval[-1].br_state);
- mval[-1].br_blockcount = mval->br_blockcount;
- mval[-1].br_state = mval->br_state;
- } else if (n > 0 && mval->br_startblock != DELAYSTARTBLOCK &&
- mval[-1].br_startblock != DELAYSTARTBLOCK &&
- mval[-1].br_startblock != HOLESTARTBLOCK &&
- mval->br_startblock ==
- mval[-1].br_startblock + mval[-1].br_blockcount &&
- ((flags & XFS_BMAPI_IGSTATE) ||
- mval[-1].br_state == mval->br_state)) {
- ASSERT(mval->br_startoff ==
- mval[-1].br_startoff + mval[-1].br_blockcount);
- mval[-1].br_blockcount += mval->br_blockcount;
- } else if (n > 0 &&
- mval->br_startblock == DELAYSTARTBLOCK &&
- mval[-1].br_startblock == DELAYSTARTBLOCK &&
- mval->br_startoff ==
- mval[-1].br_startoff + mval[-1].br_blockcount) {
- mval[-1].br_blockcount += mval->br_blockcount;
- mval[-1].br_state = mval->br_state;
- } else if (!((n == 0) &&
- ((mval->br_startoff + mval->br_blockcount) <=
- obno))) {
- mval++;
- n++;
- }
+ /* Deal with the allocated space we found. */
+ xfs_bmapi_trim_map(mval, &bma.got, &bno, len, obno,
+ end, n, flags);
+
+ /* Execute unwritten extent conversion if necessary */
+ error = xfs_bmapi_convert_unwritten(&bma, mval, len, flags);
+ if (error == EAGAIN)
+ continue;
+ if (error)
+ goto error0;
+
+ /* update the extent map to return */
+ xfs_bmapi_update_map(&mval, &bno, &len, obno, end, &n, flags);
+
/*
* If we're done, stop now. Stop when we've allocated
* XFS_BMAP_MAX_NMAP extents no matter what. Otherwise
* the transaction may get too big.
*/
- if (bno >= end || n >= *nmap || nallocs >= *nmap)
+ if (bno >= end || n >= *nmap || bma.nallocs >= *nmap)
break;
- /*
- * Else go on to the next record.
- */
- prev = got;
- if (++lastx < nextents) {
- ep = xfs_iext_get_ext(ifp, lastx);
- xfs_bmbt_get_all(ep, &got);
- } else {
+
+ /* Else go on to the next record. */
+ bma.prev = bma.got;
+ if (++bma.idx < ifp->if_bytes / sizeof(xfs_bmbt_rec_t)) {
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma.idx),
+ &bma.got);
+ } else
eof = 1;
- }
}
*nmap = n;
+
/*
* Transform from btree to extents, give it cur.
*/
- if (tp && XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
+ if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE &&
XFS_IFORK_NEXTENTS(ip, whichfork) <= ifp->if_ext_max) {
- ASSERT(wr && cur);
- error = xfs_bmap_btree_to_extents(tp, ip, cur,
+ int tmp_logflags = 0;
+
+ ASSERT(bma.cur);
+ error = xfs_bmap_btree_to_extents(tp, ip, bma.cur,
&tmp_logflags, whichfork);
- logflags |= tmp_logflags;
+ bma.logflags |= tmp_logflags;
if (error)
goto error0;
}
@@ -4850,34 +5002,33 @@ error0:
* Log everything. Do this after conversion, there's no point in
* logging the extent records if we've converted to btree format.
*/
- if ((logflags & xfs_ilog_fext(whichfork)) &&
+ if ((bma.logflags & xfs_ilog_fext(whichfork)) &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
- logflags &= ~xfs_ilog_fext(whichfork);
- else if ((logflags & xfs_ilog_fbroot(whichfork)) &&
+ bma.logflags &= ~xfs_ilog_fext(whichfork);
+ else if ((bma.logflags & xfs_ilog_fbroot(whichfork)) &&
XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)
- logflags &= ~xfs_ilog_fbroot(whichfork);
+ bma.logflags &= ~xfs_ilog_fbroot(whichfork);
/*
* Log whatever the flags say, even if error. Otherwise we might miss
* detecting a case where the data is changed, there's an error,
	 * and it's not logged so we don't shut down when we should.
*/
- if (logflags) {
- ASSERT(tp && wr);
- xfs_trans_log_inode(tp, ip, logflags);
- }
- if (cur) {
+ if (bma.logflags)
+ xfs_trans_log_inode(tp, ip, bma.logflags);
+
+ if (bma.cur) {
if (!error) {
ASSERT(*firstblock == NULLFSBLOCK ||
XFS_FSB_TO_AGNO(mp, *firstblock) ==
XFS_FSB_TO_AGNO(mp,
- cur->bc_private.b.firstblock) ||
+ bma.cur->bc_private.b.firstblock) ||
(flist->xbf_low &&
XFS_FSB_TO_AGNO(mp, *firstblock) <
XFS_FSB_TO_AGNO(mp,
- cur->bc_private.b.firstblock)));
- *firstblock = cur->bc_private.b.firstblock;
+ bma.cur->bc_private.b.firstblock)));
+ *firstblock = bma.cur->bc_private.b.firstblock;
}
- xfs_btree_del_cursor(cur,
+ xfs_btree_del_cursor(bma.cur,
error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
}
if (!error)
@@ -4887,58 +5038,6 @@ error0:
}
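
For callers tracking this interface split: a write mapping that previously
went through xfs_bmapi() with XFS_BMAPI_WRITE set now calls xfs_bmapi_write()
directly and drops the flag. A before/after sketch (the variable names here
are illustrative, not taken from the patch):

	/* before: write intent carried in the flags */
	error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
			  XFS_BMAPI_WRITE | XFS_BMAPI_PREALLOC,
			  &firstfsb, resblks, imap, &nimaps, &free_list);

	/* after: write intent is implied by the function itself */
	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
				XFS_BMAPI_PREALLOC, &firstfsb, resblks,
				imap, &nimaps, &free_list);
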
/*
- * Map file blocks to filesystem blocks, simple version.
- * One block (extent) only, read-only.
- * For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
- * For the other flag values, the effect is as if XFS_BMAPI_METADATA
- * was set and all the others were clear.
- */
-int /* error */
-xfs_bmapi_single(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_inode_t *ip, /* incore inode */
- int whichfork, /* data or attr fork */
- xfs_fsblock_t *fsb, /* output: mapped block */
- xfs_fileoff_t bno) /* starting file offs. mapped */
-{
- int eof; /* we've hit the end of extents */
- int error; /* error return */
- xfs_bmbt_irec_t got; /* current file extent record */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_extnum_t lastx; /* last useful extent number */
- xfs_bmbt_irec_t prev; /* previous file extent record */
-
- ifp = XFS_IFORK_PTR(ip, whichfork);
- if (unlikely(
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)) {
- XFS_ERROR_REPORT("xfs_bmapi_single", XFS_ERRLEVEL_LOW,
- ip->i_mount);
- return XFS_ERROR(EFSCORRUPTED);
- }
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return XFS_ERROR(EIO);
- XFS_STATS_INC(xs_blk_mapr);
- if (!(ifp->if_flags & XFS_IFEXTENTS) &&
- (error = xfs_iread_extents(tp, ip, whichfork)))
- return error;
- (void)xfs_bmap_search_extents(ip, bno, whichfork, &eof, &lastx, &got,
- &prev);
- /*
- * Reading past eof, act as though there's a hole
- * up to end.
- */
- if (eof || got.br_startoff > bno) {
- *fsb = NULLFSBLOCK;
- return 0;
- }
- ASSERT(!isnullstartblock(got.br_startblock));
- ASSERT(bno < got.br_startoff + got.br_blockcount);
- *fsb = got.br_startblock + (bno - got.br_startoff);
- return 0;
-}
-
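
xfs_bmapi_single() goes away without a one-for-one replacement; former users
are expected to call xfs_bmapi_read() with a single map entry instead. A
sketch of an equivalent lookup, assuming the caller holds the ilock as
before:

	struct xfs_bmbt_irec	map;
	int			nmap = 1;

	error = xfs_bmapi_read(ip, bno, 1, &map, &nmap,
			       whichfork == XFS_ATTR_FORK ?
					XFS_BMAPI_ATTRFORK : 0);
	if (error)
		return error;
	if (!nmap || map.br_startblock == HOLESTARTBLOCK)
		*fsb = NULLFSBLOCK;
	else
		*fsb = map.br_startblock + (bno - map.br_startoff);
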
-/*
* Unmap (remove) blocks from a file.
* If nexts is nonzero then the number of extents to remove is limited to
* that value. If not all extents in the block range can be removed then
@@ -5109,9 +5208,9 @@ xfs_bunmapi(
del.br_blockcount = mod;
}
del.br_state = XFS_EXT_UNWRITTEN;
- error = xfs_bmap_add_extent(ip, &lastx, &cur, &del,
- firstblock, flist, &logflags,
- XFS_DATA_FORK);
+ error = xfs_bmap_add_extent_unwritten_real(tp, ip,
+ &lastx, &cur, &del, firstblock, flist,
+ &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5167,18 +5266,18 @@ xfs_bunmapi(
}
prev.br_state = XFS_EXT_UNWRITTEN;
lastx--;
- error = xfs_bmap_add_extent(ip, &lastx, &cur,
- &prev, firstblock, flist, &logflags,
- XFS_DATA_FORK);
+ error = xfs_bmap_add_extent_unwritten_real(tp,
+ ip, &lastx, &cur, &prev,
+ firstblock, flist, &logflags);
if (error)
goto error0;
goto nodelete;
} else {
ASSERT(del.br_state == XFS_EXT_NORM);
del.br_state = XFS_EXT_UNWRITTEN;
- error = xfs_bmap_add_extent(ip, &lastx, &cur,
- &del, firstblock, flist, &logflags,
- XFS_DATA_FORK);
+ error = xfs_bmap_add_extent_unwritten_real(tp,
+ ip, &lastx, &cur, &del,
+ firstblock, flist, &logflags);
if (error)
goto error0;
goto nodelete;
@@ -5500,10 +5599,9 @@ xfs_getbmap(
do {
nmap = (nexleft > subnex) ? subnex : nexleft;
- error = xfs_bmapi(NULL, ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
- XFS_BB_TO_FSB(mp, bmv->bmv_length),
- bmapi_flags, NULL, 0, map, &nmap,
- NULL);
+ error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
+ XFS_BB_TO_FSB(mp, bmv->bmv_length),
+ map, &nmap, bmapi_flags);
if (error)
goto out_free_map;
ASSERT(nmap <= subnex);
@@ -5577,89 +5675,6 @@ xfs_getbmap(
return error;
}
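
The read path no longer threads a transaction, firstblock pointer or free
list through the call, since a pure lookup can never allocate. The
xfs_getbmap() conversion above is representative of the mechanical change at
every read-side call site:

	/* before: three always-unused allocation arguments */
	error = xfs_bmapi(NULL, ip, bno, len, flags, NULL, 0,
			  map, &nmap, NULL);

	/* after */
	error = xfs_bmapi_read(ip, bno, len, map, &nmap, flags);
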
-/*
- * Check the last inode extent to determine whether this allocation will result
- * in blocks being allocated at the end of the file. When we allocate new data
- * blocks at the end of the file which do not start at the previous data block,
- * we will try to align the new blocks at stripe unit boundaries.
- */
-STATIC int /* error */
-xfs_bmap_isaeof(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_fileoff_t off, /* file offset in fsblocks */
- int whichfork, /* data or attribute fork */
- char *aeof) /* return value */
-{
- int error; /* error return value */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_bmbt_rec_host_t *lastrec; /* extent record pointer */
- xfs_extnum_t nextents; /* number of file extents */
- xfs_bmbt_irec_t s; /* expanded extent record */
-
- ASSERT(whichfork == XFS_DATA_FORK);
- ifp = XFS_IFORK_PTR(ip, whichfork);
- if (!(ifp->if_flags & XFS_IFEXTENTS) &&
- (error = xfs_iread_extents(NULL, ip, whichfork)))
- return error;
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- if (nextents == 0) {
- *aeof = 1;
- return 0;
- }
- /*
- * Go to the last extent
- */
- lastrec = xfs_iext_get_ext(ifp, nextents - 1);
- xfs_bmbt_get_all(lastrec, &s);
- /*
- * Check we are allocating in the last extent (for delayed allocations)
- * or past the last extent for non-delayed allocations.
- */
- *aeof = (off >= s.br_startoff &&
- off < s.br_startoff + s.br_blockcount &&
- isnullstartblock(s.br_startblock)) ||
- off >= s.br_startoff + s.br_blockcount;
- return 0;
-}
-
-/*
- * Check if the endoff is outside the last extent. If so the caller will grow
- * the allocation to a stripe unit boundary.
- */
-int /* error */
-xfs_bmap_eof(
- xfs_inode_t *ip, /* incore inode pointer */
- xfs_fileoff_t endoff, /* file offset in fsblocks */
- int whichfork, /* data or attribute fork */
- int *eof) /* result value */
-{
- xfs_fsblock_t blockcount; /* extent block count */
- int error; /* error return value */
- xfs_ifork_t *ifp; /* inode fork pointer */
- xfs_bmbt_rec_host_t *lastrec; /* extent record pointer */
- xfs_extnum_t nextents; /* number of file extents */
- xfs_fileoff_t startoff; /* extent starting file offset */
-
- ASSERT(whichfork == XFS_DATA_FORK);
- ifp = XFS_IFORK_PTR(ip, whichfork);
- if (!(ifp->if_flags & XFS_IFEXTENTS) &&
- (error = xfs_iread_extents(NULL, ip, whichfork)))
- return error;
- nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
- if (nextents == 0) {
- *eof = 1;
- return 0;
- }
- /*
- * Go to the last extent
- */
- lastrec = xfs_iext_get_ext(ifp, nextents - 1);
- startoff = xfs_bmbt_get_startoff(lastrec);
- blockcount = xfs_bmbt_get_blockcount(lastrec);
- *eof = endoff >= startoff + blockcount;
- return 0;
-}
-
#ifdef DEBUG
STATIC struct xfs_buf *
xfs_bmap_get_bp(
@@ -6094,9 +6109,8 @@ xfs_bmap_punch_delalloc_range(
* trying to remove a real extent (which requires a
* transaction) or a hole, which is probably a bad idea...
*/
- error = xfs_bmapi(NULL, ip, start_fsb, 1,
- XFS_BMAPI_ENTIRE, NULL, 0, &imap,
- &nimaps, NULL);
+ error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
+ XFS_BMAPI_ENTIRE);
if (error) {
/* something screwed, just bail */
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index c62234b..89ee672 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -62,27 +62,23 @@ typedef struct xfs_bmap_free
#define XFS_BMAP_MAX_NMAP 4
/*
- * Flags for xfs_bmapi
+ * Flags for xfs_bmapi_*
*/
-#define XFS_BMAPI_WRITE 0x001 /* write operation: allocate space */
-#define XFS_BMAPI_DELAY 0x002 /* delayed write operation */
-#define XFS_BMAPI_ENTIRE 0x004 /* return entire extent, not trimmed */
-#define XFS_BMAPI_METADATA 0x008 /* mapping metadata not user data */
-#define XFS_BMAPI_ATTRFORK 0x010 /* use attribute fork not data */
-#define XFS_BMAPI_PREALLOC 0x040 /* preallocation op: unwritten space */
-#define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */
+#define XFS_BMAPI_ENTIRE 0x001 /* return entire extent, not trimmed */
+#define XFS_BMAPI_METADATA 0x002 /* mapping metadata not user data */
+#define XFS_BMAPI_ATTRFORK 0x004 /* use attribute fork not data */
+#define XFS_BMAPI_PREALLOC 0x008 /* preallocation op: unwritten space */
+#define XFS_BMAPI_IGSTATE 0x010 /* Ignore state - */
/* combine contig. space */
-#define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */
+#define XFS_BMAPI_CONTIG 0x020 /* must allocate only one extent */
/*
* unwritten extent conversion - this needs write cache flushing and no additional
* allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts
 * from written to unwritten, otherwise converts from unwritten to written.
*/
-#define XFS_BMAPI_CONVERT 0x200
+#define XFS_BMAPI_CONVERT 0x040
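
Concretely, the two conversion directions are selected as follows (a sketch
only; transaction setup and block reservation are omitted):

	/* unwritten -> written */
	error = xfs_bmapi_write(tp, ip, bno, len, XFS_BMAPI_CONVERT,
				&firstblock, 0, mval, &nmap, &flist);

	/* written -> unwritten: CONVERT combined with PREALLOC */
	error = xfs_bmapi_write(tp, ip, bno, len,
				XFS_BMAPI_CONVERT | XFS_BMAPI_PREALLOC,
				&firstblock, 0, mval, &nmap, &flist);
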
#define XFS_BMAPI_FLAGS \
- { XFS_BMAPI_WRITE, "WRITE" }, \
- { XFS_BMAPI_DELAY, "DELAY" }, \
{ XFS_BMAPI_ENTIRE, "ENTIRE" }, \
{ XFS_BMAPI_METADATA, "METADATA" }, \
{ XFS_BMAPI_ATTRFORK, "ATTRFORK" }, \
@@ -113,21 +109,28 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)
* Argument structure for xfs_bmap_alloc.
*/
typedef struct xfs_bmalloca {
- xfs_fsblock_t firstblock; /* i/o first block allocated */
- xfs_fsblock_t rval; /* starting block of new extent */
- xfs_fileoff_t off; /* offset in file filling in */
+ xfs_fsblock_t *firstblock; /* i/o first block allocated */
+ struct xfs_bmap_free *flist; /* bmap freelist */
struct xfs_trans *tp; /* transaction pointer */
struct xfs_inode *ip; /* incore inode pointer */
- struct xfs_bmbt_irec *prevp; /* extent before the new one */
- struct xfs_bmbt_irec *gotp; /* extent after, or delayed */
- xfs_extlen_t alen; /* i/o length asked/allocated */
+ struct xfs_bmbt_irec prev; /* extent before the new one */
+ struct xfs_bmbt_irec got; /* extent after, or delayed */
+
+ xfs_fileoff_t offset; /* offset in file filling in */
+ xfs_extlen_t length; /* i/o length asked/allocated */
+ xfs_fsblock_t blkno; /* starting block of new extent */
+
+ struct xfs_btree_cur *cur; /* btree cursor */
+ xfs_extnum_t idx; /* current extent index */
+ int nallocs;/* number of extents alloc'd */
+ int logflags;/* flags for transaction logging */
+
xfs_extlen_t total; /* total blocks needed for xaction */
xfs_extlen_t minlen; /* minimum allocation size (blocks) */
xfs_extlen_t minleft; /* amount must be left after alloc */
char eof; /* set if allocating past last extent */
char wasdel; /* replacing a delayed allocation */
char userdata;/* set if is user data */
- char low; /* low on space, using seq'l ags */
char aeof; /* allocated space at eof */
char conv; /* overwriting unwritten extents */
} xfs_bmalloca_t;
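
Note that prev and got change from pointers into the caller's stack frame to
records embedded in the structure, so the helpers split out of xfs_bmapi()
can update them without aliasing the caller's copies. Call setup shrinks to
one zeroed structure plus a handful of assignments, as in xfs_bmapi_write()
above:

	struct xfs_bmalloca	bma = { 0 };

	bma.tp = tp;
	bma.ip = ip;
	bma.total = total;
	bma.flist = flist;
	bma.firstblock = firstblock;
	/* offset, length, wasdel etc. are set per loop iteration */
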
@@ -152,251 +155,62 @@ typedef struct xfs_bmalloca {
{ BMAP_RIGHT_FILLING, "RF" }, \
{ BMAP_ATTRFORK, "ATTR" }
-/*
- * Add bmap trace insert entries for all the contents of the extent list.
- *
- * Quite excessive tracing. Only do this for debug builds.
- */
 #if defined(__KERNEL__) && defined(DEBUG)
-void
-xfs_bmap_trace_exlist(
- struct xfs_inode *ip, /* incore inode pointer */
- xfs_extnum_t cnt, /* count of entries in list */
- int whichfork,
- unsigned long caller_ip); /* data or attr fork */
+void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
+ int whichfork, unsigned long caller_ip);
#define XFS_BMAP_TRACE_EXLIST(ip,c,w) \
xfs_bmap_trace_exlist(ip,c,w, _THIS_IP_)
#else
#define XFS_BMAP_TRACE_EXLIST(ip,c,w)
#endif
-/*
- * Convert inode from non-attributed to attributed.
- * Must not be in a transaction, ip must not be locked.
- */
-int /* error code */
-xfs_bmap_add_attrfork(
- struct xfs_inode *ip, /* incore inode pointer */
- int size, /* space needed for new attribute */
- int rsvd); /* flag for reserved block allocation */
-
-/*
- * Add the extent to the list of extents to be free at transaction end.
- * The list is maintained sorted (by block number).
- */
-void
-xfs_bmap_add_free(
- xfs_fsblock_t bno, /* fs block number of extent */
- xfs_filblks_t len, /* length of extent */
- xfs_bmap_free_t *flist, /* list of extents */
- struct xfs_mount *mp); /* mount point structure */
-
-/*
- * Routine to clean up the free list data structure when
- * an error occurs during a transaction.
- */
-void
-xfs_bmap_cancel(
- xfs_bmap_free_t *flist); /* free list to clean up */
-
-/*
- * Compute and fill in the value of the maximum depth of a bmap btree
- * in this filesystem. Done once, during mount.
- */
-void
-xfs_bmap_compute_maxlevels(
- struct xfs_mount *mp, /* file system mount structure */
- int whichfork); /* data or attr fork */
-
-/*
- * Returns the file-relative block number of the first unused block in the file.
- * This is the lowest-address hole if the file has holes, else the first block
- * past the end of file.
- */
-int /* error */
-xfs_bmap_first_unused(
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_inode *ip, /* incore inode */
- xfs_extlen_t len, /* size of hole to find */
- xfs_fileoff_t *unused, /* unused block num */
- int whichfork); /* data or attr fork */
-
-/*
- * Returns the file-relative block number of the last block + 1 before
- * last_block (input value) in the file.
- * This is not based on i_size, it is based on the extent list.
- * Returns 0 for local files, as they do not have an extent list.
- */
-int /* error */
-xfs_bmap_last_before(
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_inode *ip, /* incore inode */
- xfs_fileoff_t *last_block, /* last block */
- int whichfork); /* data or attr fork */
-
-/*
- * Returns the file-relative block number of the first block past eof in
- * the file. This is not based on i_size, it is based on the extent list.
- * Returns 0 for local files, as they do not have an extent list.
- */
-int /* error */
-xfs_bmap_last_offset(
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_inode *ip, /* incore inode */
- xfs_fileoff_t *unused, /* last block num */
- int whichfork); /* data or attr fork */
-
-/*
- * Returns whether the selected fork of the inode has exactly one
- * block or not. For the data fork we check this matches di_size,
- * implying the file's range is 0..bsize-1.
- */
-int
-xfs_bmap_one_block(
- struct xfs_inode *ip, /* incore inode */
- int whichfork); /* data or attr fork */
-
-/*
- * Read in the extents to iu_extents.
- * All inode fields are set up by caller, we just traverse the btree
- * and copy the records in.
- */
-int /* error */
-xfs_bmap_read_extents(
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_inode *ip, /* incore inode */
- int whichfork); /* data or attr fork */
-
-/*
- * Map file blocks to filesystem blocks.
- * File range is given by the bno/len pair.
- * Adds blocks to file if a write ("flags & XFS_BMAPI_WRITE" set)
- * into a hole or past eof.
- * Only allocates blocks from a single allocation group,
- * to avoid locking problems.
- * The returned value in "firstblock" from the first call in a transaction
- * must be remembered and presented to subsequent calls in "firstblock".
- * An upper bound for the number of blocks to be allocated is supplied to
- * the first call in "total"; if no allocation group has that many free
- * blocks then the call will fail (return NULLFSBLOCK in "firstblock").
- */
-int /* error */
-xfs_bmapi(
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_inode *ip, /* incore inode */
- xfs_fileoff_t bno, /* starting file offs. mapped */
- xfs_filblks_t len, /* length to map in file */
- int flags, /* XFS_BMAPI_... */
- xfs_fsblock_t *firstblock, /* first allocated block
- controls a.g. for allocs */
- xfs_extlen_t total, /* total blocks needed */
- struct xfs_bmbt_irec *mval, /* output: map values */
- int *nmap, /* i/o: mval size/count */
- xfs_bmap_free_t *flist); /* i/o: list extents to free */
-
-/*
- * Map file blocks to filesystem blocks, simple version.
- * One block only, read-only.
- * For flags, only the XFS_BMAPI_ATTRFORK flag is examined.
- * For the other flag values, the effect is as if XFS_BMAPI_METADATA
- * was set and all the others were clear.
- */
-int /* error */
-xfs_bmapi_single(
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_inode *ip, /* incore inode */
- int whichfork, /* data or attr fork */
- xfs_fsblock_t *fsb, /* output: mapped block */
- xfs_fileoff_t bno); /* starting file offs. mapped */
-
-/*
- * Unmap (remove) blocks from a file.
- * If nexts is nonzero then the number of extents to remove is limited to
- * that value. If not all extents in the block range can be removed then
- * *done is set.
- */
-int /* error */
-xfs_bunmapi(
- struct xfs_trans *tp, /* transaction pointer */
- struct xfs_inode *ip, /* incore inode */
- xfs_fileoff_t bno, /* starting offset to unmap */
- xfs_filblks_t len, /* length to unmap in file */
- int flags, /* XFS_BMAPI_... */
- xfs_extnum_t nexts, /* number of extents max */
- xfs_fsblock_t *firstblock, /* first allocated block
- controls a.g. for allocs */
- xfs_bmap_free_t *flist, /* i/o: list extents to free */
- int *done); /* set if not done yet */
-
-/*
- * Check an extent list, which has just been read, for
- * any bit in the extent flag field.
- */
-int
-xfs_check_nostate_extents(
- struct xfs_ifork *ifp,
- xfs_extnum_t idx,
- xfs_extnum_t num);
-
-uint
-xfs_default_attroffset(
- struct xfs_inode *ip);
+int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
+void xfs_bmap_add_free(xfs_fsblock_t bno, xfs_filblks_t len,
+ struct xfs_bmap_free *flist, struct xfs_mount *mp);
+void xfs_bmap_cancel(struct xfs_bmap_free *flist);
+void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork);
+int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);
+int xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_fileoff_t *last_block, int whichfork);
+int xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_fileoff_t *unused, int whichfork);
+int xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);
+int xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,
+ int whichfork);
+int xfs_bmapi_read(struct xfs_inode *ip, xfs_fileoff_t bno,
+ xfs_filblks_t len, struct xfs_bmbt_irec *mval,
+ int *nmap, int flags);
+int xfs_bmapi_delay(struct xfs_inode *ip, xfs_fileoff_t bno,
+ xfs_filblks_t len, struct xfs_bmbt_irec *mval,
+ int *nmap, int flags);
+int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_fileoff_t bno, xfs_filblks_t len, int flags,
+ xfs_fsblock_t *firstblock, xfs_extlen_t total,
+ struct xfs_bmbt_irec *mval, int *nmap,
+ struct xfs_bmap_free *flist);
+int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_fileoff_t bno, xfs_filblks_t len, int flags,
+ xfs_extnum_t nexts, xfs_fsblock_t *firstblock,
+ struct xfs_bmap_free *flist, int *done);
+int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,
+ xfs_extnum_t num);
+uint xfs_default_attroffset(struct xfs_inode *ip);
#ifdef __KERNEL__
-
-/*
- * Routine to be called at transaction's end by xfs_bmapi, xfs_bunmapi
- * caller. Frees all the extents that need freeing, which must be done
- * last due to locking considerations.
- *
- * Return 1 if the given transaction was committed and a new one allocated,
- * and 0 otherwise.
- */
-int /* error */
-xfs_bmap_finish(
- struct xfs_trans **tp, /* transaction pointer addr */
- xfs_bmap_free_t *flist, /* i/o: list extents to free */
- int *committed); /* xact committed or not */
-
/* bmap to userspace formatter - copy to user & advance pointer */
typedef int (*xfs_bmap_format_t)(void **, struct getbmapx *, int *);
-/*
- * Get inode's extents as described in bmv, and format for output.
- */
-int /* error code */
-xfs_getbmap(
- xfs_inode_t *ip,
- struct getbmapx *bmv, /* user bmap structure */
- xfs_bmap_format_t formatter, /* format to user */
- void *arg); /* formatter arg */
-
-/*
- * Check if the endoff is outside the last extent. If so the caller will grow
- * the allocation to a stripe unit boundary
- */
-int
-xfs_bmap_eof(
- struct xfs_inode *ip,
- xfs_fileoff_t endoff,
- int whichfork,
- int *eof);
-
-/*
- * Count fsblocks of the given fork.
- */
-int
-xfs_bmap_count_blocks(
- xfs_trans_t *tp,
- struct xfs_inode *ip,
- int whichfork,
- int *count);
-
-int
-xfs_bmap_punch_delalloc_range(
- struct xfs_inode *ip,
- xfs_fileoff_t start_fsb,
- xfs_fileoff_t length);
+int xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,
+ int *committed);
+int xfs_getbmap(struct xfs_inode *ip, struct getbmapx *bmv,
+ xfs_bmap_format_t formatter, void *arg);
+int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
+ int whichfork, int *eof);
+int xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+ int whichfork, int *count);
+int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
+ xfs_fileoff_t start_fsb, xfs_fileoff_t length);
#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index 87d3c10..e2f5d59 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -33,7 +33,6 @@
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_itable.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
@@ -425,10 +424,10 @@ xfs_bmbt_to_bmdr(
xfs_bmbt_key_t *tkp;
__be64 *tpp;
- ASSERT(be32_to_cpu(rblock->bb_magic) == XFS_BMAP_MAGIC);
- ASSERT(be64_to_cpu(rblock->bb_u.l.bb_leftsib) == NULLDFSBNO);
- ASSERT(be64_to_cpu(rblock->bb_u.l.bb_rightsib) == NULLDFSBNO);
- ASSERT(be16_to_cpu(rblock->bb_level) > 0);
+ ASSERT(rblock->bb_magic == cpu_to_be32(XFS_BMAP_MAGIC));
+ ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO));
+ ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO));
+ ASSERT(rblock->bb_level != 0);
dblock->bb_level = rblock->bb_level;
dblock->bb_numrecs = rblock->bb_numrecs;
dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0);
@@ -732,95 +731,6 @@ xfs_bmbt_recs_inorder(
}
#endif /* DEBUG */
-#ifdef XFS_BTREE_TRACE
-ktrace_t *xfs_bmbt_trace_buf;
-
-STATIC void
-xfs_bmbt_trace_enter(
- struct xfs_btree_cur *cur,
- const char *func,
- char *s,
- int type,
- int line,
- __psunsigned_t a0,
- __psunsigned_t a1,
- __psunsigned_t a2,
- __psunsigned_t a3,
- __psunsigned_t a4,
- __psunsigned_t a5,
- __psunsigned_t a6,
- __psunsigned_t a7,
- __psunsigned_t a8,
- __psunsigned_t a9,
- __psunsigned_t a10)
-{
- struct xfs_inode *ip = cur->bc_private.b.ip;
- int whichfork = cur->bc_private.b.whichfork;
-
- ktrace_enter(xfs_bmbt_trace_buf,
- (void *)((__psint_t)type | (whichfork << 8) | (line << 16)),
- (void *)func, (void *)s, (void *)ip, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
-}
-
-STATIC void
-xfs_bmbt_trace_cursor(
- struct xfs_btree_cur *cur,
- __uint32_t *s0,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- struct xfs_bmbt_rec_host r;
-
- xfs_bmbt_set_all(&r, &cur->bc_rec.b);
-
- *s0 = (cur->bc_nlevels << 24) |
- (cur->bc_private.b.flags << 16) |
- cur->bc_private.b.allocated;
- *l0 = r.l0;
- *l1 = r.l1;
-}
-
-STATIC void
-xfs_bmbt_trace_key(
- struct xfs_btree_cur *cur,
- union xfs_btree_key *key,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *l0 = be64_to_cpu(key->bmbt.br_startoff);
- *l1 = 0;
-}
-
-/* Endian flipping versions of the bmbt extraction functions */
-STATIC void
-xfs_bmbt_disk_get_all(
- xfs_bmbt_rec_t *r,
- xfs_bmbt_irec_t *s)
-{
- __xfs_bmbt_get_all(get_unaligned_be64(&r->l0),
- get_unaligned_be64(&r->l1), s);
-}
-
-STATIC void
-xfs_bmbt_trace_record(
- struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
- __uint64_t *l0,
- __uint64_t *l1,
- __uint64_t *l2)
-{
- struct xfs_bmbt_irec irec;
-
- xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
- *l0 = irec.br_startoff;
- *l1 = irec.br_startblock;
- *l2 = irec.br_blockcount;
-}
-#endif /* XFS_BTREE_TRACE */
-
static const struct xfs_btree_ops xfs_bmbt_ops = {
.rec_len = sizeof(xfs_bmbt_rec_t),
.key_len = sizeof(xfs_bmbt_key_t),
@@ -837,18 +747,10 @@ static const struct xfs_btree_ops xfs_bmbt_ops = {
.init_rec_from_cur = xfs_bmbt_init_rec_from_cur,
.init_ptr_from_cur = xfs_bmbt_init_ptr_from_cur,
.key_diff = xfs_bmbt_key_diff,
-
#ifdef DEBUG
.keys_inorder = xfs_bmbt_keys_inorder,
.recs_inorder = xfs_bmbt_recs_inorder,
#endif
-
-#ifdef XFS_BTREE_TRACE
- .trace_enter = xfs_bmbt_trace_enter,
- .trace_cursor = xfs_bmbt_trace_cursor,
- .trace_key = xfs_bmbt_trace_key,
- .trace_record = xfs_bmbt_trace_record,
-#endif
};
/*
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 2f9e97c..1f19f03 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -32,7 +32,6 @@
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -66,11 +65,11 @@ xfs_btree_check_lblock(
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
block->bb_u.l.bb_leftsib &&
- (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
+ (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
block->bb_u.l.bb_rightsib &&
- (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
+ (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
XFS_FSB_SANITY_CHECK(mp,
be64_to_cpu(block->bb_u.l.bb_rightsib)));
if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
@@ -105,10 +104,10 @@ xfs_btree_check_sblock(
be16_to_cpu(block->bb_level) == level &&
be16_to_cpu(block->bb_numrecs) <=
cur->bc_ops->get_maxrecs(cur, level) &&
- (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
+ (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
block->bb_u.s.bb_leftsib &&
- (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
+ (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
block->bb_u.s.bb_rightsib;
if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
@@ -276,8 +275,7 @@ xfs_btree_dup_cursor(
return error;
}
new->bc_bufs[i] = bp;
- ASSERT(bp);
- ASSERT(!XFS_BUF_GETERROR(bp));
+ ASSERT(!xfs_buf_geterror(bp));
} else
new->bc_bufs[i] = NULL;
}
@@ -468,8 +466,7 @@ xfs_btree_get_bufl(
ASSERT(fsbno != NULLFSBLOCK);
d = XFS_FSB_TO_DADDR(mp, fsbno);
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
- ASSERT(bp);
- ASSERT(!XFS_BUF_GETERROR(bp));
+ ASSERT(!xfs_buf_geterror(bp));
return bp;
}
@@ -492,8 +489,7 @@ xfs_btree_get_bufs(
ASSERT(agbno != NULLAGBLOCK);
d = XFS_AGB_TO_DADDR(mp, agno, agbno);
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
- ASSERT(bp);
- ASSERT(!XFS_BUF_GETERROR(bp));
+ ASSERT(!xfs_buf_geterror(bp));
return bp;
}
@@ -511,9 +507,9 @@ xfs_btree_islastblock(
block = xfs_btree_get_block(cur, level, &bp);
xfs_btree_check_block(cur, block, level, bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
+ return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
else
- return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
+ return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
}
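
The comparison rewrites through this file all follow one pattern: instead of
byte-swapping the on-disk value at runtime, swap the constant, which the
compiler folds at build time so the test becomes a plain integer compare.
Illustrated with the lines above:

	/* old: swap the disk value on every call */
	return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;

	/* new: constant folded at compile time, straight 64-bit compare */
	return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
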
/*
@@ -633,9 +629,9 @@ xfs_btree_read_bufl(
mp->m_bsize, lock, &bp))) {
return error;
}
- ASSERT(!bp || !XFS_BUF_GETERROR(bp));
+ ASSERT(!xfs_buf_geterror(bp));
if (bp)
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
+ xfs_buf_set_ref(bp, refval);
*bpp = bp;
return 0;
}
@@ -777,14 +773,14 @@ xfs_btree_setbuf(
b = XFS_BUF_TO_BLOCK(bp);
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
+ if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
- if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
+ if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
} else {
- if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
+ if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
- if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
+ if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
}
}
@@ -795,9 +791,9 @@ xfs_btree_ptr_is_null(
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return be64_to_cpu(ptr->l) == NULLDFSBNO;
+ return ptr->l == cpu_to_be64(NULLDFSBNO);
else
- return be32_to_cpu(ptr->s) == NULLAGBLOCK;
+ return ptr->s == cpu_to_be32(NULLAGBLOCK);
}
STATIC void
@@ -923,12 +919,12 @@ xfs_btree_ptr_to_daddr(
union xfs_btree_ptr *ptr)
{
if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO);
+ ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
} else {
ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
- ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
+ ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
be32_to_cpu(ptr->s));
@@ -943,13 +939,13 @@ xfs_btree_set_refs(
switch (cur->bc_btnum) {
case XFS_BTNUM_BNO:
case XFS_BTNUM_CNT:
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
+ xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
break;
case XFS_BTNUM_INO:
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_INOMAP, XFS_INO_BTREE_REF);
+ xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
break;
case XFS_BTNUM_BMAP:
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, XFS_BMAP_BTREE_REF);
+ xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
break;
default:
ASSERT(0);
@@ -974,8 +970,8 @@ xfs_btree_get_buf_block(
*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
mp->m_bsize, flags);
- ASSERT(*bpp);
- ASSERT(!XFS_BUF_GETERROR(*bpp));
+ if (!*bpp)
+ return ENOMEM;
*block = XFS_BUF_TO_BLOCK(*bpp);
return 0;
@@ -1007,8 +1003,7 @@ xfs_btree_read_buf_block(
if (error)
return error;
- ASSERT(*bpp != NULL);
- ASSERT(!XFS_BUF_GETERROR(*bpp));
+ ASSERT(!xfs_buf_geterror(*bpp));
xfs_btree_set_refs(cur, *bpp);
*block = XFS_BUF_TO_BLOCK(*bpp);
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h
index 82fafc6..5b240de 100644
--- a/fs/xfs/xfs_btree.h
+++ b/fs/xfs/xfs_btree.h
@@ -199,25 +199,6 @@ struct xfs_btree_ops {
union xfs_btree_rec *r1,
union xfs_btree_rec *r2);
#endif
-
- /* btree tracing */
-#ifdef XFS_BTREE_TRACE
- void (*trace_enter)(struct xfs_btree_cur *, const char *,
- char *, int, int, __psunsigned_t,
- __psunsigned_t, __psunsigned_t,
- __psunsigned_t, __psunsigned_t,
- __psunsigned_t, __psunsigned_t,
- __psunsigned_t, __psunsigned_t,
- __psunsigned_t, __psunsigned_t);
- void (*trace_cursor)(struct xfs_btree_cur *, __uint32_t *,
- __uint64_t *, __uint64_t *);
- void (*trace_key)(struct xfs_btree_cur *,
- union xfs_btree_key *, __uint64_t *,
- __uint64_t *);
- void (*trace_record)(struct xfs_btree_cur *,
- union xfs_btree_rec *, __uint64_t *,
- __uint64_t *, __uint64_t *);
-#endif
};
/*
@@ -281,7 +262,7 @@ typedef struct xfs_btree_cur
/*
* Convert from buffer to btree block header.
*/
-#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_BLOCK(bp) ((struct xfs_btree_block *)((bp)->b_addr))
/*
@@ -452,4 +433,23 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
(XFS_FSB_TO_AGNO(mp, fsb) < mp->m_sb.sb_agcount && \
XFS_FSB_TO_AGBNO(mp, fsb) < mp->m_sb.sb_agblocks)
+/*
+ * Trace hooks. Currently not implemented as they need to be ported
+ * over to the generic tracing functionality, which is some effort.
+ *
+ * i,j = integer (32 bit)
+ * b = btree block buffer (xfs_buf_t)
+ * p = btree ptr
+ * r = btree record
+ * k = btree key
+ */
+#define XFS_BTREE_TRACE_ARGBI(c, b, i)
+#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
+#define XFS_BTREE_TRACE_ARGI(c, i)
+#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
+#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
+#define XFS_BTREE_TRACE_ARGIK(c, i, k)
+#define XFS_BTREE_TRACE_ARGR(c, r)
+#define XFS_BTREE_TRACE_CURSOR(c, t)
+
#endif /* __XFS_BTREE_H__ */
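
Since the hooks are now empty macros, any remaining call sites compile away
entirely, e.g.:

	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);	/* expands to nothing */

The XBT_* tokens need no definition once the preprocessor discards the
argument.
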
diff --git a/fs/xfs/xfs_btree_trace.c b/fs/xfs/xfs_btree_trace.c
deleted file mode 100644
index 44ff942..0000000
--- a/fs/xfs/xfs_btree_trace.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#include "xfs.h"
-#include "xfs_types.h"
-#include "xfs_inum.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_inode.h"
-#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
-
-STATIC void
-xfs_btree_trace_ptr(
- struct xfs_btree_cur *cur,
- union xfs_btree_ptr ptr,
- __psunsigned_t *high,
- __psunsigned_t *low)
-{
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
- __u64 val = be64_to_cpu(ptr.l);
- *high = val >> 32;
- *low = (int)val;
- } else {
- *high = 0;
- *low = be32_to_cpu(ptr.s);
- }
-}
-
-/*
- * Add a trace buffer entry for arguments, for a buffer & 1 integer arg.
- */
-void
-xfs_btree_trace_argbi(
- const char *func,
- struct xfs_btree_cur *cur,
- struct xfs_buf *b,
- int i,
- int line)
-{
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBI,
- line, (__psunsigned_t)b, i, 0, 0, 0, 0, 0,
- 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for a buffer & 2 integer args.
- */
-void
-xfs_btree_trace_argbii(
- const char *func,
- struct xfs_btree_cur *cur,
- struct xfs_buf *b,
- int i0,
- int i1,
- int line)
-{
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGBII,
- line, (__psunsigned_t)b, i0, i1, 0, 0, 0, 0,
- 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for 3 block-length args
- * and an integer arg.
- */
-void
-xfs_btree_trace_argfffi(
- const char *func,
- struct xfs_btree_cur *cur,
- xfs_dfiloff_t o,
- xfs_dfsbno_t b,
- xfs_dfilblks_t i,
- int j,
- int line)
-{
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGFFFI,
- line,
- o >> 32, (int)o,
- b >> 32, (int)b,
- i >> 32, (int)i,
- (int)j, 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for one integer arg.
- */
-void
-xfs_btree_trace_argi(
- const char *func,
- struct xfs_btree_cur *cur,
- int i,
- int line)
-{
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGI,
- line, i, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, fsblock, key.
- */
-void
-xfs_btree_trace_argipk(
- const char *func,
- struct xfs_btree_cur *cur,
- int i,
- union xfs_btree_ptr ptr,
- union xfs_btree_key *key,
- int line)
-{
- __psunsigned_t high, low;
- __uint64_t l0, l1;
-
- xfs_btree_trace_ptr(cur, ptr, &high, &low);
- cur->bc_ops->trace_key(cur, key, &l0, &l1);
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPK,
- line, i, high, low,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, fsblock, rec.
- */
-void
-xfs_btree_trace_argipr(
- const char *func,
- struct xfs_btree_cur *cur,
- int i,
- union xfs_btree_ptr ptr,
- union xfs_btree_rec *rec,
- int line)
-{
- __psunsigned_t high, low;
- __uint64_t l0, l1, l2;
-
- xfs_btree_trace_ptr(cur, ptr, &high, &low);
- cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIPR,
- line, i,
- high, low,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- l2 >> 32, (int)l2,
- 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for int, key.
- */
-void
-xfs_btree_trace_argik(
- const char *func,
- struct xfs_btree_cur *cur,
- int i,
- union xfs_btree_key *key,
- int line)
-{
- __uint64_t l0, l1;
-
- cur->bc_ops->trace_key(cur, key, &l0, &l1);
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGIK,
- line, i,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- 0, 0, 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for arguments, for record.
- */
-void
-xfs_btree_trace_argr(
- const char *func,
- struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
- int line)
-{
- __uint64_t l0, l1, l2;
-
- cur->bc_ops->trace_record(cur, rec, &l0, &l1, &l2);
- cur->bc_ops->trace_enter(cur, func, XBT_ARGS, XFS_BTREE_KTRACE_ARGR,
- line,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- l2 >> 32, (int)l2,
- 0, 0, 0, 0, 0);
-}
-
-/*
- * Add a trace buffer entry for the cursor/operation.
- */
-void
-xfs_btree_trace_cursor(
- const char *func,
- struct xfs_btree_cur *cur,
- int type,
- int line)
-{
- __uint32_t s0;
- __uint64_t l0, l1;
- char *s;
-
- switch (type) {
- case XBT_ARGS:
- s = "args";
- break;
- case XBT_ENTRY:
- s = "entry";
- break;
- case XBT_ERROR:
- s = "error";
- break;
- case XBT_EXIT:
- s = "exit";
- break;
- default:
- s = "unknown";
- break;
- }
-
- cur->bc_ops->trace_cursor(cur, &s0, &l0, &l1);
- cur->bc_ops->trace_enter(cur, func, s, XFS_BTREE_KTRACE_CUR, line,
- s0,
- l0 >> 32, (int)l0,
- l1 >> 32, (int)l1,
- (__psunsigned_t)cur->bc_bufs[0],
- (__psunsigned_t)cur->bc_bufs[1],
- (__psunsigned_t)cur->bc_bufs[2],
- (__psunsigned_t)cur->bc_bufs[3],
- (cur->bc_ptrs[0] << 16) | cur->bc_ptrs[1],
- (cur->bc_ptrs[2] << 16) | cur->bc_ptrs[3]);
-}
diff --git a/fs/xfs/xfs_btree_trace.h b/fs/xfs/xfs_btree_trace.h
deleted file mode 100644
index 2d8a309..0000000
--- a/fs/xfs/xfs_btree_trace.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright (c) 2008 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_BTREE_TRACE_H__
-#define __XFS_BTREE_TRACE_H__
-
-struct xfs_btree_cur;
-struct xfs_buf;
-
-
-/*
- * Trace hooks.
- * i,j = integer (32 bit)
- * b = btree block buffer (xfs_buf_t)
- * p = btree ptr
- * r = btree record
- * k = btree key
- */
-
-#ifdef XFS_BTREE_TRACE
-
-/*
- * Trace buffer entry types.
- */
-#define XFS_BTREE_KTRACE_ARGBI 1
-#define XFS_BTREE_KTRACE_ARGBII 2
-#define XFS_BTREE_KTRACE_ARGFFFI 3
-#define XFS_BTREE_KTRACE_ARGI 4
-#define XFS_BTREE_KTRACE_ARGIPK 5
-#define XFS_BTREE_KTRACE_ARGIPR 6
-#define XFS_BTREE_KTRACE_ARGIK 7
-#define XFS_BTREE_KTRACE_ARGR 8
-#define XFS_BTREE_KTRACE_CUR 9
-
-/*
- * Sub-types for cursor traces.
- */
-#define XBT_ARGS 0
-#define XBT_ENTRY 1
-#define XBT_ERROR 2
-#define XBT_EXIT 3
-
-void xfs_btree_trace_argbi(const char *, struct xfs_btree_cur *,
- struct xfs_buf *, int, int);
-void xfs_btree_trace_argbii(const char *, struct xfs_btree_cur *,
- struct xfs_buf *, int, int, int);
-void xfs_btree_trace_argi(const char *, struct xfs_btree_cur *, int, int);
-void xfs_btree_trace_argipk(const char *, struct xfs_btree_cur *, int,
- union xfs_btree_ptr, union xfs_btree_key *, int);
-void xfs_btree_trace_argipr(const char *, struct xfs_btree_cur *, int,
- union xfs_btree_ptr, union xfs_btree_rec *, int);
-void xfs_btree_trace_argik(const char *, struct xfs_btree_cur *, int,
- union xfs_btree_key *, int);
-void xfs_btree_trace_argr(const char *, struct xfs_btree_cur *,
- union xfs_btree_rec *, int);
-void xfs_btree_trace_cursor(const char *, struct xfs_btree_cur *, int, int);
-
-#define XFS_BTREE_TRACE_ARGBI(c, b, i) \
- xfs_btree_trace_argbi(__func__, c, b, i, __LINE__)
-#define XFS_BTREE_TRACE_ARGBII(c, b, i, j) \
- xfs_btree_trace_argbii(__func__, c, b, i, j, __LINE__)
-#define XFS_BTREE_TRACE_ARGI(c, i) \
- xfs_btree_trace_argi(__func__, c, i, __LINE__)
-#define XFS_BTREE_TRACE_ARGIPK(c, i, p, k) \
- xfs_btree_trace_argipk(__func__, c, i, p, k, __LINE__)
-#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r) \
- xfs_btree_trace_argipr(__func__, c, i, p, r, __LINE__)
-#define XFS_BTREE_TRACE_ARGIK(c, i, k) \
- xfs_btree_trace_argik(__func__, c, i, k, __LINE__)
-#define XFS_BTREE_TRACE_ARGR(c, r) \
- xfs_btree_trace_argr(__func__, c, r, __LINE__)
-#define XFS_BTREE_TRACE_CURSOR(c, t) \
- xfs_btree_trace_cursor(__func__, c, t, __LINE__)
-#else
-#define XFS_BTREE_TRACE_ARGBI(c, b, i)
-#define XFS_BTREE_TRACE_ARGBII(c, b, i, j)
-#define XFS_BTREE_TRACE_ARGI(c, i)
-#define XFS_BTREE_TRACE_ARGIPK(c, i, p, s)
-#define XFS_BTREE_TRACE_ARGIPR(c, i, p, r)
-#define XFS_BTREE_TRACE_ARGIK(c, i, k)
-#define XFS_BTREE_TRACE_ARGR(c, r)
-#define XFS_BTREE_TRACE_CURSOR(c, t)
-#endif /* XFS_BTREE_TRACE */
-
-#endif /* __XFS_BTREE_TRACE_H__ */
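The deleted tracer also shows why it aged badly: the legacy ktrace slots were word-sized, so every 64-bit pointer, key and record had to be split by hand into high/low halves (see xfs_btree_trace_ptr and the `l0 >> 32, (int)l0` pairs above). A hedged sketch of that split, with hypothetical names:

#include <stdint.h>

/*
 * Split a 64-bit value into two 32-bit trace slots, as the deleted
 * xfs_btree_trace_ptr() did; split64() is an illustrative name.
 */
static void split64(uint64_t val, uint32_t *high, uint32_t *low)
{
	*high = (uint32_t)(val >> 32);
	*low  = (uint32_t)val;	/* truncating cast keeps the low 32 bits */
}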
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7888a75..eac97ef 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -90,13 +90,11 @@ xfs_buf_item_flush_log_debug(
uint first,
uint last)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
uint nbytes;
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
- if ((bip == NULL) || (bip->bli_item.li_type != XFS_LI_BUF)) {
+ if (bip == NULL || (bip->bli_item.li_type != XFS_LI_BUF))
return;
- }
ASSERT(bip->bli_logged != NULL);
nbytes = last - first + 1;
@@ -126,9 +124,9 @@ xfs_buf_item_log_check(
bp = bip->bli_buf;
ASSERT(XFS_BUF_COUNT(bp) > 0);
- ASSERT(XFS_BUF_PTR(bp) != NULL);
+ ASSERT(bp->b_addr != NULL);
orig = bip->bli_orig;
- buffer = XFS_BUF_PTR(bp);
+ buffer = bp->b_addr;
for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
xfs_emerg(bp->b_mount,
@@ -373,7 +371,6 @@ xfs_buf_item_pin(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
- ASSERT(XFS_BUF_ISBUSY(bip->bli_buf));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
(bip->bli_flags & XFS_BLI_STALE));
@@ -408,7 +405,7 @@ xfs_buf_item_unpin(
int stale = bip->bli_flags & XFS_BLI_STALE;
int freed;
- ASSERT(XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *) == bip);
+ ASSERT(bp->b_fspriv == bip);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_buf_item_unpin(bip);
@@ -420,7 +417,7 @@ xfs_buf_item_unpin(
if (freed && stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
ASSERT(XFS_BUF_ISSTALE(bp));
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
@@ -443,7 +440,7 @@ xfs_buf_item_unpin(
* Since the transaction no longer refers to the buffer,
* the buffer should no longer refer to the transaction.
*/
- XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+ bp->b_transp = NULL;
}
/*
@@ -454,13 +451,13 @@ xfs_buf_item_unpin(
*/
if (bip->bli_flags & XFS_BLI_STALE_INODE) {
xfs_buf_do_callbacks(bp);
- XFS_BUF_SET_FSPRIVATE(bp, NULL);
- XFS_BUF_CLR_IODONE_FUNC(bp);
+ bp->b_fspriv = NULL;
+ bp->b_iodone = NULL;
} else {
spin_lock(&ailp->xa_lock);
xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
xfs_buf_item_relse(bp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) == NULL);
+ ASSERT(bp->b_fspriv == NULL);
}
xfs_buf_relse(bp);
}
@@ -481,13 +478,13 @@ xfs_buf_item_trylock(
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- if (XFS_BUF_ISPINNED(bp))
+ if (xfs_buf_ispinned(bp))
return XFS_ITEM_PINNED;
- if (!XFS_BUF_CPSEMA(bp))
+ if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
/* take a reference to the buffer. */
- XFS_BUF_HOLD(bp);
+ xfs_buf_hold(bp);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
trace_xfs_buf_item_trylock(bip);
@@ -525,7 +522,7 @@ xfs_buf_item_unlock(
uint hold;
/* Clear the buffer's association with this transaction. */
- XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+ bp->b_transp = NULL;
/*
* If this is a transaction abort, don't return early. Instead, allow
@@ -659,7 +656,7 @@ xfs_buf_item_committing(
/*
* This is the ops vector shared by all buf log items.
*/
-static struct xfs_item_ops xfs_buf_item_ops = {
+static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_size = xfs_buf_item_size,
.iop_format = xfs_buf_item_format,
.iop_pin = xfs_buf_item_pin,
@@ -685,7 +682,7 @@ xfs_buf_item_init(
xfs_buf_t *bp,
xfs_mount_t *mp)
{
- xfs_log_item_t *lip;
+ xfs_log_item_t *lip = bp->b_fspriv;
xfs_buf_log_item_t *bip;
int chunks;
int map_size;
@@ -697,12 +694,8 @@ xfs_buf_item_init(
* nothing to do here so return.
*/
ASSERT(bp->b_target->bt_mount == mp);
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- if (lip->li_type == XFS_LI_BUF) {
- return;
- }
- }
+ if (lip != NULL && lip->li_type == XFS_LI_BUF)
+ return;
/*
* chunks is the number of XFS_BLF_CHUNK size pieces
@@ -733,7 +726,7 @@ xfs_buf_item_init(
* to have logged.
*/
bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
- memcpy(bip->bli_orig, XFS_BUF_PTR(bp), XFS_BUF_COUNT(bp));
+ memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp));
bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);
#endif
@@ -741,11 +734,9 @@ xfs_buf_item_init(
* Put the buf item into the list of items attached to the
* buffer at the front.
*/
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- bip->bli_item.li_bio_list =
- XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- }
- XFS_BUF_SET_FSPRIVATE(bp, bip);
+ if (bp->b_fspriv)
+ bip->bli_item.li_bio_list = bp->b_fspriv;
+ bp->b_fspriv = bip;
}
@@ -877,12 +868,11 @@ xfs_buf_item_relse(
trace_xfs_buf_item_relse(bp, _RET_IP_);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
- XFS_BUF_SET_FSPRIVATE(bp, bip->bli_item.li_bio_list);
- if ((XFS_BUF_FSPRIVATE(bp, void *) == NULL) &&
- (XFS_BUF_IODONE_FUNC(bp) != NULL)) {
- XFS_BUF_CLR_IODONE_FUNC(bp);
- }
+ bip = bp->b_fspriv;
+ bp->b_fspriv = bip->bli_item.li_bio_list;
+ if (bp->b_fspriv == NULL)
+ bp->b_iodone = NULL;
+
xfs_buf_rele(bp);
xfs_buf_item_free(bip);
}
@@ -905,21 +895,20 @@ xfs_buf_attach_iodone(
{
xfs_log_item_t *head_lip;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ ASSERT(xfs_buf_islocked(bp));
lip->li_cb = cb;
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- head_lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ head_lip = bp->b_fspriv;
+ if (head_lip) {
lip->li_bio_list = head_lip->li_bio_list;
head_lip->li_bio_list = lip;
} else {
- XFS_BUF_SET_FSPRIVATE(bp, lip);
+ bp->b_fspriv = lip;
}
- ASSERT((XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks) ||
- (XFS_BUF_IODONE_FUNC(bp) == NULL));
- XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
+ ASSERT(bp->b_iodone == NULL ||
+ bp->b_iodone == xfs_buf_iodone_callbacks);
+ bp->b_iodone = xfs_buf_iodone_callbacks;
}
/*
@@ -940,8 +929,8 @@ xfs_buf_do_callbacks(
{
struct xfs_log_item *lip;
- while ((lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *)) != NULL) {
- XFS_BUF_SET_FSPRIVATE(bp, lip->li_bio_list);
+ while ((lip = bp->b_fspriv) != NULL) {
+ bp->b_fspriv = lip->li_bio_list;
ASSERT(lip->li_cb != NULL);
/*
* Clear the next pointer so we don't have any
@@ -970,7 +959,7 @@ xfs_buf_iodone_callbacks(
static ulong lasttime;
static xfs_buftarg_t *lasttarg;
- if (likely(!XFS_BUF_GETERROR(bp)))
+ if (likely(!xfs_buf_geterror(bp)))
goto do_callbacks;
/*
@@ -978,19 +967,18 @@ xfs_buf_iodone_callbacks(
* I/O errors, there's no point in giving this a retry.
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
- XFS_BUF_SUPER_STALE(bp);
+ xfs_buf_stale(bp);
+ XFS_BUF_DONE(bp);
trace_xfs_buf_item_iodone(bp, _RET_IP_);
goto do_callbacks;
}
- if (XFS_BUF_TARGET(bp) != lasttarg ||
+ if (bp->b_target != lasttarg ||
time_after(jiffies, (lasttime + 5*HZ))) {
lasttime = jiffies;
- xfs_alert(mp, "Device %s: metadata write error block 0x%llx",
- XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
- (__uint64_t)XFS_BUF_ADDR(bp));
+ xfs_buf_ioerror_alert(bp, __func__);
}
- lasttarg = XFS_BUF_TARGET(bp);
+ lasttarg = bp->b_target;
/*
* If the write was asynchronous then no one will be looking for the
@@ -1001,14 +989,13 @@ xfs_buf_iodone_callbacks(
* around.
*/
if (XFS_BUF_ISASYNC(bp)) {
- XFS_BUF_ERROR(bp, 0); /* errno of 0 unsets the flag */
+ xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
if (!XFS_BUF_ISSTALE(bp)) {
- XFS_BUF_DELAYWRITE(bp);
+ xfs_buf_delwri_queue(bp);
XFS_BUF_DONE(bp);
- XFS_BUF_SET_START(bp);
}
- ASSERT(XFS_BUF_IODONE_FUNC(bp));
+ ASSERT(bp->b_iodone != NULL);
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
xfs_buf_relse(bp);
return;
@@ -1018,16 +1005,15 @@ xfs_buf_iodone_callbacks(
* If the write of the buffer was synchronous, we want to make
* sure to return the error to the caller of xfs_bwrite().
*/
- XFS_BUF_STALE(bp);
+ xfs_buf_stale(bp);
XFS_BUF_DONE(bp);
- XFS_BUF_UNDELAYWRITE(bp);
trace_xfs_buf_error_relse(bp, _RET_IP_);
do_callbacks:
xfs_buf_do_callbacks(bp);
- XFS_BUF_SET_FSPRIVATE(bp, NULL);
- XFS_BUF_CLR_IODONE_FUNC(bp);
+ bp->b_fspriv = NULL;
+ bp->b_iodone = NULL;
xfs_buf_ioend(bp, 0);
}
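The xfs_buf_item.c hunks follow one mechanical rule: cast-laden accessor macros (XFS_BUF_FSPRIVATE, XFS_BUF_SET_FSPRIVATE2, XFS_BUF_IODONE_FUNC and friends) become plain struct fields (b_fspriv, b_transp, b_iodone). A sketch of the shape of that conversion, with simplified stand-in types rather than the real xfs_buf:

struct log_item;			/* forward declaration is enough */

struct buf {				/* simplified stand-in for xfs_buf */
	void	*b_fspriv;		/* head of the attached item list */
	void	(*b_iodone)(struct buf *);
};

/*
 * Old style hid a cast behind a macro at every call site:
 *	#define BUF_FSPRIVATE(bp, type)	((type)(bp)->b_fspriv)
 * New style reads the field directly; void * converts implicitly
 * and NULL checks become ordinary pointer tests.
 */
static struct log_item *first_item(struct buf *bp)
{
	return bp->b_fspriv;
}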
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 6102ac6..77c7425 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -24,11 +24,12 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
@@ -36,10 +37,6 @@
#include "xfs_bmap.h"
#include "xfs_attr.h"
#include "xfs_attr_leaf.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -89,7 +86,7 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
*/
STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
-STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra);
+STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
STATIC int xfs_da_blk_unlink(xfs_da_state_t *state,
xfs_da_state_blk_t *drop_blk,
xfs_da_state_blk_t *save_blk);
@@ -321,11 +318,11 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
ASSERT(bp != NULL);
node = bp->data;
oldroot = blk1->bp->data;
- if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC) {
+ if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
(char *)oldroot);
} else {
- ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
leaf = (xfs_dir2_leaf_t *)oldroot;
size = (int)((char *)&leaf->ents[be16_to_cpu(leaf->hdr.count)] -
(char *)leaf);
@@ -352,7 +349,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
node->hdr.count = cpu_to_be16(2);
#ifdef DEBUG
- if (be16_to_cpu(oldroot->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC) {
+ if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
ASSERT(blk1->blkno >= mp->m_dirleafblk &&
blk1->blkno < mp->m_dirfreeblk);
ASSERT(blk2->blkno >= mp->m_dirleafblk &&
@@ -384,7 +381,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
int useextra;
node = oldblk->bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
/*
* With V2 dirs the extra block is data or freespace.
@@ -483,8 +480,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
node1 = node2;
node2 = tmpnode;
}
- ASSERT(be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC);
- ASSERT(be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+ ASSERT(node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
count = (be16_to_cpu(node1->hdr.count) - be16_to_cpu(node2->hdr.count)) / 2;
if (count == 0)
return;
@@ -578,7 +575,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
int tmp;
node = oldblk->bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
ASSERT(newblk->blkno != 0);
if (state->args->whichfork == XFS_DATA_FORK)
@@ -695,6 +692,24 @@ xfs_da_join(xfs_da_state_t *state)
return(error);
}
+#ifdef DEBUG
+static void
+xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level)
+{
+ __be16 magic = blkinfo->magic;
+
+ if (level == 1) {
+ ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+ magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
+ } else
+ ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+ ASSERT(!blkinfo->forw);
+ ASSERT(!blkinfo->back);
+}
+#else /* !DEBUG */
+#define xfs_da_blkinfo_onlychild_validate(blkinfo, level)
+#endif /* !DEBUG */
+
/*
* We have only one entry in the root. Copy the only remaining child of
* the old root to block 0 as the new root node.
@@ -703,8 +718,6 @@ STATIC int
xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
{
xfs_da_intnode_t *oldroot;
- /* REFERENCED */
- xfs_da_blkinfo_t *blkinfo;
xfs_da_args_t *args;
xfs_dablk_t child;
xfs_dabuf_t *bp;
@@ -714,7 +727,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
ASSERT(args != NULL);
ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
oldroot = root_blk->bp->data;
- ASSERT(be16_to_cpu(oldroot->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
ASSERT(!oldroot->hdr.info.forw);
ASSERT(!oldroot->hdr.info.back);
@@ -735,15 +748,9 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
if (error)
return(error);
ASSERT(bp != NULL);
- blkinfo = bp->data;
- if (be16_to_cpu(oldroot->hdr.level) == 1) {
- ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DIR2_LEAFN_MAGIC ||
- be16_to_cpu(blkinfo->magic) == XFS_ATTR_LEAF_MAGIC);
- } else {
- ASSERT(be16_to_cpu(blkinfo->magic) == XFS_DA_NODE_MAGIC);
- }
- ASSERT(!blkinfo->forw);
- ASSERT(!blkinfo->back);
+ xfs_da_blkinfo_onlychild_validate(bp->data,
+ be16_to_cpu(oldroot->hdr.level));
+
memcpy(root_blk->bp->data, bp->data, state->blocksize);
xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
error = xfs_da_shrink_inode(args, child, bp);
@@ -776,7 +783,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
*/
blk = &state->path.blk[ state->path.active-1 ];
info = blk->bp->data;
- ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
node = (xfs_da_intnode_t *)info;
count = be16_to_cpu(node->hdr.count);
if (count > (state->node_ents >> 1)) {
@@ -836,7 +843,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
count -= state->node_ents >> 2;
count -= be16_to_cpu(node->hdr.count);
node = bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
count -= be16_to_cpu(node->hdr.count);
xfs_da_brelse(state->args->trans, bp);
if (count >= 0)
@@ -911,7 +918,7 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
}
for (blk--, level--; level >= 0; blk--, level--) {
node = blk->bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
btree = &node->btree[ blk->index ];
if (be32_to_cpu(btree->hashval) == lasthash)
break;
@@ -979,8 +986,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
drop_node = drop_blk->bp->data;
save_node = save_blk->bp->data;
- ASSERT(be16_to_cpu(drop_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
- ASSERT(be16_to_cpu(save_node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
+ ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
tp = state->args->trans;
/*
@@ -1278,8 +1285,8 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
node1 = node1_bp->data;
node2 = node2_bp->data;
- ASSERT((be16_to_cpu(node1->hdr.info.magic) == XFS_DA_NODE_MAGIC) &&
- (be16_to_cpu(node2->hdr.info.magic) == XFS_DA_NODE_MAGIC));
+ ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
+ node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
((be32_to_cpu(node2->btree[0].hashval) <
be32_to_cpu(node1->btree[0].hashval)) ||
@@ -1299,7 +1306,7 @@ xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
xfs_da_intnode_t *node;
node = bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if (count)
*count = be16_to_cpu(node->hdr.count);
if (!node->hdr.count)
@@ -1412,7 +1419,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
for (blk = &path->blk[level]; level >= 0; blk--, level--) {
ASSERT(blk->bp != NULL);
node = blk->bp->data;
- ASSERT(be16_to_cpu(node->hdr.info.magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
blk->index++;
blkno = be32_to_cpu(node->btree[blk->index].before);
@@ -1451,9 +1458,9 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
return(error);
ASSERT(blk->bp != NULL);
info = blk->bp->data;
- ASSERT(be16_to_cpu(info->magic) == XFS_DA_NODE_MAGIC ||
- be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC ||
- be16_to_cpu(info->magic) == XFS_ATTR_LEAF_MAGIC);
+ ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
+ info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
+ info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
blk->magic = be16_to_cpu(info->magic);
if (blk->magic == XFS_DA_NODE_MAGIC) {
node = (xfs_da_intnode_t *)info;
@@ -1546,79 +1553,60 @@ const struct xfs_nameops xfs_default_nameops = {
.compname = xfs_da_compname
};
-/*
- * Add a block to the btree ahead of the file.
- * Return the new block number to the caller.
- */
int
-xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
+xfs_da_grow_inode_int(
+ struct xfs_da_args *args,
+ xfs_fileoff_t *bno,
+ int count)
{
- xfs_fileoff_t bno, b;
- xfs_bmbt_irec_t map;
- xfs_bmbt_irec_t *mapp;
- xfs_inode_t *dp;
- int nmap, error, w, count, c, got, i, mapi;
- xfs_trans_t *tp;
- xfs_mount_t *mp;
- xfs_drfsbno_t nblks;
-
- dp = args->dp;
- mp = dp->i_mount;
- w = args->whichfork;
- tp = args->trans;
- nblks = dp->i_d.di_nblocks;
+ struct xfs_trans *tp = args->trans;
+ struct xfs_inode *dp = args->dp;
+ int w = args->whichfork;
+ xfs_drfsbno_t nblks = dp->i_d.di_nblocks;
+ struct xfs_bmbt_irec map, *mapp;
+ int nmap, error, got, i, mapi;
/*
- * For new directories adjust the file offset and block count.
- */
- if (w == XFS_DATA_FORK) {
- bno = mp->m_dirleafblk;
- count = mp->m_dirblkfsbs;
- } else {
- bno = 0;
- count = 1;
- }
- /*
* Find a spot in the file space to put the new block.
*/
- if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, w)))
+ error = xfs_bmap_first_unused(tp, dp, count, bno, w);
+ if (error)
return error;
- if (w == XFS_DATA_FORK)
- ASSERT(bno >= mp->m_dirleafblk && bno < mp->m_dirfreeblk);
+
/*
* Try mapping it in one filesystem block.
*/
nmap = 1;
ASSERT(args->firstblock != NULL);
- if ((error = xfs_bmapi(tp, dp, bno, count,
- xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|
- XFS_BMAPI_CONTIG,
+ error = xfs_bmapi_write(tp, dp, *bno, count,
+ xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
args->firstblock, args->total, &map, &nmap,
- args->flist))) {
+ args->flist);
+ if (error)
return error;
- }
+
ASSERT(nmap <= 1);
if (nmap == 1) {
mapp = &map;
mapi = 1;
- }
- /*
- * If we didn't get it and the block might work if fragmented,
- * try without the CONTIG flag. Loop until we get it all.
- */
- else if (nmap == 0 && count > 1) {
+ } else if (nmap == 0 && count > 1) {
+ xfs_fileoff_t b;
+ int c;
+
+ /*
+ * If we didn't get it and the block might work if fragmented,
+ * try without the CONTIG flag. Loop until we get it all.
+ */
mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
- for (b = bno, mapi = 0; b < bno + count; ) {
+ for (b = *bno, mapi = 0; b < *bno + count; ) {
nmap = MIN(XFS_BMAP_MAX_NMAP, count);
- c = (int)(bno + count - b);
- if ((error = xfs_bmapi(tp, dp, b, c,
- xfs_bmapi_aflag(w)|XFS_BMAPI_WRITE|
- XFS_BMAPI_METADATA,
+ c = (int)(*bno + count - b);
+ error = xfs_bmapi_write(tp, dp, b, c,
+ xfs_bmapi_aflag(w)|XFS_BMAPI_METADATA,
args->firstblock, args->total,
- &mapp[mapi], &nmap, args->flist))) {
- kmem_free(mapp);
- return error;
- }
+ &mapp[mapi], &nmap, args->flist);
+ if (error)
+ goto out_free_map;
if (nmap < 1)
break;
mapi += nmap;
@@ -1629,24 +1617,53 @@ xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno)
mapi = 0;
mapp = NULL;
}
+
/*
* Count the blocks we got, make sure it matches the total.
*/
for (i = 0, got = 0; i < mapi; i++)
got += mapp[i].br_blockcount;
- if (got != count || mapp[0].br_startoff != bno ||
+ if (got != count || mapp[0].br_startoff != *bno ||
mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
- bno + count) {
- if (mapp != &map)
- kmem_free(mapp);
- return XFS_ERROR(ENOSPC);
+ *bno + count) {
+ error = XFS_ERROR(ENOSPC);
+ goto out_free_map;
}
- if (mapp != &map)
- kmem_free(mapp);
+
/* account for newly allocated blocks in reserved blocks total */
args->total -= dp->i_d.di_nblocks - nblks;
- *new_blkno = (xfs_dablk_t)bno;
- return 0;
+
+out_free_map:
+ if (mapp != &map)
+ kmem_free(mapp);
+ return error;
+}
+
+/*
+ * Add a block to the btree ahead of the file.
+ * Return the new block number to the caller.
+ */
+int
+xfs_da_grow_inode(
+ struct xfs_da_args *args,
+ xfs_dablk_t *new_blkno)
+{
+ xfs_fileoff_t bno;
+ int count;
+ int error;
+
+ if (args->whichfork == XFS_DATA_FORK) {
+ bno = args->dp->i_mount->m_dirleafblk;
+ count = args->dp->i_mount->m_dirblkfsbs;
+ } else {
+ bno = 0;
+ count = 1;
+ }
+
+ error = xfs_da_grow_inode_int(args, &bno, count);
+ if (!error)
+ *new_blkno = (xfs_dablk_t)bno;
+ return error;
}
/*
@@ -1704,12 +1721,12 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
/*
* Get values from the moved block.
*/
- if (be16_to_cpu(dead_info->magic) == XFS_DIR2_LEAFN_MAGIC) {
+ if (dead_info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) {
dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
dead_level = 0;
dead_hash = be32_to_cpu(dead_leaf2->ents[be16_to_cpu(dead_leaf2->hdr.count) - 1].hashval);
} else {
- ASSERT(be16_to_cpu(dead_info->magic) == XFS_DA_NODE_MAGIC);
+ ASSERT(dead_info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
dead_node = (xfs_da_intnode_t *)dead_info;
dead_level = be16_to_cpu(dead_node->hdr.level);
dead_hash = be32_to_cpu(dead_node->btree[be16_to_cpu(dead_node->hdr.count) - 1].hashval);
@@ -1768,8 +1785,8 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
goto done;
par_node = par_buf->data;
- if (unlikely(
- be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC ||
+ if (unlikely(par_node->hdr.info.magic !=
+ cpu_to_be16(XFS_DA_NODE_MAGIC) ||
(level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
XFS_ERRLEVEL_LOW, mp);
@@ -1820,7 +1837,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
par_node = par_buf->data;
if (unlikely(
be16_to_cpu(par_node->hdr.level) != level ||
- be16_to_cpu(par_node->hdr.info.magic) != XFS_DA_NODE_MAGIC)) {
+ par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
XFS_ERRLEVEL_LOW, mp);
error = XFS_ERROR(EFSCORRUPTED);
@@ -1930,8 +1947,7 @@ xfs_da_do_buf(
xfs_daddr_t *mappedbnop,
xfs_dabuf_t **bpp,
int whichfork,
- int caller,
- inst_t *ra)
+ int caller)
{
xfs_buf_t *bp = NULL;
xfs_buf_t **bplist;
@@ -1957,33 +1973,16 @@ xfs_da_do_buf(
/*
* Optimize the one-block case.
*/
- if (nfsb == 1) {
- xfs_fsblock_t fsb;
-
- if ((error =
- xfs_bmapi_single(trans, dp, whichfork, &fsb,
- (xfs_fileoff_t)bno))) {
- return error;
- }
+ if (nfsb == 1)
mapp = &map;
- if (fsb == NULLFSBLOCK) {
- nmap = 0;
- } else {
- map.br_startblock = fsb;
- map.br_startoff = (xfs_fileoff_t)bno;
- map.br_blockcount = 1;
- nmap = 1;
- }
- } else {
+ else
mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
- nmap = nfsb;
- if ((error = xfs_bmapi(trans, dp, (xfs_fileoff_t)bno,
- nfsb,
- XFS_BMAPI_METADATA |
- xfs_bmapi_aflag(whichfork),
- NULL, 0, mapp, &nmap, NULL)))
- goto exit0;
- }
+
+ nmap = nfsb;
+ error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp,
+ &nmap, xfs_bmapi_aflag(whichfork));
+ if (error)
+ goto exit0;
} else {
map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
map.br_startoff = (xfs_fileoff_t)bno;
@@ -2032,7 +2031,7 @@ xfs_da_do_buf(
case 0:
bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
mappedbno, nmapped, 0);
- error = bp ? XFS_BUF_GETERROR(bp) : XFS_ERROR(EIO);
+ error = bp ? bp->b_error : XFS_ERROR(EIO);
break;
case 1:
case 2:
@@ -2054,13 +2053,10 @@ xfs_da_do_buf(
if (!bp)
continue;
if (caller == 1) {
- if (whichfork == XFS_ATTR_FORK) {
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_ATTR_BTREE,
- XFS_ATTR_BTREE_REF);
- } else {
- XFS_BUF_SET_VTYPE_REF(bp, B_FS_DIR_BTREE,
- XFS_DIR_BTREE_REF);
- }
+ if (whichfork == XFS_ATTR_FORK)
+ xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
+ else
+ xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
}
if (bplist) {
bplist[nbplist++] = bp;
@@ -2070,25 +2066,22 @@ xfs_da_do_buf(
* Build a dabuf structure.
*/
if (bplist) {
- rbp = xfs_da_buf_make(nbplist, bplist, ra);
+ rbp = xfs_da_buf_make(nbplist, bplist);
} else if (bp)
- rbp = xfs_da_buf_make(1, &bp, ra);
+ rbp = xfs_da_buf_make(1, &bp);
else
rbp = NULL;
/*
* For read_buf, check the magic number.
*/
if (caller == 1) {
- xfs_dir2_data_t *data;
- xfs_dir2_free_t *free;
- xfs_da_blkinfo_t *info;
+ xfs_dir2_data_hdr_t *hdr = rbp->data;
+ xfs_dir2_free_t *free = rbp->data;
+ xfs_da_blkinfo_t *info = rbp->data;
uint magic, magic1;
- info = rbp->data;
- data = rbp->data;
- free = rbp->data;
magic = be16_to_cpu(info->magic);
- magic1 = be32_to_cpu(data->hdr.magic);
+ magic1 = be32_to_cpu(hdr->magic);
if (unlikely(
XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
(magic != XFS_ATTR_LEAF_MAGIC) &&
@@ -2096,7 +2089,7 @@ xfs_da_do_buf(
(magic != XFS_DIR2_LEAFN_MAGIC) &&
(magic1 != XFS_DIR2_BLOCK_MAGIC) &&
(magic1 != XFS_DIR2_DATA_MAGIC) &&
- (be32_to_cpu(free->hdr.magic) != XFS_DIR2_FREE_MAGIC),
+ (free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
mp, XFS_ERRTAG_DA_READ_BUF,
XFS_RANDOM_DA_READ_BUF))) {
trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
@@ -2143,8 +2136,7 @@ xfs_da_get_buf(
xfs_dabuf_t **bpp,
int whichfork)
{
- return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0,
- (inst_t *)__return_address);
+ return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
}
/*
@@ -2159,8 +2151,7 @@ xfs_da_read_buf(
xfs_dabuf_t **bpp,
int whichfork)
{
- return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1,
- (inst_t *)__return_address);
+ return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
}
/*
@@ -2176,8 +2167,7 @@ xfs_da_reada_buf(
xfs_daddr_t rval;
rval = -1;
- if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3,
- (inst_t *)__return_address))
+ if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
return -1;
else
return rval;
@@ -2235,17 +2225,12 @@ xfs_da_state_free(xfs_da_state_t *state)
kmem_zone_free(xfs_da_state_zone, state);
}
-#ifdef XFS_DABUF_DEBUG
-xfs_dabuf_t *xfs_dabuf_global_list;
-static DEFINE_SPINLOCK(xfs_dabuf_global_lock);
-#endif
-
/*
* Create a dabuf.
*/
/* ARGSUSED */
STATIC xfs_dabuf_t *
-xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
+xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
{
xfs_buf_t *bp;
xfs_dabuf_t *dabuf;
@@ -2257,16 +2242,11 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
else
dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
dabuf->dirty = 0;
-#ifdef XFS_DABUF_DEBUG
- dabuf->ra = ra;
- dabuf->target = XFS_BUF_TARGET(bps[0]);
- dabuf->blkno = XFS_BUF_ADDR(bps[0]);
-#endif
if (nbuf == 1) {
dabuf->nbuf = 1;
bp = bps[0];
dabuf->bbcount = (short)BTOBB(XFS_BUF_COUNT(bp));
- dabuf->data = XFS_BUF_PTR(bp);
+ dabuf->data = bp->b_addr;
dabuf->bps[0] = bp;
} else {
dabuf->nbuf = nbuf;
@@ -2277,27 +2257,10 @@ xfs_da_buf_make(int nbuf, xfs_buf_t **bps, inst_t *ra)
dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
for (i = off = 0; i < nbuf; i++, off += XFS_BUF_COUNT(bp)) {
bp = bps[i];
- memcpy((char *)dabuf->data + off, XFS_BUF_PTR(bp),
+ memcpy((char *)dabuf->data + off, bp->b_addr,
XFS_BUF_COUNT(bp));
}
}
-#ifdef XFS_DABUF_DEBUG
- {
- xfs_dabuf_t *p;
-
- spin_lock(&xfs_dabuf_global_lock);
- for (p = xfs_dabuf_global_list; p; p = p->next) {
- ASSERT(p->blkno != dabuf->blkno ||
- p->target != dabuf->target);
- }
- dabuf->prev = NULL;
- if (xfs_dabuf_global_list)
- xfs_dabuf_global_list->prev = dabuf;
- dabuf->next = xfs_dabuf_global_list;
- xfs_dabuf_global_list = dabuf;
- spin_unlock(&xfs_dabuf_global_lock);
- }
-#endif
return dabuf;
}
@@ -2317,8 +2280,8 @@ xfs_da_buf_clean(xfs_dabuf_t *dabuf)
for (i = off = 0; i < dabuf->nbuf;
i++, off += XFS_BUF_COUNT(bp)) {
bp = dabuf->bps[i];
- memcpy(XFS_BUF_PTR(bp), (char *)dabuf->data + off,
- XFS_BUF_COUNT(bp));
+ memcpy(bp->b_addr, dabuf->data + off,
+ XFS_BUF_COUNT(bp));
}
}
}
@@ -2333,25 +2296,12 @@ xfs_da_buf_done(xfs_dabuf_t *dabuf)
ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
if (dabuf->dirty)
xfs_da_buf_clean(dabuf);
- if (dabuf->nbuf > 1)
+ if (dabuf->nbuf > 1) {
kmem_free(dabuf->data);
-#ifdef XFS_DABUF_DEBUG
- {
- spin_lock(&xfs_dabuf_global_lock);
- if (dabuf->prev)
- dabuf->prev->next = dabuf->next;
- else
- xfs_dabuf_global_list = dabuf->next;
- if (dabuf->next)
- dabuf->next->prev = dabuf->prev;
- spin_unlock(&xfs_dabuf_global_lock);
- }
- memset(dabuf, 0, XFS_DA_BUF_SIZE(dabuf->nbuf));
-#endif
- if (dabuf->nbuf == 1)
- kmem_zone_free(xfs_dabuf_zone, dabuf);
- else
kmem_free(dabuf);
+ } else {
+ kmem_zone_free(xfs_dabuf_zone, dabuf);
+ }
}
/*
@@ -2368,7 +2318,7 @@ xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
if (dabuf->nbuf == 1) {
- ASSERT(dabuf->data == (void *)XFS_BUF_PTR(dabuf->bps[0]));
+ ASSERT(dabuf->data == dabuf->bps[0]->b_addr);
xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
return;
}
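Most of the comparison churn in xfs_da_btree.c swaps `be16_to_cpu(x) == CONST` for `x == cpu_to_be16(CONST)`. The two forms are equivalent, but the new one byteswaps the compile-time constant rather than the on-disk field, so on little-endian CPUs the swap folds away and the comparison costs nothing at run time. A minimal userspace analogue (glibc endian.h in place of the kernel helpers; the magic value is made up):

#include <endian.h>
#include <stdint.h>

#define NODE_MAGIC	0xbeefU		/* illustrative value, not XFS's */

/* disk_magic points at a big-endian on-disk field. */
static int is_node(const uint16_t *disk_magic)
{
	/*
	 * htobe16(NODE_MAGIC) is a constant expression, so the byteswap
	 * is folded at compile time; the old form swapped the variable
	 * on every comparison instead.
	 */
	return *disk_magic == htobe16(NODE_MAGIC);
}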
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index fe9f5a8..dbf7c07 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -145,22 +145,11 @@ typedef struct xfs_dabuf {
short dirty; /* data needs to be copied back */
short bbcount; /* how large is data in bbs */
void *data; /* pointer for buffers' data */
-#ifdef XFS_DABUF_DEBUG
- inst_t *ra; /* return address of caller to make */
- struct xfs_dabuf *next; /* next in global chain */
- struct xfs_dabuf *prev; /* previous in global chain */
- struct xfs_buftarg *target; /* device for buffer */
- xfs_daddr_t blkno; /* daddr first in bps[0] */
-#endif
struct xfs_buf *bps[1]; /* actually nbuf of these */
} xfs_dabuf_t;
#define XFS_DA_BUF_SIZE(n) \
(sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
-#ifdef XFS_DABUF_DEBUG
-extern xfs_dabuf_t *xfs_dabuf_global_list;
-#endif
-
/*
* Storage for holding state during Btree searches and split/join ops.
*
@@ -248,6 +237,8 @@ int xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
* Utility routines.
*/
int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
+int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
+ int count);
int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
xfs_dabuf_t **bp, int whichfork);
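xfs_da_grow_inode_int() above also replaces the duplicated `if (mapp != &map) kmem_free(mapp)` exits with a single out_free_map label, the usual kernel unwinding idiom. A self-contained sketch of that shape, with hypothetical helpers standing in for the bmap calls:

#include <stdlib.h>

/* Stand-ins for the mapping and validation steps; always succeed here. */
static int do_mapping(int *map, int count) { (void)map; return count >= 0 ? 0 : -1; }
static int validate(const int *map, int count) { (void)map; (void)count; return 0; }

static int grow(int count, int *stack_map)
{
	int *map = stack_map;
	int error;

	if (count > 1) {
		map = malloc(sizeof(*map) * count);
		if (!map)
			return -1;	/* nothing to unwind yet */
	}

	error = do_mapping(map, count);
	if (error)
		goto out_free_map;

	error = validate(map, count);
	/* fall through: success and failure share the cleanup */

out_free_map:
	if (map != stack_map)	/* mirrors: if (mapp != &map) kmem_free(mapp) */
		free(map);
	return error;
}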
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 9a84a85..654dc6f 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -425,8 +425,8 @@ xfs_swap_extents(
}
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ijoin_ref(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
xfs_trans_log_inode(tp, ip, ilf_fields);
xfs_trans_log_inode(tp, tip, tilf_fields);
@@ -438,7 +438,7 @@ xfs_swap_extents(
if (mp->m_flags & XFS_MOUNT_WSYNC)
xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
+ error = xfs_trans_commit(tp, 0);
trace_xfs_swap_extent_after(ip, 0);
trace_xfs_swap_extent_after(tip, 1);
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index dffba9b..a372163 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -148,7 +148,7 @@ typedef enum xfs_dinode_fmt {
be32_to_cpu((dip)->di_nextents) : \
be16_to_cpu((dip)->di_anextents))
-#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_DINODE(bp) ((xfs_dinode_t *)((bp)->b_addr))
/*
* For block and character special files the 32bit dev_t is stored at the
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index dba7a71..a2e2701 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -24,20 +24,17 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
@@ -122,15 +119,15 @@ int
xfs_dir_isempty(
xfs_inode_t *dp)
{
- xfs_dir2_sf_t *sfp;
+ xfs_dir2_sf_hdr_t *sfp;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if (dp->i_d.di_size == 0) /* might happen during shutdown. */
return 1;
if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
return 0;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- return !sfp->hdr.count;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ return !sfp->count;
}
/*
@@ -182,7 +179,7 @@ xfs_dir_init(
memset((char *)&args, 0, sizeof(args));
args.dp = dp;
args.trans = tp;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino)))
return error;
return xfs_dir2_sf_create(&args, pdp->i_ino);
@@ -205,7 +202,7 @@ xfs_dir_createname(
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
return rval;
XFS_STATS_INC(xs_dir_create);
@@ -281,7 +278,7 @@ xfs_dir_lookup(
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_lookup);
memset(&args, 0, sizeof(xfs_da_args_t));
@@ -336,7 +333,7 @@ xfs_dir_removename(
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_remove);
memset(&args, 0, sizeof(xfs_da_args_t));
@@ -385,7 +382,7 @@ xfs_readdir(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return XFS_ERROR(EIO);
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
XFS_STATS_INC(xs_dir_getdents);
if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL)
@@ -417,7 +414,7 @@ xfs_dir_replace(
int rval;
int v; /* type-checking value */
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum)))
return rval;
@@ -467,7 +464,7 @@ xfs_dir_canenter(
if (resblks)
return 0;
- ASSERT((dp->i_d.di_mode & S_IFMT) == S_IFDIR);
+ ASSERT(S_ISDIR(dp->i_d.di_mode));
memset(&args, 0, sizeof(xfs_da_args_t));
args.name = name->name;
@@ -500,129 +497,34 @@ xfs_dir_canenter(
/*
* Add a block to the directory.
- * This routine is for data and free blocks, not leaf/node blocks
- * which are handled by xfs_da_grow_inode.
+ *
+ * This routine is for data and free blocks, not leaf/node blocks which are
+ * handled by xfs_da_grow_inode.
*/
int
xfs_dir2_grow_inode(
- xfs_da_args_t *args,
- int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */
- xfs_dir2_db_t *dbp) /* out: block number added */
+ struct xfs_da_args *args,
+ int space, /* v2 dir's space XFS_DIR2_xxx_SPACE */
+ xfs_dir2_db_t *dbp) /* out: block number added */
{
- xfs_fileoff_t bno; /* directory offset of new block */
- int count; /* count of filesystem blocks */
- xfs_inode_t *dp; /* incore directory inode */
- int error;
- int got; /* blocks actually mapped */
- int i;
- xfs_bmbt_irec_t map; /* single structure for bmap */
- int mapi; /* mapping index */
- xfs_bmbt_irec_t *mapp; /* bmap mapping structure(s) */
- xfs_mount_t *mp;
- int nmap; /* number of bmap entries */
- xfs_trans_t *tp;
- xfs_drfsbno_t nblks;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
+ xfs_fileoff_t bno; /* directory offset of new block */
+ int count; /* count of filesystem blocks */
+ int error;
trace_xfs_dir2_grow_inode(args, space);
- dp = args->dp;
- tp = args->trans;
- mp = dp->i_mount;
- nblks = dp->i_d.di_nblocks;
/*
* Set lowest possible block in the space requested.
*/
bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE);
count = mp->m_dirblkfsbs;
- /*
- * Find the first hole for our block.
- */
- if ((error = xfs_bmap_first_unused(tp, dp, count, &bno, XFS_DATA_FORK)))
- return error;
- nmap = 1;
- ASSERT(args->firstblock != NULL);
- /*
- * Try mapping the new block contiguously (one extent).
- */
- if ((error = xfs_bmapi(tp, dp, bno, count,
- XFS_BMAPI_WRITE|XFS_BMAPI_METADATA|XFS_BMAPI_CONTIG,
- args->firstblock, args->total, &map, &nmap,
- args->flist)))
- return error;
- ASSERT(nmap <= 1);
- if (nmap == 1) {
- mapp = &map;
- mapi = 1;
- }
- /*
- * Didn't work and this is a multiple-fsb directory block.
- * Try again with contiguous flag turned on.
- */
- else if (nmap == 0 && count > 1) {
- xfs_fileoff_t b; /* current file offset */
- /*
- * Space for maximum number of mappings.
- */
- mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
- /*
- * Iterate until we get to the end of our block.
- */
- for (b = bno, mapi = 0; b < bno + count; ) {
- int c; /* current fsb count */
-
- /*
- * Can't map more than MAX_NMAP at once.
- */
- nmap = MIN(XFS_BMAP_MAX_NMAP, count);
- c = (int)(bno + count - b);
- if ((error = xfs_bmapi(tp, dp, b, c,
- XFS_BMAPI_WRITE|XFS_BMAPI_METADATA,
- args->firstblock, args->total,
- &mapp[mapi], &nmap, args->flist))) {
- kmem_free(mapp);
- return error;
- }
- if (nmap < 1)
- break;
- /*
- * Add this bunch into our table, go to the next offset.
- */
- mapi += nmap;
- b = mapp[mapi - 1].br_startoff +
- mapp[mapi - 1].br_blockcount;
- }
- }
- /*
- * Didn't work.
- */
- else {
- mapi = 0;
- mapp = NULL;
- }
- /*
- * See how many fsb's we got.
- */
- for (i = 0, got = 0; i < mapi; i++)
- got += mapp[i].br_blockcount;
- /*
- * Didn't get enough fsb's, or the first/last block's are wrong.
- */
- if (got != count || mapp[0].br_startoff != bno ||
- mapp[mapi - 1].br_startoff + mapp[mapi - 1].br_blockcount !=
- bno + count) {
- if (mapp != &map)
- kmem_free(mapp);
- return XFS_ERROR(ENOSPC);
- }
- /*
- * Done with the temporary mapping table.
- */
- if (mapp != &map)
- kmem_free(mapp);
+ error = xfs_da_grow_inode_int(args, &bno, count);
+ if (error)
+ return error;
- /* account for newly allocated blocks in reserved blocks total */
- args->total -= dp->i_d.di_nblocks - nblks;
*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno);
/*
@@ -634,7 +536,7 @@ xfs_dir2_grow_inode(
size = XFS_FSB_TO_B(mp, bno + count);
if (size > dp->i_d.di_size) {
dp->i_d.di_size = size;
- xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
+ xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE);
}
}
return 0;
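Two patterns recur in the xfs_dir2.c hunks: the open-coded `(di_mode & S_IFMT) == S_IFDIR` test becomes the standard S_ISDIR() predicate, and roughly a hundred lines of duplicated block-mapping code collapse into the call to the new xfs_da_grow_inode_int() helper. A trivial userspace illustration of the first:

#include <sys/stat.h>

/* Both tests are equivalent; S_ISDIR() cannot get the mask wrong. */
static int is_dir(mode_t mode)
{
	return S_ISDIR(mode);	/* same as (mode & S_IFMT) == S_IFDIR */
}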
diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h
index 74a3b10..e937d99 100644
--- a/fs/xfs/xfs_dir2.h
+++ b/fs/xfs/xfs_dir2.h
@@ -16,49 +16,14 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef __XFS_DIR2_H__
-#define __XFS_DIR2_H__
+#define __XFS_DIR2_H__
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_put_args;
struct xfs_bmap_free;
+struct xfs_da_args;
struct xfs_inode;
struct xfs_mount;
struct xfs_trans;
-/*
- * Directory version 2.
- * There are 4 possible formats:
- * shortform
- * single block - data with embedded leaf at the end
- * multiple data blocks, single leaf+freeindex block
- * data blocks, node&leaf blocks (btree), freeindex blocks
- *
- * The shortform format is in xfs_dir2_sf.h.
- * The single block format is in xfs_dir2_block.h.
- * The data block format is in xfs_dir2_data.h.
- * The leaf and freeindex block formats are in xfs_dir2_leaf.h.
- * Node blocks are the same as the other version, in xfs_da_btree.h.
- */
-
-/*
- * Byte offset in data block and shortform entry.
- */
-typedef __uint16_t xfs_dir2_data_off_t;
-#define NULLDATAOFF 0xffffU
-typedef uint xfs_dir2_data_aoff_t; /* argument form */
-
-/*
- * Directory block number (logical dirblk in file)
- */
-typedef __uint32_t xfs_dir2_db_t;
-
-/*
- * Byte offset in a directory.
- */
-typedef xfs_off_t xfs_dir2_off_t;
-
extern struct xfs_name xfs_name_dotdot;
/*
@@ -86,21 +51,10 @@ extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_bmap_free *flist, xfs_extlen_t tot);
extern int xfs_dir_canenter(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, uint resblks);
-extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);
/*
- * Utility routines for v2 directories.
+ * Direct call from the bmap code, bypassing the generic directory layer.
*/
-extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
- xfs_dir2_db_t *dbp);
-extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp,
- int *vp);
-extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp,
- int *vp);
-extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
- struct xfs_dabuf *bp);
-
-extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
- const unsigned char *name, int len);
+extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
#endif /* __XFS_DIR2_H__ */
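The xfs_dir2.h hunk is a public/private split: on-disk types move to xfs_dir2_format.h, internal prototypes to xfs_dir2_priv.h, and only the interface the rest of the tree calls stays here. A generic sketch of the convention, with hypothetical file names shown as comments:

/* widget.h — the public interface; the only header other code includes. */
struct widget;				/* opaque to callers */
int widget_frob(struct widget *w);

/*
 * widget_priv.h — internals shared only by the widget_*.c files, so
 * layout and helper changes never ripple through the rest of the tree.
 */
struct widget {
	int state;
};
int widget_frob_int(struct widget *w, int depth);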
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 580d99c..9245e02 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -23,17 +23,14 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -67,7 +64,7 @@ xfs_dir2_block_addname(
xfs_da_args_t *args) /* directory op arguments */
{
xfs_dir2_data_free_t *bf; /* bestfree table in block */
- xfs_dir2_block_t *block; /* directory block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* buffer for block */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -105,13 +102,13 @@ xfs_dir2_block_addname(
return error;
}
ASSERT(bp != NULL);
- block = bp->data;
+ hdr = bp->data;
/*
* Check the magic number, corrupted if wrong.
*/
- if (unlikely(be32_to_cpu(block->hdr.magic) != XFS_DIR2_BLOCK_MAGIC)) {
+ if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
- XFS_ERRLEVEL_LOW, mp, block);
+ XFS_ERRLEVEL_LOW, mp, hdr);
xfs_da_brelse(tp, bp);
return XFS_ERROR(EFSCORRUPTED);
}
@@ -119,8 +116,8 @@ xfs_dir2_block_addname(
/*
* Set up pointers to parts of the block.
*/
- bf = block->hdr.bestfree;
- btp = xfs_dir2_block_tail_p(mp, block);
+ bf = hdr->bestfree;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* No stale entries? Need space for entry and new leaf.
@@ -133,7 +130,7 @@ xfs_dir2_block_addname(
/*
* Data object just before the first leaf entry.
*/
- enddup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
+ enddup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free then can't do this add without cleaning up:
* the space before the first leaf entry needs to be free so it
@@ -146,7 +143,7 @@ xfs_dir2_block_addname(
*/
else {
dup = (xfs_dir2_data_unused_t *)
- ((char *)block + be16_to_cpu(bf[0].offset));
+ ((char *)hdr + be16_to_cpu(bf[0].offset));
if (dup == enddup) {
/*
* It is the biggest freespace, is it too small
@@ -159,7 +156,7 @@ xfs_dir2_block_addname(
*/
if (be16_to_cpu(bf[1].length) >= len)
dup = (xfs_dir2_data_unused_t *)
- ((char *)block +
+ ((char *)hdr +
be16_to_cpu(bf[1].offset));
else
dup = NULL;
@@ -182,7 +179,7 @@ xfs_dir2_block_addname(
*/
else if (be16_to_cpu(bf[0].length) >= len) {
dup = (xfs_dir2_data_unused_t *)
- ((char *)block + be16_to_cpu(bf[0].offset));
+ ((char *)hdr + be16_to_cpu(bf[0].offset));
compact = 0;
}
/*
@@ -196,7 +193,7 @@ xfs_dir2_block_addname(
/*
* Data object just before the first leaf entry.
*/
- dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
+ dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free then the data will go where the
* leaf data starts now, if it works at all.
@@ -255,7 +252,8 @@ xfs_dir2_block_addname(
highstale = lfloghigh = -1;
fromidx >= 0;
fromidx--) {
- if (be32_to_cpu(blp[fromidx].address) == XFS_DIR2_NULL_DATAPTR) {
+ if (blp[fromidx].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
if (highstale == -1)
highstale = toidx;
else {
@@ -272,7 +270,7 @@ xfs_dir2_block_addname(
lfloghigh -= be32_to_cpu(btp->stale) - 1;
be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1));
xfs_dir2_data_make_free(tp, bp,
- (xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
+ (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),
&needlog, &needscan);
blp += be32_to_cpu(btp->stale) - 1;
@@ -282,7 +280,7 @@ xfs_dir2_block_addname(
* This needs to happen before the next call to use_free.
*/
if (needscan) {
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
needscan = 0;
}
}
@@ -318,7 +316,7 @@ xfs_dir2_block_addname(
*/
xfs_dir2_data_use_free(tp, bp, enddup,
(xfs_dir2_data_aoff_t)
- ((char *)enddup - (char *)block + be16_to_cpu(enddup->length) -
+ ((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -
sizeof(*blp)),
(xfs_dir2_data_aoff_t)sizeof(*blp),
&needlog, &needscan);
@@ -331,8 +329,7 @@ xfs_dir2_block_addname(
* This needs to happen before the next call to use_free.
*/
if (needscan) {
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block,
- &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
needscan = 0;
}
/*
@@ -353,12 +350,14 @@ xfs_dir2_block_addname(
else {
for (lowstale = mid;
lowstale >= 0 &&
- be32_to_cpu(blp[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
+ blp[lowstale].address !=
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
lowstale--)
continue;
for (highstale = mid + 1;
highstale < be32_to_cpu(btp->count) &&
- be32_to_cpu(blp[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
+ blp[highstale].address !=
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR) &&
(lowstale < 0 || mid - lowstale > highstale - mid);
highstale++)
continue;
@@ -397,13 +396,13 @@ xfs_dir2_block_addname(
*/
blp[mid].hashval = cpu_to_be32(args->hashval);
blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
- (char *)dep - (char *)block));
+ (char *)dep - (char *)hdr));
xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);
/*
* Mark space for the data entry used.
*/
xfs_dir2_data_use_free(tp, bp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
(xfs_dir2_data_aoff_t)len, &needlog, &needscan);
/*
* Create the new data entry.
@@ -412,12 +411,12 @@ xfs_dir2_block_addname(
dep->namelen = args->namelen;
memcpy(dep->name, args->name, args->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)block);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
/*
* Clean up the bestfree array and log the header, tail, and entry.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, bp);
xfs_dir2_block_log_tail(tp, bp);
@@ -437,7 +436,7 @@ xfs_dir2_block_getdents(
xfs_off_t *offset,
filldir_t filldir)
{
- xfs_dir2_block_t *block; /* directory block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dabuf_t *bp; /* buffer for block */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* block data entry */
@@ -470,13 +469,13 @@ xfs_dir2_block_getdents(
* We'll skip entries before this.
*/
wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
- block = bp->data;
+ hdr = bp->data;
xfs_dir2_data_check(dp, bp);
/*
* Set up values for the loop.
*/
- btp = xfs_dir2_block_tail_p(mp, block);
- ptr = (char *)block->u;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
+ ptr = (char *)(hdr + 1);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
/*
@@ -502,11 +501,11 @@ xfs_dir2_block_getdents(
/*
* The entry is before the desired starting point, skip it.
*/
- if ((char *)dep - (char *)block < wantoff)
+ if ((char *)dep - (char *)hdr < wantoff)
continue;
cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
- (char *)dep - (char *)block);
+ (char *)dep - (char *)hdr);
/*
* If it didn't fit, set the final offset to here & return.
@@ -540,17 +539,14 @@ xfs_dir2_block_log_leaf(
int first, /* index of first logged leaf */
int last) /* index of last logged leaf */
{
- xfs_dir2_block_t *block; /* directory block structure */
- xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
- xfs_dir2_block_tail_t *btp; /* block tail */
- xfs_mount_t *mp; /* filesystem mount point */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_leaf_entry_t *blp;
+ xfs_dir2_block_tail_t *btp;
- mp = tp->t_mountp;
- block = bp->data;
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
- xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)block),
- (uint)((char *)&blp[last + 1] - (char *)block - 1));
+ xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
+ (uint)((char *)&blp[last + 1] - (char *)hdr - 1));
}
/*
@@ -561,15 +557,12 @@ xfs_dir2_block_log_tail(
xfs_trans_t *tp, /* transaction structure */
xfs_dabuf_t *bp) /* block buffer */
{
- xfs_dir2_block_t *block; /* directory block structure */
- xfs_dir2_block_tail_t *btp; /* block tail */
- xfs_mount_t *mp; /* filesystem mount point */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_block_tail_t *btp;
- mp = tp->t_mountp;
- block = bp->data;
- btp = xfs_dir2_block_tail_p(mp, block);
- xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)block),
- (uint)((char *)(btp + 1) - (char *)block - 1));
+ btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
+ xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
+ (uint)((char *)(btp + 1) - (char *)hdr - 1));
}
/*
@@ -580,7 +573,7 @@ int /* error */
xfs_dir2_block_lookup(
xfs_da_args_t *args) /* dir lookup arguments */
{
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -600,14 +593,14 @@ xfs_dir2_block_lookup(
return error;
dp = args->dp;
mp = dp->i_mount;
- block = bp->data;
+ hdr = bp->data;
xfs_dir2_data_check(dp, bp);
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Get the offset from the leaf entry, to point to the data.
*/
- dep = (xfs_dir2_data_entry_t *)((char *)block +
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr +
xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
/*
* Fill in inode number, CI name if appropriate, release the block.
@@ -628,7 +621,7 @@ xfs_dir2_block_lookup_int(
int *entno) /* returned entry number */
{
xfs_dir2_dataptr_t addr; /* data entry address */
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -654,9 +647,9 @@ xfs_dir2_block_lookup_int(
return error;
}
ASSERT(bp != NULL);
- block = bp->data;
+ hdr = bp->data;
xfs_dir2_data_check(dp, bp);
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Loop doing a binary search for our hash value.
@@ -694,7 +687,7 @@ xfs_dir2_block_lookup_int(
* Get pointer to the entry from the leaf.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
/*
* Compare name and if it's an exact match, return the index
* and buffer. If it's the first case-insensitive match, store
@@ -733,7 +726,7 @@ int /* error */
xfs_dir2_block_removename(
xfs_da_args_t *args) /* directory operation args */
{
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -760,20 +753,20 @@ xfs_dir2_block_removename(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- block = bp->data;
- btp = xfs_dir2_block_tail_p(mp, block);
+ hdr = bp->data;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Point to the data entry using the leaf entry.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
/*
* Mark the data entry's space free.
*/
needlog = needscan = 0;
xfs_dir2_data_make_free(tp, bp,
- (xfs_dir2_data_aoff_t)((char *)dep - (char *)block),
+ (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
/*
* Fix up the block tail.
@@ -789,15 +782,15 @@ xfs_dir2_block_removename(
* Fix up bestfree, log the header if necessary.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, bp);
xfs_dir2_data_check(dp, bp);
/*
* See if the size as a shortform is good enough.
*/
- if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
- XFS_IFORK_DSIZE(dp)) {
+ size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
+ if (size > XFS_IFORK_DSIZE(dp)) {
xfs_da_buf_done(bp);
return 0;
}
@@ -815,7 +808,7 @@ int /* error */
xfs_dir2_block_replace(
xfs_da_args_t *args) /* directory operation args */
{
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -836,14 +829,14 @@ xfs_dir2_block_replace(
}
dp = args->dp;
mp = dp->i_mount;
- block = bp->data;
- btp = xfs_dir2_block_tail_p(mp, block);
+ hdr = bp->data;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Point to the data entry we need to change.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address)));
ASSERT(be64_to_cpu(dep->inumber) != args->inumber);
/*
* Change the inode number to the new value.
@@ -882,7 +875,7 @@ xfs_dir2_leaf_to_block(
xfs_dabuf_t *dbp) /* data buffer */
{
__be16 *bestsp; /* leaf bests table */
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* unused data entry */
@@ -906,7 +899,7 @@ xfs_dir2_leaf_to_block(
tp = args->trans;
mp = dp->i_mount;
leaf = lbp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
/*
* If there are data blocks other than the first one, take this
@@ -917,7 +910,7 @@ xfs_dir2_leaf_to_block(
while (dp->i_d.di_size > mp->m_dirblksize) {
bestsp = xfs_dir2_leaf_bests_p(ltp);
if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
- mp->m_dirblksize - (uint)sizeof(block->hdr)) {
+ mp->m_dirblksize - (uint)sizeof(*hdr)) {
if ((error =
xfs_dir2_leaf_trim_data(args, lbp,
(xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
@@ -935,18 +928,18 @@ xfs_dir2_leaf_to_block(
XFS_DATA_FORK))) {
goto out;
}
- block = dbp->data;
- ASSERT(be32_to_cpu(block->hdr.magic) == XFS_DIR2_DATA_MAGIC);
+ hdr = dbp->data;
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
/*
* Size of the "leaf" area in the block.
*/
- size = (uint)sizeof(block->tail) +
+ size = (uint)sizeof(xfs_dir2_block_tail_t) +
(uint)sizeof(*lep) * (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
/*
* Look at the last data entry.
*/
- tagp = (__be16 *)((char *)block + mp->m_dirblksize) - 1;
- dup = (xfs_dir2_data_unused_t *)((char *)block + be16_to_cpu(*tagp));
+ tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1;
+ dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
/*
* If it's not free or is too short we can't do it.
*/
@@ -958,7 +951,7 @@ xfs_dir2_leaf_to_block(
/*
* Start converting it to block form.
*/
- block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+ hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
needlog = 1;
needscan = 0;
/*
@@ -969,7 +962,7 @@ xfs_dir2_leaf_to_block(
/*
* Initialize the block tail.
*/
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
btp->count = cpu_to_be32(be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale));
btp->stale = 0;
xfs_dir2_block_log_tail(tp, dbp);
@@ -978,7 +971,8 @@ xfs_dir2_leaf_to_block(
*/
lep = xfs_dir2_block_leaf_p(btp);
for (from = to = 0; from < be16_to_cpu(leaf->hdr.count); from++) {
- if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf->ents[from].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
continue;
lep[to++] = leaf->ents[from];
}
@@ -988,7 +982,7 @@ xfs_dir2_leaf_to_block(
* Scan the bestfree if we need it and log the data block header.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, dbp);
/*
@@ -1002,8 +996,8 @@ xfs_dir2_leaf_to_block(
/*
* Now see if the resulting block can be shrunken to shortform.
*/
- if ((size = xfs_dir2_block_sfsize(dp, block, &sfh)) >
- XFS_IFORK_DSIZE(dp)) {
+ size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
+ if (size > XFS_IFORK_DSIZE(dp)) {
error = 0;
goto out;
}
@@ -1024,12 +1018,10 @@ xfs_dir2_sf_to_block(
xfs_da_args_t *args) /* operation arguments */
{
xfs_dir2_db_t blkno; /* dir-relative block # (0) */
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
xfs_dabuf_t *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail pointer */
- char *buf; /* sf buffer */
- int buf_len;
xfs_dir2_data_entry_t *dep; /* data entry pointer */
xfs_inode_t *dp; /* incore directory inode */
int dummy; /* trash */
@@ -1043,7 +1035,8 @@ xfs_dir2_sf_to_block(
int newoffset; /* offset from current entry */
int offset; /* target block offset */
xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform header */
__be16 *tagp; /* end of data entry */
xfs_trans_t *tp; /* transaction pointer */
struct xfs_name name;
@@ -1061,32 +1054,30 @@ xfs_dir2_sf_to_block(
ASSERT(XFS_FORCED_SHUTDOWN(mp));
return XFS_ERROR(EIO);
}
+
+ oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(oldsfp->i8count));
+
/*
- * Copy the directory into the stack buffer.
+ * Copy the directory into a temporary buffer.
* Then pitch the incore inode data so we can make extents.
*/
+ sfp = kmem_alloc(dp->i_df.if_bytes, KM_SLEEP);
+ memcpy(sfp, oldsfp, dp->i_df.if_bytes);
- buf_len = dp->i_df.if_bytes;
- buf = kmem_alloc(buf_len, KM_SLEEP);
-
- memcpy(buf, sfp, buf_len);
- xfs_idata_realloc(dp, -buf_len, XFS_DATA_FORK);
+ xfs_idata_realloc(dp, -dp->i_df.if_bytes, XFS_DATA_FORK);
dp->i_d.di_size = 0;
xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
- /*
- * Reset pointer - old sfp is gone.
- */
- sfp = (xfs_dir2_sf_t *)buf;
+
/*
* Add block 0 to the inode.
*/
error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, &blkno);
if (error) {
- kmem_free(buf);
+ kmem_free(sfp);
return error;
}
/*
@@ -1094,21 +1085,21 @@ xfs_dir2_sf_to_block(
*/
error = xfs_dir2_data_init(args, blkno, &bp);
if (error) {
- kmem_free(buf);
+ kmem_free(sfp);
return error;
}
- block = bp->data;
- block->hdr.magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
+ hdr = bp->data;
+ hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
/*
* Compute size of block "tail" area.
*/
i = (uint)sizeof(*btp) +
- (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
+ (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t);
/*
* The whole thing is initialized to free by the init routine.
* Say we're using the leaf and tail area.
*/
- dup = (xfs_dir2_data_unused_t *)block->u;
+ dup = (xfs_dir2_data_unused_t *)(hdr + 1);
needlog = needscan = 0;
xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog,
&needscan);
@@ -1116,50 +1107,51 @@ xfs_dir2_sf_to_block(
/*
* Fill in the tail.
*/
- btp = xfs_dir2_block_tail_p(mp, block);
- btp->count = cpu_to_be32(sfp->hdr.count + 2); /* ., .. */
+ btp = xfs_dir2_block_tail_p(mp, hdr);
+ btp->count = cpu_to_be32(sfp->count + 2); /* ., .. */
btp->stale = 0;
blp = xfs_dir2_block_leaf_p(btp);
- endoffset = (uint)((char *)blp - (char *)block);
+ endoffset = (uint)((char *)blp - (char *)hdr);
/*
* Remove the freespace, we'll manage it.
*/
xfs_dir2_data_use_free(tp, bp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)block),
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),
be16_to_cpu(dup->length), &needlog, &needscan);
/*
* Create entry for .
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + XFS_DIR2_DATA_DOT_OFFSET);
+ ((char *)hdr + XFS_DIR2_DATA_DOT_OFFSET);
dep->inumber = cpu_to_be64(dp->i_ino);
dep->namelen = 1;
dep->name[0] = '.';
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)block);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(tp, bp, dep);
blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
- (char *)dep - (char *)block));
+ (char *)dep - (char *)hdr));
/*
* Create entry for ..
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + XFS_DIR2_DATA_DOTDOT_OFFSET);
- dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
+ ((char *)hdr + XFS_DIR2_DATA_DOTDOT_OFFSET);
+ dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
dep->namelen = 2;
dep->name[0] = dep->name[1] = '.';
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)block);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(tp, bp, dep);
blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
- (char *)dep - (char *)block));
+ (char *)dep - (char *)hdr));
offset = XFS_DIR2_DATA_FIRST_OFFSET;
/*
* Loop over existing entries, stuff them in.
*/
- if ((i = 0) == sfp->hdr.count)
+ i = 0;
+ if (!sfp->count)
sfep = NULL;
else
sfep = xfs_dir2_sf_firstentry(sfp);
@@ -1179,43 +1171,40 @@ xfs_dir2_sf_to_block(
* There should be a hole here, make one.
*/
if (offset < newoffset) {
- dup = (xfs_dir2_data_unused_t *)
- ((char *)block + offset);
+ dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
dup->length = cpu_to_be16(newoffset - offset);
*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
- ((char *)dup - (char *)block));
+ ((char *)dup - (char *)hdr));
xfs_dir2_data_log_unused(tp, bp, dup);
- (void)xfs_dir2_data_freeinsert((xfs_dir2_data_t *)block,
- dup, &dummy);
+ xfs_dir2_data_freeinsert(hdr, dup, &dummy);
offset += be16_to_cpu(dup->length);
continue;
}
/*
* Copy a real entry.
*/
- dep = (xfs_dir2_data_entry_t *)((char *)block + newoffset);
- dep->inumber = cpu_to_be64(xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep)));
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
+ dep->inumber = cpu_to_be64(xfs_dir2_sfe_get_ino(sfp, sfep));
dep->namelen = sfep->namelen;
memcpy(dep->name, sfep->name, dep->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)block);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(tp, bp, dep);
name.name = sfep->name;
name.len = sfep->namelen;
blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
hashname(&name));
blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp,
- (char *)dep - (char *)block));
- offset = (int)((char *)(tagp + 1) - (char *)block);
- if (++i == sfp->hdr.count)
+ (char *)dep - (char *)hdr));
+ offset = (int)((char *)(tagp + 1) - (char *)hdr);
+ if (++i == sfp->count)
sfep = NULL;
else
sfep = xfs_dir2_sf_nextentry(sfp, sfep);
}
/* Done with the temporary buffer */
- kmem_free(buf);
+ kmem_free(sfp);
/*
* Sort the leaf entries by hash value.
*/
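
xfs_dir2_sf_to_block() above has to read the shortform directory while destroying it: the inline fork is both the source of the entries and the space being converted, so the function snapshots the fork into a temporary heap buffer, releases the fork, and rebuilds from the copy. A reduced sketch of that control flow, under the assumption that KM_SLEEP allocations do not fail (the error paths and real rebuild steps are elided):

	struct xfs_dir2_sf_hdr	*sfp;
	int			bytes = dp->i_df.if_bytes;

	/* snapshot the inline directory before tearing the fork down */
	sfp = kmem_alloc(bytes, KM_SLEEP);
	memcpy(sfp, dp->i_df.if_u1.if_data, bytes);

	/* release the inline data; from here on only the copy is valid */
	xfs_idata_realloc(dp, -bytes, XFS_DATA_FORK);

	/* ... grow the inode, init block 0, replay entries from sfp ... */

	kmem_free(sfp);

The ordering matters: every error path between the kmem_alloc() and the final kmem_free() must free the copy, which is why both failure branches in the hunks above do exactly that.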
diff --git a/fs/xfs/xfs_dir2_block.h b/fs/xfs/xfs_dir2_block.h
deleted file mode 100644
index 10e6896..0000000
--- a/fs/xfs/xfs_dir2_block.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_BLOCK_H__
-#define __XFS_DIR2_BLOCK_H__
-
-/*
- * xfs_dir2_block.h
- * Directory version 2, single block format structures
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_data_hdr;
-struct xfs_dir2_leaf_entry;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * The single block format is as follows:
- * xfs_dir2_data_hdr_t structure
- * xfs_dir2_data_entry_t and xfs_dir2_data_unused_t structures
- * xfs_dir2_leaf_entry_t structures
- * xfs_dir2_block_tail_t structure
- */
-
-#define XFS_DIR2_BLOCK_MAGIC 0x58443242 /* XD2B: for one block dirs */
-
-typedef struct xfs_dir2_block_tail {
- __be32 count; /* count of leaf entries */
- __be32 stale; /* count of stale lf entries */
-} xfs_dir2_block_tail_t;
-
-/*
- * Generic single-block structure, for xfs_db.
- */
-typedef struct xfs_dir2_block {
- xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_BLOCK_MAGIC */
- xfs_dir2_data_union_t u[1];
- xfs_dir2_leaf_entry_t leaf[1];
- xfs_dir2_block_tail_t tail;
-} xfs_dir2_block_t;
-
-/*
- * Pointer to the leaf header embedded in a data block (1-block format)
- */
-static inline xfs_dir2_block_tail_t *
-xfs_dir2_block_tail_p(struct xfs_mount *mp, xfs_dir2_block_t *block)
-{
- return (((xfs_dir2_block_tail_t *)
- ((char *)(block) + (mp)->m_dirblksize)) - 1);
-}
-
-/*
- * Pointer to the leaf entries embedded in a data block (1-block format)
- */
-static inline struct xfs_dir2_leaf_entry *
-xfs_dir2_block_leaf_p(xfs_dir2_block_tail_t *btp)
-{
- return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
-}
-
-/*
- * Function declarations.
- */
-extern int xfs_dir2_block_addname(struct xfs_da_args *args);
-extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent,
- xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_block_removename(struct xfs_da_args *args);
-extern int xfs_dir2_block_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
- struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
-extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);
-
-#endif /* __XFS_DIR2_BLOCK_H__ */
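
The two inline helpers deleted above do not go away; their callers in the new code pass an xfs_dir2_data_hdr_t instead of the fatter xfs_dir2_block_t, so the hdr-based forms (presumably relocated into xfs_dir2_format.h along with the structures) look like this sketch, which is consistent with every xfs_dir2_block_tail_p(mp, hdr) call site in the hunks above:

	/*
	 * The tail occupies the last bytes of the directory block; the
	 * leaf entry array grows backwards from just before it.
	 */
	static inline struct xfs_dir2_block_tail *
	xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr)
	{
		return ((struct xfs_dir2_block_tail *)
			((char *)hdr + mp->m_dirblksize)) - 1;
	}

	static inline struct xfs_dir2_leaf_entry *
	xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
	{
		return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);
	}

The same change explains the recurring (hdr + 1) expressions: with xfs_dir2_block_t gone, the first entry is addressed as the byte immediately after the header rather than through the old block->u union member.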
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 921595b..5bbe2a8 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -23,18 +23,18 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
+STATIC xfs_dir2_data_free_t *
+xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
+
#ifdef DEBUG
/*
* Check the consistency of the data block.
@@ -50,7 +50,7 @@ xfs_dir2_data_check(
xfs_dir2_data_free_t *bf; /* bestfree table */
xfs_dir2_block_tail_t *btp=NULL; /* block tail */
int count; /* count of entries found */
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_free_t *dfp; /* bestfree entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
@@ -66,17 +66,19 @@ xfs_dir2_data_check(
struct xfs_name name;
mp = dp->i_mount;
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- bf = d->hdr.bestfree;
- p = (char *)d->u;
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
- btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+ hdr = bp->data;
+ bf = hdr->bestfree;
+ p = (char *)(hdr + 1);
+
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
+ btp = xfs_dir2_block_tail_p(mp, hdr);
lep = xfs_dir2_block_leaf_p(btp);
endp = (char *)lep;
- } else
- endp = (char *)d + mp->m_dirblksize;
+ } else {
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
+ endp = (char *)hdr + mp->m_dirblksize;
+ }
+
count = lastfree = freeseen = 0;
/*
* Account for zero bestfree entries.
@@ -108,8 +110,8 @@ xfs_dir2_data_check(
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
ASSERT(lastfree == 0);
ASSERT(be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==
- (char *)dup - (char *)d);
- dfp = xfs_dir2_data_freefind(d, dup);
+ (char *)dup - (char *)hdr);
+ dfp = xfs_dir2_data_freefind(hdr, dup);
if (dfp) {
i = (int)(dfp - bf);
ASSERT((freeseen & (1 << i)) == 0);
@@ -132,13 +134,13 @@ xfs_dir2_data_check(
ASSERT(dep->namelen != 0);
ASSERT(xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)) == 0);
ASSERT(be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)) ==
- (char *)dep - (char *)d);
+ (char *)dep - (char *)hdr);
count++;
lastfree = 0;
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
(xfs_dir2_data_aoff_t)
- ((char *)dep - (char *)d));
+ ((char *)dep - (char *)hdr));
name.name = dep->name;
name.len = dep->namelen;
hash = mp->m_dirnameops->hashname(&name);
@@ -155,9 +157,10 @@ xfs_dir2_data_check(
* Need to have seen all the entries and all the bestfree slots.
*/
ASSERT(freeseen == 7);
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
for (i = stale = 0; i < be32_to_cpu(btp->count); i++) {
- if (be32_to_cpu(lep[i].address) == XFS_DIR2_NULL_DATAPTR)
+ if (lep[i].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
if (i > 0)
ASSERT(be32_to_cpu(lep[i].hashval) >= be32_to_cpu(lep[i - 1].hashval));
@@ -172,9 +175,9 @@ xfs_dir2_data_check(
* Given a data block and an unused entry from that block,
* return the bestfree entry if any that corresponds to it.
*/
-xfs_dir2_data_free_t *
+STATIC xfs_dir2_data_free_t *
xfs_dir2_data_freefind(
- xfs_dir2_data_t *d, /* data block */
+ xfs_dir2_data_hdr_t *hdr, /* data block */
xfs_dir2_data_unused_t *dup) /* data unused entry */
{
xfs_dir2_data_free_t *dfp; /* bestfree entry */
@@ -184,17 +187,17 @@ xfs_dir2_data_freefind(
int seenzero; /* saw a 0 bestfree entry */
#endif
- off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)d);
+ off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
#if defined(DEBUG) && defined(__KERNEL__)
/*
* Validate some consistency in the bestfree table.
* Check order, non-overlapping entries, and if we find the
* one we're looking for it has to be exact.
*/
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- for (dfp = &d->hdr.bestfree[0], seenzero = matched = 0;
- dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+ for (dfp = &hdr->bestfree[0], seenzero = matched = 0;
+ dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
dfp++) {
if (!dfp->offset) {
ASSERT(!dfp->length);
@@ -210,7 +213,7 @@ xfs_dir2_data_freefind(
else
ASSERT(be16_to_cpu(dfp->offset) + be16_to_cpu(dfp->length) <= off);
ASSERT(matched || be16_to_cpu(dfp->length) >= be16_to_cpu(dup->length));
- if (dfp > &d->hdr.bestfree[0])
+ if (dfp > &hdr->bestfree[0])
ASSERT(be16_to_cpu(dfp[-1].length) >= be16_to_cpu(dfp[0].length));
}
#endif
@@ -219,13 +222,13 @@ xfs_dir2_data_freefind(
* it can't be there since they're sorted.
*/
if (be16_to_cpu(dup->length) <
- be16_to_cpu(d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
+ be16_to_cpu(hdr->bestfree[XFS_DIR2_DATA_FD_COUNT - 1].length))
return NULL;
/*
* Look at the three bestfree entries for our guy.
*/
- for (dfp = &d->hdr.bestfree[0];
- dfp < &d->hdr.bestfree[XFS_DIR2_DATA_FD_COUNT];
+ for (dfp = &hdr->bestfree[0];
+ dfp < &hdr->bestfree[XFS_DIR2_DATA_FD_COUNT];
dfp++) {
if (!dfp->offset)
return NULL;
@@ -243,7 +246,7 @@ xfs_dir2_data_freefind(
*/
xfs_dir2_data_free_t * /* entry inserted */
xfs_dir2_data_freeinsert(
- xfs_dir2_data_t *d, /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr, /* data block pointer */
xfs_dir2_data_unused_t *dup, /* unused space */
int *loghead) /* log the data header (out) */
{
@@ -251,12 +254,13 @@ xfs_dir2_data_freeinsert(
xfs_dir2_data_free_t new; /* new bestfree entry */
#ifdef __KERNEL__
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
#endif
- dfp = d->hdr.bestfree;
+ dfp = hdr->bestfree;
new.length = dup->length;
- new.offset = cpu_to_be16((char *)dup - (char *)d);
+ new.offset = cpu_to_be16((char *)dup - (char *)hdr);
+
/*
* Insert at position 0, 1, or 2; or not at all.
*/
@@ -286,36 +290,36 @@ xfs_dir2_data_freeinsert(
*/
STATIC void
xfs_dir2_data_freeremove(
- xfs_dir2_data_t *d, /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr, /* data block header */
xfs_dir2_data_free_t *dfp, /* bestfree entry pointer */
int *loghead) /* out: log data header */
{
#ifdef __KERNEL__
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
#endif
/*
* It's the first entry, slide the next 2 up.
*/
- if (dfp == &d->hdr.bestfree[0]) {
- d->hdr.bestfree[0] = d->hdr.bestfree[1];
- d->hdr.bestfree[1] = d->hdr.bestfree[2];
+ if (dfp == &hdr->bestfree[0]) {
+ hdr->bestfree[0] = hdr->bestfree[1];
+ hdr->bestfree[1] = hdr->bestfree[2];
}
/*
* It's the second entry, slide the 3rd entry up.
*/
- else if (dfp == &d->hdr.bestfree[1])
- d->hdr.bestfree[1] = d->hdr.bestfree[2];
+ else if (dfp == &hdr->bestfree[1])
+ hdr->bestfree[1] = hdr->bestfree[2];
/*
* Must be the last entry.
*/
else
- ASSERT(dfp == &d->hdr.bestfree[2]);
+ ASSERT(dfp == &hdr->bestfree[2]);
/*
* Clear the 3rd entry, must be zero now.
*/
- d->hdr.bestfree[2].length = 0;
- d->hdr.bestfree[2].offset = 0;
+ hdr->bestfree[2].length = 0;
+ hdr->bestfree[2].offset = 0;
*loghead = 1;
}
@@ -325,7 +329,7 @@ xfs_dir2_data_freeremove(
void
xfs_dir2_data_freescan(
xfs_mount_t *mp, /* filesystem mount point */
- xfs_dir2_data_t *d, /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr, /* data block header */
int *loghead) /* out: log data header */
{
xfs_dir2_block_tail_t *btp; /* block tail */
@@ -335,23 +339,23 @@ xfs_dir2_data_freescan(
char *p; /* current entry pointer */
#ifdef __KERNEL__
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
#endif
/*
* Start by clearing the table.
*/
- memset(d->hdr.bestfree, 0, sizeof(d->hdr.bestfree));
+ memset(hdr->bestfree, 0, sizeof(hdr->bestfree));
*loghead = 1;
/*
* Set up pointers.
*/
- p = (char *)d->u;
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC) {
- btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+ p = (char *)(hdr + 1);
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC)) {
+ btp = xfs_dir2_block_tail_p(mp, hdr);
endp = (char *)xfs_dir2_block_leaf_p(btp);
} else
- endp = (char *)d + mp->m_dirblksize;
+ endp = (char *)hdr + mp->m_dirblksize;
/*
* Loop over the block's entries.
*/
@@ -361,9 +365,9 @@ xfs_dir2_data_freescan(
* If it's a free entry, insert it.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
- ASSERT((char *)dup - (char *)d ==
+ ASSERT((char *)dup - (char *)hdr ==
be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
- xfs_dir2_data_freeinsert(d, dup, loghead);
+ xfs_dir2_data_freeinsert(hdr, dup, loghead);
p += be16_to_cpu(dup->length);
}
/*
@@ -371,7 +375,7 @@ xfs_dir2_data_freescan(
*/
else {
dep = (xfs_dir2_data_entry_t *)p;
- ASSERT((char *)dep - (char *)d ==
+ ASSERT((char *)dep - (char *)hdr ==
be16_to_cpu(*xfs_dir2_data_entry_tag_p(dep)));
p += xfs_dir2_data_entsize(dep->namelen);
}
@@ -389,7 +393,7 @@ xfs_dir2_data_init(
xfs_dabuf_t **bpp) /* output block buffer */
{
xfs_dabuf_t *bp; /* block buffer */
- xfs_dir2_data_t *d; /* pointer to block */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* unused entry pointer */
int error; /* error return value */
@@ -410,26 +414,28 @@ xfs_dir2_data_init(
return error;
}
ASSERT(bp != NULL);
+
/*
* Initialize the header.
*/
- d = bp->data;
- d->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
- d->hdr.bestfree[0].offset = cpu_to_be16(sizeof(d->hdr));
+ hdr = bp->data;
+ hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+ hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
- d->hdr.bestfree[i].length = 0;
- d->hdr.bestfree[i].offset = 0;
+ hdr->bestfree[i].length = 0;
+ hdr->bestfree[i].offset = 0;
}
+
/*
* Set up an unused entry for the block's body.
*/
- dup = &d->u[0].unused;
+ dup = (xfs_dir2_data_unused_t *)(hdr + 1);
dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
- t=mp->m_dirblksize - (uint)sizeof(d->hdr);
- d->hdr.bestfree[0].length = cpu_to_be16(t);
+ t = mp->m_dirblksize - (uint)sizeof(*hdr);
+ hdr->bestfree[0].length = cpu_to_be16(t);
dup->length = cpu_to_be16(t);
- *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)d);
+ *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
/*
* Log it and return it.
*/
@@ -448,14 +454,14 @@ xfs_dir2_data_log_entry(
xfs_dabuf_t *bp, /* block buffer */
xfs_dir2_data_entry_t *dep) /* data entry pointer */
{
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
+
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)d),
+ xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
- (char *)d - 1));
+ (char *)hdr - 1));
}
/*
@@ -466,13 +472,12 @@ xfs_dir2_data_log_header(
xfs_trans_t *tp, /* transaction pointer */
xfs_dabuf_t *bp) /* block buffer */
{
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- xfs_da_log_buf(tp, bp, (uint)((char *)&d->hdr - (char *)d),
- (uint)(sizeof(d->hdr) - 1));
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+
+ xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
}
/*
@@ -484,23 +489,23 @@ xfs_dir2_data_log_unused(
xfs_dabuf_t *bp, /* block buffer */
xfs_dir2_data_unused_t *dup) /* data unused pointer */
{
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr = bp->data;
+
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
/*
* Log the first part of the unused entry.
*/
- xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)d),
+ xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
(uint)((char *)&dup->length + sizeof(dup->length) -
- 1 - (char *)d));
+ 1 - (char *)hdr));
/*
* Log the end (tag) of the unused entry.
*/
xfs_da_log_buf(tp, bp,
- (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d),
- (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)d +
+ (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
+ (uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
sizeof(xfs_dir2_data_off_t) - 1));
}
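
Both xfs_da_log_buf() calls above log inclusive byte ranges relative to the start of the buffer, which is why every argument is a pointer difference with a trailing "- 1". Splitting the unused entry into two ranges, its freetag/length head and its trailing tag word, keeps the dead middle of a large free region out of the log. For contrast, a hedged sketch of the one-range alternative, composed from the same expressions (correct, but it would log the unused middle as well):

	/* single range covering the whole unused entry, head to tag */
	xfs_da_log_buf(tp, bp,
		(uint)((char *)dup - (char *)hdr),
		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
		       sizeof(xfs_dir2_data_off_t) - 1));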
@@ -517,7 +522,7 @@ xfs_dir2_data_make_free(
int *needlogp, /* out: log header */
int *needscanp) /* out: regen bestfree */
{
- xfs_dir2_data_t *d; /* data block pointer */
+ xfs_dir2_data_hdr_t *hdr; /* data block pointer */
xfs_dir2_data_free_t *dfp; /* bestfree pointer */
char *endptr; /* end of data area */
xfs_mount_t *mp; /* filesystem mount point */
@@ -527,28 +532,29 @@ xfs_dir2_data_make_free(
xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
mp = tp->t_mountp;
- d = bp->data;
+ hdr = bp->data;
+
/*
* Figure out where the end of the data area is.
*/
- if (be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC)
- endptr = (char *)d + mp->m_dirblksize;
+ if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC))
+ endptr = (char *)hdr + mp->m_dirblksize;
else {
xfs_dir2_block_tail_t *btp; /* block tail */
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
- btp = xfs_dir2_block_tail_p(mp, (xfs_dir2_block_t *)d);
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
+ btp = xfs_dir2_block_tail_p(mp, hdr);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
}
/*
* If this isn't the start of the block, then back up to
* the previous entry and see if it's free.
*/
- if (offset > sizeof(d->hdr)) {
+ if (offset > sizeof(*hdr)) {
__be16 *tagp; /* tag just before us */
- tagp = (__be16 *)((char *)d + offset) - 1;
- prevdup = (xfs_dir2_data_unused_t *)((char *)d + be16_to_cpu(*tagp));
+ tagp = (__be16 *)((char *)hdr + offset) - 1;
+ prevdup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));
if (be16_to_cpu(prevdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
prevdup = NULL;
} else
@@ -557,9 +563,9 @@ xfs_dir2_data_make_free(
* If this isn't the end of the block, see if the entry after
* us is free.
*/
- if ((char *)d + offset + len < endptr) {
+ if ((char *)hdr + offset + len < endptr) {
postdup =
- (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+ (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
postdup = NULL;
} else
@@ -576,21 +582,21 @@ xfs_dir2_data_make_free(
/*
* See if prevdup and/or postdup are in bestfree table.
*/
- dfp = xfs_dir2_data_freefind(d, prevdup);
- dfp2 = xfs_dir2_data_freefind(d, postdup);
+ dfp = xfs_dir2_data_freefind(hdr, prevdup);
+ dfp2 = xfs_dir2_data_freefind(hdr, postdup);
/*
* We need a rescan unless there are exactly 2 free entries
* namely our two. Then we know what's happening, otherwise
* since the third bestfree is there, there might be more
* entries.
*/
- needscan = (d->hdr.bestfree[2].length != 0);
+ needscan = (hdr->bestfree[2].length != 0);
/*
* Fix up the new big freespace.
*/
be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));
*xfs_dir2_data_unused_tag_p(prevdup) =
- cpu_to_be16((char *)prevdup - (char *)d);
+ cpu_to_be16((char *)prevdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, prevdup);
if (!needscan) {
/*
@@ -600,18 +606,18 @@ xfs_dir2_data_make_free(
* Remove entry 1 first then entry 0.
*/
ASSERT(dfp && dfp2);
- if (dfp == &d->hdr.bestfree[1]) {
- dfp = &d->hdr.bestfree[0];
+ if (dfp == &hdr->bestfree[1]) {
+ dfp = &hdr->bestfree[0];
ASSERT(dfp2 == dfp);
- dfp2 = &d->hdr.bestfree[1];
+ dfp2 = &hdr->bestfree[1];
}
- xfs_dir2_data_freeremove(d, dfp2, needlogp);
- xfs_dir2_data_freeremove(d, dfp, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp2, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
/*
* Now insert the new entry.
*/
- dfp = xfs_dir2_data_freeinsert(d, prevdup, needlogp);
- ASSERT(dfp == &d->hdr.bestfree[0]);
+ dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
+ ASSERT(dfp == &hdr->bestfree[0]);
ASSERT(dfp->length == prevdup->length);
ASSERT(!dfp[1].length);
ASSERT(!dfp[2].length);
@@ -621,10 +627,10 @@ xfs_dir2_data_make_free(
* The entry before us is free, merge with it.
*/
else if (prevdup) {
- dfp = xfs_dir2_data_freefind(d, prevdup);
+ dfp = xfs_dir2_data_freefind(hdr, prevdup);
be16_add_cpu(&prevdup->length, len);
*xfs_dir2_data_unused_tag_p(prevdup) =
- cpu_to_be16((char *)prevdup - (char *)d);
+ cpu_to_be16((char *)prevdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, prevdup);
/*
* If the previous entry was in the table, the new entry
@@ -632,27 +638,27 @@ xfs_dir2_data_make_free(
* the old one and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- (void)xfs_dir2_data_freeinsert(d, prevdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, prevdup, needlogp);
}
/*
* Otherwise we need a scan if the new entry is big enough.
*/
else {
needscan = be16_to_cpu(prevdup->length) >
- be16_to_cpu(d->hdr.bestfree[2].length);
+ be16_to_cpu(hdr->bestfree[2].length);
}
}
/*
* The following entry is free, merge with it.
*/
else if (postdup) {
- dfp = xfs_dir2_data_freefind(d, postdup);
- newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+ dfp = xfs_dir2_data_freefind(hdr, postdup);
+ newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
/*
* If the following entry was in the table, the new entry
@@ -660,28 +666,28 @@ xfs_dir2_data_make_free(
* the old one and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
}
/*
* Otherwise we need a scan if the new entry is big enough.
*/
else {
needscan = be16_to_cpu(newdup->length) >
- be16_to_cpu(d->hdr.bestfree[2].length);
+ be16_to_cpu(hdr->bestfree[2].length);
}
}
/*
* Neither neighbor is free. Make a new entry.
*/
else {
- newdup = (xfs_dir2_data_unused_t *)((char *)d + offset);
+ newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup->length = cpu_to_be16(len);
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
- (void)xfs_dir2_data_freeinsert(d, newdup, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
}
*needscanp = needscan;
}
@@ -699,7 +705,7 @@ xfs_dir2_data_use_free(
int *needlogp, /* out: need to log header */
int *needscanp) /* out: need regen bestfree */
{
- xfs_dir2_data_t *d; /* data block */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_free_t *dfp; /* bestfree pointer */
int matchback; /* matches end of freespace */
int matchfront; /* matches start of freespace */
@@ -708,24 +714,24 @@ xfs_dir2_data_use_free(
xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
int oldlen; /* old unused entry's length */
- d = bp->data;
- ASSERT(be32_to_cpu(d->hdr.magic) == XFS_DIR2_DATA_MAGIC ||
- be32_to_cpu(d->hdr.magic) == XFS_DIR2_BLOCK_MAGIC);
+ hdr = bp->data;
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
+ hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
- ASSERT(offset >= (char *)dup - (char *)d);
- ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)d);
- ASSERT((char *)dup - (char *)d == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
+ ASSERT(offset >= (char *)dup - (char *)hdr);
+ ASSERT(offset + len <= (char *)dup + be16_to_cpu(dup->length) - (char *)hdr);
+ ASSERT((char *)dup - (char *)hdr == be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
/*
* Look up the entry in the bestfree table.
*/
- dfp = xfs_dir2_data_freefind(d, dup);
+ dfp = xfs_dir2_data_freefind(hdr, dup);
oldlen = be16_to_cpu(dup->length);
- ASSERT(dfp || oldlen <= be16_to_cpu(d->hdr.bestfree[2].length));
+ ASSERT(dfp || oldlen <= be16_to_cpu(hdr->bestfree[2].length));
/*
* Check for alignment with front and back of the entry.
*/
- matchfront = (char *)dup - (char *)d == offset;
- matchback = (char *)dup + oldlen - (char *)d == offset + len;
+ matchfront = (char *)dup - (char *)hdr == offset;
+ matchback = (char *)dup + oldlen - (char *)hdr == offset + len;
ASSERT(*needscanp == 0);
needscan = 0;
/*
@@ -734,9 +740,9 @@ xfs_dir2_data_use_free(
*/
if (matchfront && matchback) {
if (dfp) {
- needscan = (d->hdr.bestfree[2].offset != 0);
+ needscan = (hdr->bestfree[2].offset != 0);
if (!needscan)
- xfs_dir2_data_freeremove(d, dfp, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
}
}
/*
@@ -744,27 +750,27 @@ xfs_dir2_data_use_free(
* Make a new entry with the remaining freespace.
*/
else if (matchfront) {
- newdup = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+ newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup->length = cpu_to_be16(oldlen - len);
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
/*
* If it was in the table, remove it and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
ASSERT(dfp != NULL);
ASSERT(dfp->length == newdup->length);
- ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
+ ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
/*
* If we got inserted at the last slot,
* that means we don't know if there was a better
* choice for the last slot, or not. Rescan.
*/
- needscan = dfp == &d->hdr.bestfree[2];
+ needscan = dfp == &hdr->bestfree[2];
}
}
/*
@@ -773,25 +779,25 @@ xfs_dir2_data_use_free(
*/
else if (matchback) {
newdup = dup;
- newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
+ newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
/*
* If it was in the table, remove it and add the new one.
*/
if (dfp) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- dfp = xfs_dir2_data_freeinsert(d, newdup, needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
ASSERT(dfp != NULL);
ASSERT(dfp->length == newdup->length);
- ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)d);
+ ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr);
/*
* If we got inserted at the last slot,
* that means we don't know if there was a better
* choice for the last slot, or not. Rescan.
*/
- needscan = dfp == &d->hdr.bestfree[2];
+ needscan = dfp == &hdr->bestfree[2];
}
}
/*
@@ -800,15 +806,15 @@ xfs_dir2_data_use_free(
*/
else {
newdup = dup;
- newdup->length = cpu_to_be16(((char *)d + offset) - (char *)newdup);
+ newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);
*xfs_dir2_data_unused_tag_p(newdup) =
- cpu_to_be16((char *)newdup - (char *)d);
+ cpu_to_be16((char *)newdup - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup);
- newdup2 = (xfs_dir2_data_unused_t *)((char *)d + offset + len);
+ newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));
*xfs_dir2_data_unused_tag_p(newdup2) =
- cpu_to_be16((char *)newdup2 - (char *)d);
+ cpu_to_be16((char *)newdup2 - (char *)hdr);
xfs_dir2_data_log_unused(tp, bp, newdup2);
/*
* If the old entry was in the table, we need to scan
@@ -819,13 +825,12 @@ xfs_dir2_data_use_free(
* the 2 new will work.
*/
if (dfp) {
- needscan = (d->hdr.bestfree[2].length != 0);
+ needscan = (hdr->bestfree[2].length != 0);
if (!needscan) {
- xfs_dir2_data_freeremove(d, dfp, needlogp);
- (void)xfs_dir2_data_freeinsert(d, newdup,
- needlogp);
- (void)xfs_dir2_data_freeinsert(d, newdup2,
- needlogp);
+ xfs_dir2_data_freeremove(hdr, dfp, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup, needlogp);
+ xfs_dir2_data_freeinsert(hdr, newdup2,
+ needlogp);
}
}
}
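
Throughout this file the bestfree table is the invariant being defended: hdr->bestfree[] holds the XFS_DIR2_DATA_FD_COUNT (three) largest free regions of the block, sorted by descending length, with unused slots zeroed. freeinsert, freeremove and freescan exist to keep that ordering. A simplified sketch of the insertion rule, stripped of the logging and tag bookkeeping the real xfs_dir2_data_freeinsert() does (example_bestfree_insert is a placeholder name):

	static void
	example_bestfree_insert(struct xfs_dir2_data_free bf[XFS_DIR2_DATA_FD_COUNT],
				struct xfs_dir2_data_free new)
	{
		/* insert at slot 0, 1 or 2, shifting smaller entries down */
		if (be16_to_cpu(new.length) > be16_to_cpu(bf[0].length)) {
			bf[2] = bf[1];
			bf[1] = bf[0];
			bf[0] = new;
		} else if (be16_to_cpu(new.length) > be16_to_cpu(bf[1].length)) {
			bf[2] = bf[1];
			bf[1] = new;
		} else if (be16_to_cpu(new.length) > be16_to_cpu(bf[2].length)) {
			bf[2] = new;
		}
		/* otherwise smaller than all three tracked regions: dropped */
	}

This is also why several paths above set needscan: once a tracked region is merged or removed, slot 2 can no longer prove it was really the third-largest free span in the block, so the table has to be rebuilt from scratch by xfs_dir2_data_freescan().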
diff --git a/fs/xfs/xfs_dir2_data.h b/fs/xfs/xfs_dir2_data.h
deleted file mode 100644
index efbc290..0000000
--- a/fs/xfs/xfs_dir2_data.h
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_DATA_H__
-#define __XFS_DIR2_DATA_H__
-
-/*
- * Directory format 2, data block structures.
- */
-
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_inode;
-struct xfs_trans;
-
-/*
- * Constants.
- */
-#define XFS_DIR2_DATA_MAGIC 0x58443244 /* XD2D: for multiblock dirs */
-#define XFS_DIR2_DATA_ALIGN_LOG 3 /* i.e., 8 bytes */
-#define XFS_DIR2_DATA_ALIGN (1 << XFS_DIR2_DATA_ALIGN_LOG)
-#define XFS_DIR2_DATA_FREE_TAG 0xffff
-#define XFS_DIR2_DATA_FD_COUNT 3
-
-/*
- * Directory address space divided into sections,
- * spaces separated by 32GB.
- */
-#define XFS_DIR2_SPACE_SIZE (1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))
-#define XFS_DIR2_DATA_SPACE 0
-#define XFS_DIR2_DATA_OFFSET (XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_DATA_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)
-
-/*
- * Offsets of . and .. in data space (always block 0)
- */
-#define XFS_DIR2_DATA_DOT_OFFSET \
- ((xfs_dir2_data_aoff_t)sizeof(xfs_dir2_data_hdr_t))
-#define XFS_DIR2_DATA_DOTDOT_OFFSET \
- (XFS_DIR2_DATA_DOT_OFFSET + xfs_dir2_data_entsize(1))
-#define XFS_DIR2_DATA_FIRST_OFFSET \
- (XFS_DIR2_DATA_DOTDOT_OFFSET + xfs_dir2_data_entsize(2))
-
-/*
- * Structures.
- */
-
-/*
- * Describe a free area in the data block.
- * The freespace will be formatted as a xfs_dir2_data_unused_t.
- */
-typedef struct xfs_dir2_data_free {
- __be16 offset; /* start of freespace */
- __be16 length; /* length of freespace */
-} xfs_dir2_data_free_t;
-
-/*
- * Header for the data blocks.
- * Always at the beginning of a directory-sized block.
- * The code knows that XFS_DIR2_DATA_FD_COUNT is 3.
- */
-typedef struct xfs_dir2_data_hdr {
- __be32 magic; /* XFS_DIR2_DATA_MAGIC */
- /* or XFS_DIR2_BLOCK_MAGIC */
- xfs_dir2_data_free_t bestfree[XFS_DIR2_DATA_FD_COUNT];
-} xfs_dir2_data_hdr_t;
-
-/*
- * Active entry in a data block. Aligned to 8 bytes.
- * Tag appears as the last 2 bytes.
- */
-typedef struct xfs_dir2_data_entry {
- __be64 inumber; /* inode number */
- __u8 namelen; /* name length */
- __u8 name[1]; /* name bytes, no null */
- /* variable offset */
- __be16 tag; /* starting offset of us */
-} xfs_dir2_data_entry_t;
-
-/*
- * Unused entry in a data block. Aligned to 8 bytes.
- * Tag appears as the last 2 bytes.
- */
-typedef struct xfs_dir2_data_unused {
- __be16 freetag; /* XFS_DIR2_DATA_FREE_TAG */
- __be16 length; /* total free length */
- /* variable offset */
- __be16 tag; /* starting offset of us */
-} xfs_dir2_data_unused_t;
-
-typedef union {
- xfs_dir2_data_entry_t entry;
- xfs_dir2_data_unused_t unused;
-} xfs_dir2_data_union_t;
-
-/*
- * Generic data block structure, for xfs_db.
- */
-typedef struct xfs_dir2_data {
- xfs_dir2_data_hdr_t hdr; /* magic XFS_DIR2_DATA_MAGIC */
- xfs_dir2_data_union_t u[1];
-} xfs_dir2_data_t;
-
-/*
- * Macros.
- */
-
-/*
- * Size of a data entry.
- */
-static inline int xfs_dir2_data_entsize(int n)
-{
- return (int)roundup(offsetof(xfs_dir2_data_entry_t, name[0]) + (n) + \
- (uint)sizeof(xfs_dir2_data_off_t), XFS_DIR2_DATA_ALIGN);
-}
-
-/*
- * Pointer to an entry's tag word.
- */
-static inline __be16 *
-xfs_dir2_data_entry_tag_p(xfs_dir2_data_entry_t *dep)
-{
- return (__be16 *)((char *)dep +
- xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-/*
- * Pointer to a freespace's tag word.
- */
-static inline __be16 *
-xfs_dir2_data_unused_tag_p(xfs_dir2_data_unused_t *dup)
-{
- return (__be16 *)((char *)dup +
- be16_to_cpu(dup->length) - sizeof(__be16));
-}
-
-/*
- * Function declarations.
- */
-#ifdef DEBUG
-extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
-#else
-#define xfs_dir2_data_check(dp,bp)
-#endif
-extern xfs_dir2_data_free_t *xfs_dir2_data_freefind(xfs_dir2_data_t *d,
- xfs_dir2_data_unused_t *dup);
-extern xfs_dir2_data_free_t *xfs_dir2_data_freeinsert(xfs_dir2_data_t *d,
- xfs_dir2_data_unused_t *dup, int *loghead);
-extern void xfs_dir2_data_freescan(struct xfs_mount *mp, xfs_dir2_data_t *d,
- int *loghead);
-extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
- struct xfs_dabuf **bpp);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
- xfs_dir2_data_entry_t *dep);
-extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
- struct xfs_dabuf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
- xfs_dir2_data_unused_t *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
- xfs_dir2_data_aoff_t offset,
- xfs_dir2_data_aoff_t len, int *needlogp,
- int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
- xfs_dir2_data_unused_t *dup,
- xfs_dir2_data_aoff_t offset,
- xfs_dir2_data_aoff_t len, int *needlogp,
- int *needscanp);
-
-#endif /* __XFS_DIR2_DATA_H__ */
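
A worked example makes the offset macros above concrete. xfs_dir2_data_hdr_t is 16 bytes (a __be32 magic plus three 4-byte bestfree slots), and offsetof(xfs_dir2_data_entry_t, name[0]) is 9 (8 for inumber, 1 for namelen), so with the 2-byte tag and 8-byte alignment:

	entsize(1) = roundup(9 + 1 + 2, 8) = 16		(the "." entry)
	entsize(2) = roundup(9 + 2 + 2, 8) = 16		(the ".." entry)

	XFS_DIR2_DATA_DOT_OFFSET	= 16	(right after the header)
	XFS_DIR2_DATA_DOTDOT_OFFSET	= 32
	XFS_DIR2_DATA_FIRST_OFFSET	= 48

which matches the fixed offsets xfs_dir2_sf_to_block() writes "." and ".." to in the earlier hunks. The arithmetic assumes the natural layout shown above with no compiler padding, which these on-disk types are arranged to guarantee.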
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index ae89122..66e108f 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -24,18 +24,14 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -64,7 +60,7 @@ xfs_dir2_block_to_leaf(
{
__be16 *bestsp; /* leaf's bestsp entries */
xfs_dablk_t blkno; /* leaf block's bno */
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block's leaf entries */
xfs_dir2_block_tail_t *btp; /* block's tail */
xfs_inode_t *dp; /* incore directory inode */
@@ -101,9 +97,9 @@ xfs_dir2_block_to_leaf(
}
ASSERT(lbp != NULL);
leaf = lbp->data;
- block = dbp->data;
+ hdr = dbp->data;
xfs_dir2_data_check(dp, dbp);
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
* Set the counts in the leaf header.
@@ -123,23 +119,23 @@ xfs_dir2_block_to_leaf(
* tail be free.
*/
xfs_dir2_data_make_free(tp, dbp,
- (xfs_dir2_data_aoff_t)((char *)blp - (char *)block),
- (xfs_dir2_data_aoff_t)((char *)block + mp->m_dirblksize -
+ (xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),
+ (xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize -
(char *)blp),
&needlog, &needscan);
/*
* Fix up the block header, make it a data block.
*/
- block->hdr.magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
+ hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
if (needscan)
- xfs_dir2_data_freescan(mp, (xfs_dir2_data_t *)block, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Set up leaf tail and bests table.
*/
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
ltp->bestcount = cpu_to_be32(1);
bestsp = xfs_dir2_leaf_bests_p(ltp);
- bestsp[0] = block->hdr.bestfree[0].length;
+ bestsp[0] = hdr->bestfree[0].length;
/*
* Log the data header and leaf bests table.
*/
@@ -152,6 +148,131 @@ xfs_dir2_block_to_leaf(
return 0;
}
+STATIC void
+xfs_dir2_leaf_find_stale(
+ struct xfs_dir2_leaf *leaf,
+ int index,
+ int *lowstale,
+ int *highstale)
+{
+ /*
+ * Find the first stale entry before our index, if any.
+ */
+ for (*lowstale = index - 1; *lowstale >= 0; --*lowstale) {
+ if (leaf->ents[*lowstale].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+ break;
+ }
+
+ /*
+ * Find the first stale entry at or after our index, if any.
+ * Stop if the result would require moving more entries than using
+ * lowstale.
+ */
+ for (*highstale = index;
+ *highstale < be16_to_cpu(leaf->hdr.count);
+ ++*highstale) {
+ if (leaf->ents[*highstale].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+ break;
+ if (*lowstale >= 0 && index - *lowstale <= *highstale - index)
+ break;
+ }
+}
+
+struct xfs_dir2_leaf_entry *
+xfs_dir2_leaf_find_entry(
+ xfs_dir2_leaf_t *leaf, /* leaf structure */
+ int index, /* leaf table position */
+ int compact, /* need to compact leaves */
+ int lowstale, /* index of prev stale leaf */
+ int highstale, /* index of next stale leaf */
+ int *lfloglow, /* low leaf logging index */
+ int *lfloghigh) /* high leaf logging index */
+{
+ if (!leaf->hdr.stale) {
+ xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
+
+ /*
+ * Now we need to make room to insert the leaf entry.
+ *
+ * If there are no stale entries, just insert a hole at index.
+ */
+ lep = &leaf->ents[index];
+ if (index < be16_to_cpu(leaf->hdr.count))
+ memmove(lep + 1, lep,
+ (be16_to_cpu(leaf->hdr.count) - index) *
+ sizeof(*lep));
+
+ /*
+ * Record low and high logging indices for the leaf.
+ */
+ *lfloglow = index;
+ *lfloghigh = be16_to_cpu(leaf->hdr.count);
+ be16_add_cpu(&leaf->hdr.count, 1);
+ return lep;
+ }
+
+ /*
+ * There are stale entries.
+ *
+ * We will use one of them for the new entry. It's probably not at
+ * the right location, so we'll have to shift some up or down first.
+ *
+ * If we didn't compact before, we need to find the nearest stale
+ * entries before and after our insertion point.
+ */
+ if (compact == 0)
+ xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
+
+ /*
+ * If the low one is better, use it.
+ */
+ if (lowstale >= 0 &&
+ (highstale == be16_to_cpu(leaf->hdr.count) ||
+ index - lowstale - 1 < highstale - index)) {
+ ASSERT(index - lowstale - 1 >= 0);
+ ASSERT(leaf->ents[lowstale].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
+
+ /*
+ * Copy entries up to cover the stale entry and make room
+ * for the new entry.
+ */
+ if (index - lowstale - 1 > 0) {
+ memmove(&leaf->ents[lowstale],
+ &leaf->ents[lowstale + 1],
+ (index - lowstale - 1) *
+ sizeof(xfs_dir2_leaf_entry_t));
+ }
+ *lfloglow = MIN(lowstale, *lfloglow);
+ *lfloghigh = MAX(index - 1, *lfloghigh);
+ be16_add_cpu(&leaf->hdr.stale, -1);
+ return &leaf->ents[index - 1];
+ }
+
+ /*
+ * The high one is better, so use that one.
+ */
+ ASSERT(highstale - index >= 0);
+ ASSERT(leaf->ents[highstale].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR));
+
+ /*
+ * Copy entries down to cover the stale entry and make room for the
+ * new entry.
+ */
+ if (highstale - index > 0) {
+ memmove(&leaf->ents[index + 1],
+ &leaf->ents[index],
+ (highstale - index) * sizeof(xfs_dir2_leaf_entry_t));
+ }
+ *lfloglow = MIN(index, *lfloglow);
+ *lfloghigh = MAX(highstale, *lfloghigh);
+ be16_add_cpu(&leaf->hdr.stale, -1);
+ return &leaf->ents[index];
+}
+
/*
* Add an entry to a leaf form directory.
*/
@@ -161,7 +282,7 @@ xfs_dir2_leaf_addname(
{
__be16 *bestsp; /* freespace table in leaf */
int compact; /* need to compact leaves */
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
@@ -225,7 +346,7 @@ xfs_dir2_leaf_addname(
continue;
i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
ASSERT(i < be32_to_cpu(ltp->bestcount));
- ASSERT(be16_to_cpu(bestsp[i]) != NULLDATAOFF);
+ ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));
if (be16_to_cpu(bestsp[i]) >= length) {
use_block = i;
break;
@@ -239,7 +360,8 @@ xfs_dir2_leaf_addname(
/*
* Remember a block we see that's missing.
*/
- if (be16_to_cpu(bestsp[i]) == NULLDATAOFF && use_block == -1)
+ if (bestsp[i] == cpu_to_be16(NULLDATAOFF) &&
+ use_block == -1)
use_block = i;
else if (be16_to_cpu(bestsp[i]) >= length) {
use_block = i;
@@ -250,14 +372,17 @@ xfs_dir2_leaf_addname(
/*
* How many bytes do we need in the leaf block?
*/
- needbytes =
- (leaf->hdr.stale ? 0 : (uint)sizeof(leaf->ents[0])) +
- (use_block != -1 ? 0 : (uint)sizeof(leaf->bests[0]));
+ needbytes = 0;
+ if (!leaf->hdr.stale)
+ needbytes += sizeof(xfs_dir2_leaf_entry_t);
+ if (use_block == -1)
+ needbytes += sizeof(xfs_dir2_data_off_t);
+
/*
* Now kill use_block if it refers to a missing block, so we
* can use it as an indication of allocation needed.
*/
- if (use_block != -1 && be16_to_cpu(bestsp[use_block]) == NULLDATAOFF)
+ if (use_block != -1 && bestsp[use_block] == cpu_to_be16(NULLDATAOFF))
use_block = -1;
/*
* If we don't have enough free bytes but we can make enough
@@ -369,8 +494,8 @@ xfs_dir2_leaf_addname(
*/
else
xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
- data = dbp->data;
- bestsp[use_block] = data->hdr.bestfree[0].length;
+ hdr = dbp->data;
+ bestsp[use_block] = hdr->bestfree[0].length;
grown = 1;
}
/*
@@ -384,7 +509,7 @@ xfs_dir2_leaf_addname(
xfs_da_brelse(tp, lbp);
return error;
}
- data = dbp->data;
+ hdr = dbp->data;
grown = 0;
}
xfs_dir2_data_check(dp, dbp);
@@ -392,14 +517,14 @@ xfs_dir2_leaf_addname(
* Point to the biggest freespace in our data block.
*/
dup = (xfs_dir2_data_unused_t *)
- ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
+ ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
ASSERT(be16_to_cpu(dup->length) >= length);
needscan = needlog = 0;
/*
* Mark the initial part of our freespace in use for the new entry.
*/
xfs_dir2_data_use_free(tp, dbp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
/*
* Initialize our new entry (at last).
@@ -409,12 +534,12 @@ xfs_dir2_leaf_addname(
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)data);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
/*
 * Need to rescan and fix up the bestfree table.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, data, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Need to log the data block's header.
*/
@@ -425,107 +550,15 @@ xfs_dir2_leaf_addname(
* If the bests table needs to be changed, do it.
* Log the change unless we've already done that.
*/
- if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
- bestsp[use_block] = data->hdr.bestfree[0].length;
+ if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(hdr->bestfree[0].length)) {
+ bestsp[use_block] = hdr->bestfree[0].length;
if (!grown)
xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
}
- /*
- * Now we need to make room to insert the leaf entry.
- * If there are no stale entries, we just insert a hole at index.
- */
- if (!leaf->hdr.stale) {
- /*
- * lep is still good as the index leaf entry.
- */
- if (index < be16_to_cpu(leaf->hdr.count))
- memmove(lep + 1, lep,
- (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
- /*
- * Record low and high logging indices for the leaf.
- */
- lfloglow = index;
- lfloghigh = be16_to_cpu(leaf->hdr.count);
- be16_add_cpu(&leaf->hdr.count, 1);
- }
- /*
- * There are stale entries.
- * We will use one of them for the new entry.
- * It's probably not at the right location, so we'll have to
- * shift some up or down first.
- */
- else {
- /*
- * If we didn't compact before, we need to find the nearest
- * stale entries before and after our insertion point.
- */
- if (compact == 0) {
- /*
- * Find the first stale entry before the insertion
- * point, if any.
- */
- for (lowstale = index - 1;
- lowstale >= 0 &&
- be32_to_cpu(leaf->ents[lowstale].address) !=
- XFS_DIR2_NULL_DATAPTR;
- lowstale--)
- continue;
- /*
- * Find the next stale entry at or after the insertion
- * point, if any. Stop if we go so far that the
- * lowstale entry would be better.
- */
- for (highstale = index;
- highstale < be16_to_cpu(leaf->hdr.count) &&
- be32_to_cpu(leaf->ents[highstale].address) !=
- XFS_DIR2_NULL_DATAPTR &&
- (lowstale < 0 ||
- index - lowstale - 1 >= highstale - index);
- highstale++)
- continue;
- }
- /*
- * If the low one is better, use it.
- */
- if (lowstale >= 0 &&
- (highstale == be16_to_cpu(leaf->hdr.count) ||
- index - lowstale - 1 < highstale - index)) {
- ASSERT(index - lowstale - 1 >= 0);
- ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
- XFS_DIR2_NULL_DATAPTR);
- /*
- * Copy entries up to cover the stale entry
- * and make room for the new entry.
- */
- if (index - lowstale - 1 > 0)
- memmove(&leaf->ents[lowstale],
- &leaf->ents[lowstale + 1],
- (index - lowstale - 1) * sizeof(*lep));
- lep = &leaf->ents[index - 1];
- lfloglow = MIN(lowstale, lfloglow);
- lfloghigh = MAX(index - 1, lfloghigh);
- }
- /*
- * The high one is better, so use that one.
- */
- else {
- ASSERT(highstale - index >= 0);
- ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
- XFS_DIR2_NULL_DATAPTR);
- /*
- * Copy entries down to cover the stale entry
- * and make room for the new entry.
- */
- if (highstale - index > 0)
- memmove(&leaf->ents[index + 1],
- &leaf->ents[index],
- (highstale - index) * sizeof(*lep));
- lep = &leaf->ents[index];
- lfloglow = MIN(index, lfloglow);
- lfloghigh = MAX(highstale, lfloghigh);
- }
- be16_add_cpu(&leaf->hdr.stale, -1);
- }
+
+ lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
+ highstale, &lfloglow, &lfloghigh);
+
/*
* Fill in the new leaf entry.
*/
@@ -562,7 +595,7 @@ xfs_dir2_leaf_check(
leaf = bp->data;
mp = dp->i_mount;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
/*
* This value is not restrictive enough.
* Should factor in the size of the bests table as well.
@@ -582,7 +615,7 @@ xfs_dir2_leaf_check(
if (i + 1 < be16_to_cpu(leaf->hdr.count))
ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
be32_to_cpu(leaf->ents[i + 1].hashval));
- if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
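
A note on the ASSERT() rewrites running through this patch: comparing the
raw on-disk field against cpu_to_be16(CONSTANT) lets the compiler fold the
byte swap into the constant, so the variable needs no runtime swap, unlike
the old be16_to_cpu(field) == CONSTANT form. A small sketch of the idea,
assuming a little-endian CPU and hand-rolling the swap the kernel gets
from <asm/byteorder.h>:

	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>

	/* what cpu_to_be16()/be16_to_cpu() do on little-endian */
	static uint16_t swab16(uint16_t x) { return (x >> 8) | (x << 8); }

	int main(void)
	{
		uint8_t disk[2] = { 0xd2, 0xf1 };  /* XFS_DIR2_LEAF1_MAGIC */
		uint16_t raw;

		memcpy(&raw, disk, 2);
		/* old style: swap the variable on every check */
		printf("%d\n", swab16(raw) == 0xd2f1);
		/* new style: swab16(0xd2f1) folds to a constant */
		printf("%d\n", raw == swab16(0xd2f1));
		return 0;
	}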
@@ -611,7 +644,8 @@ xfs_dir2_leaf_compact(
* Compress out the stale entries in place.
*/
for (from = to = 0, loglow = -1; from < be16_to_cpu(leaf->hdr.count); from++) {
- if (be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf->ents[from].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
continue;
/*
* Only actually copy the entries that are different.
@@ -663,24 +697,9 @@ xfs_dir2_leaf_compact_x1(
leaf = bp->data;
ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
index = *indexp;
- /*
- * Find the first stale entry before our index, if any.
- */
- for (lowstale = index - 1;
- lowstale >= 0 &&
- be32_to_cpu(leaf->ents[lowstale].address) != XFS_DIR2_NULL_DATAPTR;
- lowstale--)
- continue;
- /*
- * Find the first stale entry at or after our index, if any.
- * Stop if the answer would be worse than lowstale.
- */
- for (highstale = index;
- highstale < be16_to_cpu(leaf->hdr.count) &&
- be32_to_cpu(leaf->ents[highstale].address) != XFS_DIR2_NULL_DATAPTR &&
- (lowstale < 0 || index - lowstale > highstale - index);
- highstale++)
- continue;
+
+ xfs_dir2_leaf_find_stale(leaf, index, &lowstale, &highstale);
+
/*
* Pick the better of lowstale and highstale.
*/
@@ -701,7 +720,8 @@ xfs_dir2_leaf_compact_x1(
if (index == from)
newindex = to;
if (from != keepstale &&
- be32_to_cpu(leaf->ents[from].address) == XFS_DIR2_NULL_DATAPTR) {
+ leaf->ents[from].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR)) {
if (from == to)
*lowlogp = to;
continue;
@@ -760,7 +780,7 @@ xfs_dir2_leaf_getdents(
int byteoff; /* offset in current block */
xfs_dir2_db_t curdb; /* db for current block */
xfs_dir2_off_t curoff; /* current overall offset */
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
int error = 0; /* error return value */
@@ -868,12 +888,10 @@ xfs_dir2_leaf_getdents(
* we already have in the table.
*/
nmap = map_size - map_valid;
- error = xfs_bmapi(NULL, dp,
- map_off,
+ error = xfs_bmapi_read(dp, map_off,
xfs_dir2_byte_to_da(mp,
XFS_DIR2_LEAF_OFFSET) - map_off,
- XFS_BMAPI_METADATA, NULL, 0,
- &map[map_valid], &nmap, NULL);
+ &map[map_valid], &nmap, 0);
/*
* Don't know if we should ignore this or
* try to return an error.
@@ -1018,23 +1036,23 @@ xfs_dir2_leaf_getdents(
else if (curoff > newoff)
ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
curdb);
- data = bp->data;
+ hdr = bp->data;
xfs_dir2_data_check(dp, bp);
/*
* Find our position in the block.
*/
- ptr = (char *)&data->u;
+ ptr = (char *)(hdr + 1);
byteoff = xfs_dir2_byte_to_off(mp, curoff);
/*
* Skip past the header.
*/
if (byteoff == 0)
- curoff += (uint)sizeof(data->hdr);
+ curoff += (uint)sizeof(*hdr);
/*
* Skip past entries until we reach our offset.
*/
else {
- while ((char *)ptr - (char *)data < byteoff) {
+ while ((char *)ptr - (char *)hdr < byteoff) {
dup = (xfs_dir2_data_unused_t *)ptr;
if (be16_to_cpu(dup->freetag)
@@ -1055,8 +1073,8 @@ xfs_dir2_leaf_getdents(
curoff =
xfs_dir2_db_off_to_byte(mp,
xfs_dir2_byte_to_db(mp, curoff),
- (char *)ptr - (char *)data);
- if (ptr >= (char *)data + mp->m_dirblksize) {
+ (char *)ptr - (char *)hdr);
+ if (ptr >= (char *)hdr + mp->m_dirblksize) {
continue;
}
}
@@ -1179,7 +1197,7 @@ xfs_dir2_leaf_log_bests(
xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
firstb = xfs_dir2_leaf_bests_p(ltp) + first;
lastb = xfs_dir2_leaf_bests_p(ltp) + last;
@@ -1202,8 +1220,8 @@ xfs_dir2_leaf_log_ents(
xfs_dir2_leaf_t *leaf; /* leaf structure */
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
- be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+ leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
firstlep = &leaf->ents[first];
lastlep = &leaf->ents[last];
xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
@@ -1221,8 +1239,8 @@ xfs_dir2_leaf_log_header(
xfs_dir2_leaf_t *leaf; /* leaf structure */
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC ||
- be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
+ leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
(uint)(sizeof(leaf->hdr) - 1));
}
@@ -1241,7 +1259,7 @@ xfs_dir2_leaf_log_tail(
mp = tp->t_mountp;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAF1_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
(uint)(mp->m_dirblksize - 1));
@@ -1437,7 +1455,7 @@ xfs_dir2_leaf_removename(
xfs_da_args_t *args) /* operation arguments */
{
__be16 *bestsp; /* leaf block best freespace */
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t db; /* data block number */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data entry structure */
@@ -1467,7 +1485,7 @@ xfs_dir2_leaf_removename(
tp = args->trans;
mp = dp->i_mount;
leaf = lbp->data;
- data = dbp->data;
+ hdr = dbp->data;
xfs_dir2_data_check(dp, dbp);
/*
* Point to the leaf entry, use that to point to the data entry.
@@ -1475,9 +1493,9 @@ xfs_dir2_leaf_removename(
lep = &leaf->ents[index];
db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address));
dep = (xfs_dir2_data_entry_t *)
- ((char *)data + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
needscan = needlog = 0;
- oldbest = be16_to_cpu(data->hdr.bestfree[0].length);
+ oldbest = be16_to_cpu(hdr->bestfree[0].length);
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
ASSERT(be16_to_cpu(bestsp[db]) == oldbest);
@@ -1485,7 +1503,7 @@ xfs_dir2_leaf_removename(
* Mark the former data entry unused.
*/
xfs_dir2_data_make_free(tp, dbp,
- (xfs_dir2_data_aoff_t)((char *)dep - (char *)data),
+ (xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
/*
* We just mark the leaf entry stale by putting a null in it.
@@ -1499,23 +1517,23 @@ xfs_dir2_leaf_removename(
* log the data block header if necessary.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, data, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, dbp);
/*
* If the longest freespace in the data block has changed,
* put the new value in the bests table and log that.
*/
- if (be16_to_cpu(data->hdr.bestfree[0].length) != oldbest) {
- bestsp[db] = data->hdr.bestfree[0].length;
+ if (be16_to_cpu(hdr->bestfree[0].length) != oldbest) {
+ bestsp[db] = hdr->bestfree[0].length;
xfs_dir2_leaf_log_bests(tp, lbp, db, db);
}
xfs_dir2_data_check(dp, dbp);
/*
* If the data block is now empty then get rid of the data block.
*/
- if (be16_to_cpu(data->hdr.bestfree[0].length) ==
- mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+ if (be16_to_cpu(hdr->bestfree[0].length) ==
+ mp->m_dirblksize - (uint)sizeof(*hdr)) {
ASSERT(db != mp->m_dirdatablk);
if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
/*
@@ -1542,7 +1560,7 @@ xfs_dir2_leaf_removename(
* Look for the last active entry (i).
*/
for (i = db - 1; i > 0; i--) {
- if (be16_to_cpu(bestsp[i]) != NULLDATAOFF)
+ if (bestsp[i] != cpu_to_be16(NULLDATAOFF))
break;
}
/*
@@ -1686,9 +1704,6 @@ xfs_dir2_leaf_trim_data(
xfs_dir2_db_t db) /* data block number */
{
__be16 *bestsp; /* leaf bests table */
-#ifdef DEBUG
- xfs_dir2_data_t *data; /* data block structure */
-#endif
xfs_dabuf_t *dbp; /* data block buffer */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
@@ -1707,20 +1722,21 @@ xfs_dir2_leaf_trim_data(
XFS_DATA_FORK))) {
return error;
}
-#ifdef DEBUG
- data = dbp->data;
- ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
-#endif
- /* this seems to be an error
- * data is only valid if DEBUG is defined?
- * RMC 09/08/1999
- */
leaf = lbp->data;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
- ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) ==
- mp->m_dirblksize - (uint)sizeof(data->hdr));
+
+#ifdef DEBUG
+{
+ struct xfs_dir2_data_hdr *hdr = dbp->data;
+
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
+ ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
+ mp->m_dirblksize - (uint)sizeof(*hdr));
ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
+}
+#endif
+
/*
* Get rid of the data block.
*/
@@ -1740,6 +1756,20 @@ xfs_dir2_leaf_trim_data(
return 0;
}
+static inline size_t
+xfs_dir2_leaf_size(
+ struct xfs_dir2_leaf_hdr *hdr,
+ int counts)
+{
+ int entries;
+
+ entries = be16_to_cpu(hdr->count) - be16_to_cpu(hdr->stale);
+ return sizeof(xfs_dir2_leaf_hdr_t) +
+ entries * sizeof(xfs_dir2_leaf_entry_t) +
+ counts * sizeof(xfs_dir2_data_off_t) +
+ sizeof(xfs_dir2_leaf_tail_t);
+}
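
For scale, a worked instance of the size check xfs_dir2_node_to_leaf()
performs with this helper, assuming the common layout (12-byte
xfs_da_blkinfo_t, hence a 16-byte leaf header, 8-byte leaf entries, 2-byte
bests counts, 4-byte tail) and a 4096-byte directory block; the numbers
are illustrative:

	#include <stdio.h>

	int main(void)
	{
		unsigned hdr = 16, ent = 8, best = 2, tail = 4;
		unsigned entries = 300;	/* count - stale */
		unsigned counts = 20;	/* free->hdr.nvalid */
		unsigned size = hdr + entries * ent + counts * best + tail;

		/* 16 + 2400 + 40 + 4 = 2460 <= 4096: conversion fits */
		printf("leaf1 needs %u of 4096 bytes\n", size);
		return 0;
	}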
+
/*
* Convert node form directory to leaf form directory.
* The root of the node form dir needs to already be a LEAFN block.
@@ -1810,7 +1840,7 @@ xfs_dir2_node_to_leaf(
return 0;
lbp = state->path.blk[0].bp;
leaf = lbp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
* Read the freespace block.
*/
@@ -1819,20 +1849,19 @@ xfs_dir2_node_to_leaf(
return error;
}
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
ASSERT(!free->hdr.firstdb);
+
/*
* Now see if the leafn and free data will fit in a leaf1.
* If not, release the buffer and give up.
*/
- if ((uint)sizeof(leaf->hdr) +
- (be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale)) * (uint)sizeof(leaf->ents[0]) +
- be32_to_cpu(free->hdr.nvalid) * (uint)sizeof(leaf->bests[0]) +
- (uint)sizeof(leaf->tail) >
- mp->m_dirblksize) {
+ if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
+ mp->m_dirblksize) {
xfs_da_brelse(tp, fbp);
return 0;
}
+
/*
* If the leaf has any stale entries in it, compress them out.
* The compact routine will log the header.
@@ -1851,7 +1880,7 @@ xfs_dir2_node_to_leaf(
* Set up the leaf bests table.
*/
memcpy(xfs_dir2_leaf_bests_p(ltp), free->bests,
- be32_to_cpu(ltp->bestcount) * sizeof(leaf->bests[0]));
+ be32_to_cpu(ltp->bestcount) * sizeof(xfs_dir2_data_off_t));
xfs_dir2_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
xfs_dir2_leaf_log_tail(tp, lbp);
xfs_dir2_leaf_check(dp, lbp);
diff --git a/fs/xfs/xfs_dir2_leaf.h b/fs/xfs/xfs_dir2_leaf.h
deleted file mode 100644
index 6c9539f..0000000
--- a/fs/xfs/xfs_dir2_leaf.h
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_LEAF_H__
-#define __XFS_DIR2_LEAF_H__
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * Offset of the leaf/node space. First block in this space
- * is the btree root.
- */
-#define XFS_DIR2_LEAF_SPACE 1
-#define XFS_DIR2_LEAF_OFFSET (XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_LEAF_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)
-
-/*
- * Offset in data space of a data entry.
- */
-typedef __uint32_t xfs_dir2_dataptr_t;
-#define XFS_DIR2_MAX_DATAPTR ((xfs_dir2_dataptr_t)0xffffffff)
-#define XFS_DIR2_NULL_DATAPTR ((xfs_dir2_dataptr_t)0)
-
-/*
- * Leaf block header.
- */
-typedef struct xfs_dir2_leaf_hdr {
- xfs_da_blkinfo_t info; /* header for da routines */
- __be16 count; /* count of entries */
- __be16 stale; /* count of stale entries */
-} xfs_dir2_leaf_hdr_t;
-
-/*
- * Leaf block entry.
- */
-typedef struct xfs_dir2_leaf_entry {
- __be32 hashval; /* hash value of name */
- __be32 address; /* address of data entry */
-} xfs_dir2_leaf_entry_t;
-
-/*
- * Leaf block tail.
- */
-typedef struct xfs_dir2_leaf_tail {
- __be32 bestcount;
-} xfs_dir2_leaf_tail_t;
-
-/*
- * Leaf block.
- * bests and tail are at the end of the block for single-leaf only
- * (magic = XFS_DIR2_LEAF1_MAGIC not XFS_DIR2_LEAFN_MAGIC).
- */
-typedef struct xfs_dir2_leaf {
- xfs_dir2_leaf_hdr_t hdr; /* leaf header */
- xfs_dir2_leaf_entry_t ents[1]; /* entries */
- /* ... */
- xfs_dir2_data_off_t bests[1]; /* best free counts */
- xfs_dir2_leaf_tail_t tail; /* leaf tail */
-} xfs_dir2_leaf_t;
-
-/*
- * DB blocks here are logical directory block numbers, not filesystem blocks.
- */
-
-static inline int xfs_dir2_max_leaf_ents(struct xfs_mount *mp)
-{
- return (int)(((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_leaf_hdr_t)) /
- (uint)sizeof(xfs_dir2_leaf_entry_t));
-}
-
-/*
- * Get address of the bestcount field in the single-leaf block.
- */
-static inline xfs_dir2_leaf_tail_t *
-xfs_dir2_leaf_tail_p(struct xfs_mount *mp, xfs_dir2_leaf_t *lp)
-{
- return (xfs_dir2_leaf_tail_t *)
- ((char *)(lp) + (mp)->m_dirblksize -
- (uint)sizeof(xfs_dir2_leaf_tail_t));
-}
-
-/*
- * Get address of the bests array in the single-leaf block.
- */
-static inline __be16 *
-xfs_dir2_leaf_bests_p(xfs_dir2_leaf_tail_t *ltp)
-{
- return (__be16 *)ltp - be32_to_cpu(ltp->bestcount);
-}
-
-/*
- * Convert dataptr to byte in file space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return (xfs_dir2_off_t)(dp) << XFS_DIR2_DATA_ALIGN_LOG;
-}
-
-/*
- * Convert byte in file space to dataptr. It had better be aligned.
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_dataptr_t)((by) >> XFS_DIR2_DATA_ALIGN_LOG);
-}
-
-/*
- * Convert byte in space to (DB) block
- */
-static inline xfs_dir2_db_t
-xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_db_t)((by) >> \
- ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog));
-}
-
-/*
- * Convert dataptr to a block number
- */
-static inline xfs_dir2_db_t
-xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert byte in space to offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return (xfs_dir2_data_aoff_t)((by) & \
- ((1 << ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) - 1));
-}
-
-/*
- * Convert dataptr to a byte offset in a block
- */
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp)
-{
- return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp));
-}
-
-/*
- * Convert block and offset to byte in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db,
- xfs_dir2_data_aoff_t o)
-{
- return ((xfs_dir2_off_t)(db) << \
- ((mp)->m_sb.sb_blocklog + (mp)->m_sb.sb_dirblklog)) + (o);
-}
-
-/*
- * Convert block (DB) to block (dablk)
- */
-static inline xfs_dablk_t
-xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return (xfs_dablk_t)((db) << (mp)->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert byte in space to (DA) block
- */
-static inline xfs_dablk_t
-xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by)
-{
- return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by));
-}
-
-/*
- * Convert block and offset to dataptr
- */
-static inline xfs_dir2_dataptr_t
-xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db,
- xfs_dir2_data_aoff_t o)
-{
- return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o));
-}
-
-/*
- * Convert block (dablk) to block (DB)
- */
-static inline xfs_dir2_db_t
-xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da)
-{
- return (xfs_dir2_db_t)((da) >> (mp)->m_sb.sb_dirblklog);
-}
-
-/*
- * Convert block (dablk) to byte offset in space
- */
-static inline xfs_dir2_off_t
-xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)
-{
- return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0);
-}
-
-/*
- * Function declarations.
- */
-extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
- struct xfs_dabuf *dbp);
-extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
-extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
- struct xfs_dabuf *bp);
-extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
- int *lowstalep, int *highstalep,
- int *lowlogp, int *highlogp);
-extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
- size_t bufsize, xfs_off_t *offset,
- filldir_t filldir);
-extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
- struct xfs_dabuf **bpp, int magic);
-extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
- int first, int last);
-extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
- struct xfs_dabuf *bp);
-extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
-extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
- struct xfs_dabuf *lbp);
-extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
- struct xfs_dabuf *lbp, xfs_dir2_db_t db);
-extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
-
-#endif /* __XFS_DIR2_LEAF_H__ */
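
The conversion helpers deleted above survive elsewhere (note the new
xfs_dir2_format.h include in xfs_dir2_node.c below). A stand-alone
round-trip sketch of the dataptr encoding they implement, assuming 8-byte
dataptr alignment (XFS_DIR2_DATA_ALIGN_LOG == 3), a 4096-byte filesystem
block and dirblklog 0:

	#include <stdint.h>
	#include <stdio.h>

	#define DATA_ALIGN_LOG	3		/* assumed */
	#define DIRBLK_SHIFT	(12 + 0)	/* blocklog + dirblklog */

	static uint32_t db_off_to_dataptr(uint32_t db, uint32_t off)
	{
		uint64_t byte = ((uint64_t)db << DIRBLK_SHIFT) + off;
		return (uint32_t)(byte >> DATA_ALIGN_LOG);
	}

	int main(void)
	{
		uint32_t dp = db_off_to_dataptr(5, 64);	/* (5*4096+64)>>3 */
		uint64_t byte = (uint64_t)dp << DATA_ALIGN_LOG;

		printf("dataptr %u -> db %u off %u\n", dp,
		       (uint32_t)(byte >> DIRBLK_SHIFT),
		       (uint32_t)(byte & ((1u << DIRBLK_SHIFT) - 1)));
		return 0;
	}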
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index a0aab7d..0179a41 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -23,18 +23,14 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
-#include "xfs_dir2_node.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
@@ -73,7 +69,7 @@ xfs_dir2_free_log_bests(
xfs_dir2_free_t *free; /* freespace structure */
free = bp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
xfs_da_log_buf(tp, bp,
(uint)((char *)&free->bests[first] - (char *)free),
(uint)((char *)&free->bests[last] - (char *)free +
@@ -91,7 +87,7 @@ xfs_dir2_free_log_header(
xfs_dir2_free_t *free; /* freespace structure */
free = bp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
(uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
}
@@ -244,89 +240,13 @@ xfs_dir2_leafn_add(
lfloglow = be16_to_cpu(leaf->hdr.count);
lfloghigh = -1;
}
- /*
- * No stale entries, just insert a space for the new entry.
- */
- if (!leaf->hdr.stale) {
- lep = &leaf->ents[index];
- if (index < be16_to_cpu(leaf->hdr.count))
- memmove(lep + 1, lep,
- (be16_to_cpu(leaf->hdr.count) - index) * sizeof(*lep));
- lfloglow = index;
- lfloghigh = be16_to_cpu(leaf->hdr.count);
- be16_add_cpu(&leaf->hdr.count, 1);
- }
- /*
- * There are stale entries. We'll use one for the new entry.
- */
- else {
- /*
- * If we didn't do a compact then we need to figure out
- * which stale entry will be used.
- */
- if (compact == 0) {
- /*
- * Find first stale entry before our insertion point.
- */
- for (lowstale = index - 1;
- lowstale >= 0 &&
- be32_to_cpu(leaf->ents[lowstale].address) !=
- XFS_DIR2_NULL_DATAPTR;
- lowstale--)
- continue;
- /*
- * Find next stale entry after insertion point.
- * Stop looking if the answer would be worse than
- * lowstale already found.
- */
- for (highstale = index;
- highstale < be16_to_cpu(leaf->hdr.count) &&
- be32_to_cpu(leaf->ents[highstale].address) !=
- XFS_DIR2_NULL_DATAPTR &&
- (lowstale < 0 ||
- index - lowstale - 1 >= highstale - index);
- highstale++)
- continue;
- }
- /*
- * Using the low stale entry.
- * Shift entries up toward the stale slot.
- */
- if (lowstale >= 0 &&
- (highstale == be16_to_cpu(leaf->hdr.count) ||
- index - lowstale - 1 < highstale - index)) {
- ASSERT(be32_to_cpu(leaf->ents[lowstale].address) ==
- XFS_DIR2_NULL_DATAPTR);
- ASSERT(index - lowstale - 1 >= 0);
- if (index - lowstale - 1 > 0)
- memmove(&leaf->ents[lowstale],
- &leaf->ents[lowstale + 1],
- (index - lowstale - 1) * sizeof(*lep));
- lep = &leaf->ents[index - 1];
- lfloglow = MIN(lowstale, lfloglow);
- lfloghigh = MAX(index - 1, lfloghigh);
- }
- /*
- * Using the high stale entry.
- * Shift entries down toward the stale slot.
- */
- else {
- ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
- XFS_DIR2_NULL_DATAPTR);
- ASSERT(highstale - index >= 0);
- if (highstale - index > 0)
- memmove(&leaf->ents[index + 1],
- &leaf->ents[index],
- (highstale - index) * sizeof(*lep));
- lep = &leaf->ents[index];
- lfloglow = MIN(index, lfloglow);
- lfloghigh = MAX(highstale, lfloghigh);
- }
- be16_add_cpu(&leaf->hdr.stale, -1);
- }
+
/*
* Insert the new entry, log everything.
*/
+ lep = xfs_dir2_leaf_find_entry(leaf, index, compact, lowstale,
+ highstale, &lfloglow, &lfloghigh);
+
lep->hashval = cpu_to_be32(args->hashval);
lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp,
args->blkno, args->index));
@@ -352,14 +272,14 @@ xfs_dir2_leafn_check(
leaf = bp->data;
mp = dp->i_mount;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
for (i = stale = 0; i < be16_to_cpu(leaf->hdr.count); i++) {
if (i + 1 < be16_to_cpu(leaf->hdr.count)) {
ASSERT(be32_to_cpu(leaf->ents[i].hashval) <=
be32_to_cpu(leaf->ents[i + 1].hashval));
}
- if (be32_to_cpu(leaf->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
ASSERT(be16_to_cpu(leaf->hdr.stale) == stale);
@@ -378,7 +298,7 @@ xfs_dir2_leafn_lasthash(
xfs_dir2_leaf_t *leaf; /* leaf structure */
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
if (count)
*count = be16_to_cpu(leaf->hdr.count);
if (!leaf->hdr.count)
@@ -417,7 +337,7 @@ xfs_dir2_leafn_lookup_for_addname(
tp = args->trans;
mp = dp->i_mount;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
#ifdef __KERNEL__
ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
#endif
@@ -434,7 +354,7 @@ xfs_dir2_leafn_lookup_for_addname(
curbp = state->extrablk.bp;
curfdb = state->extrablk.blkno;
free = curbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
}
length = xfs_dir2_data_entsize(args->namelen);
/*
@@ -488,7 +408,7 @@ xfs_dir2_leafn_lookup_for_addname(
ASSERT(be32_to_cpu(free->hdr.magic) ==
XFS_DIR2_FREE_MAGIC);
ASSERT((be32_to_cpu(free->hdr.firstdb) %
- XFS_DIR2_MAX_FREE_BESTS(mp)) == 0);
+ xfs_dir2_free_max_bests(mp)) == 0);
ASSERT(be32_to_cpu(free->hdr.firstdb) <= curdb);
ASSERT(curdb < be32_to_cpu(free->hdr.firstdb) +
be32_to_cpu(free->hdr.nvalid));
@@ -500,7 +420,8 @@ xfs_dir2_leafn_lookup_for_addname(
/*
* If it has room, return it.
*/
- if (unlikely(be16_to_cpu(free->bests[fi]) == NULLDATAOFF)) {
+ if (unlikely(free->bests[fi] ==
+ cpu_to_be16(NULLDATAOFF))) {
XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
XFS_ERRLEVEL_LOW, mp);
if (curfdb != newfdb)
@@ -561,7 +482,7 @@ xfs_dir2_leafn_lookup_for_entry(
tp = args->trans;
mp = dp->i_mount;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
#ifdef __KERNEL__
ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
#endif
@@ -742,7 +663,8 @@ xfs_dir2_leafn_moveents(
int i; /* temp leaf index */
for (i = start_s, stale = 0; i < start_s + count; i++) {
- if (be32_to_cpu(leaf_s->ents[i].address) == XFS_DIR2_NULL_DATAPTR)
+ if (leaf_s->ents[i].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
} else
@@ -789,8 +711,8 @@ xfs_dir2_leafn_order(
leaf1 = leaf1_bp->data;
leaf2 = leaf2_bp->data;
- ASSERT(be16_to_cpu(leaf1->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
- ASSERT(be16_to_cpu(leaf2->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+ ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
if (be16_to_cpu(leaf1->hdr.count) > 0 &&
be16_to_cpu(leaf2->hdr.count) > 0 &&
(be32_to_cpu(leaf2->ents[0].hashval) < be32_to_cpu(leaf1->ents[0].hashval) ||
@@ -918,7 +840,7 @@ xfs_dir2_leafn_remove(
xfs_da_state_blk_t *dblk, /* data block */
int *rval) /* resulting block needs join */
{
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t db; /* data block number */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
@@ -938,7 +860,7 @@ xfs_dir2_leafn_remove(
tp = args->trans;
mp = dp->i_mount;
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
* Point to the entry we're removing.
*/
@@ -963,9 +885,9 @@ xfs_dir2_leafn_remove(
* in the data block in case it changes.
*/
dbp = dblk->bp;
- data = dbp->data;
- dep = (xfs_dir2_data_entry_t *)((char *)data + off);
- longest = be16_to_cpu(data->hdr.bestfree[0].length);
+ hdr = dbp->data;
+ dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
+ longest = be16_to_cpu(hdr->bestfree[0].length);
needlog = needscan = 0;
xfs_dir2_data_make_free(tp, dbp, off,
xfs_dir2_data_entsize(dep->namelen), &needlog, &needscan);
@@ -974,7 +896,7 @@ xfs_dir2_leafn_remove(
* Log the data block header if needed.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, data, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(tp, dbp);
xfs_dir2_data_check(dp, dbp);
@@ -982,7 +904,7 @@ xfs_dir2_leafn_remove(
* If the longest data block freespace changes, need to update
* the corresponding freeblock entry.
*/
- if (longest < be16_to_cpu(data->hdr.bestfree[0].length)) {
+ if (longest < be16_to_cpu(hdr->bestfree[0].length)) {
int error; /* error return value */
xfs_dabuf_t *fbp; /* freeblock buffer */
xfs_dir2_db_t fdb; /* freeblock block number */
@@ -1000,27 +922,27 @@ xfs_dir2_leafn_remove(
return error;
}
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
ASSERT(be32_to_cpu(free->hdr.firstdb) ==
- XFS_DIR2_MAX_FREE_BESTS(mp) *
+ xfs_dir2_free_max_bests(mp) *
(fdb - XFS_DIR2_FREE_FIRSTDB(mp)));
/*
* Calculate which entry we need to fix.
*/
findex = xfs_dir2_db_to_fdindex(mp, db);
- longest = be16_to_cpu(data->hdr.bestfree[0].length);
+ longest = be16_to_cpu(hdr->bestfree[0].length);
/*
* If the data block is now empty we can get rid of it
* (usually).
*/
- if (longest == mp->m_dirblksize - (uint)sizeof(data->hdr)) {
+ if (longest == mp->m_dirblksize - (uint)sizeof(*hdr)) {
/*
* Try to punch out the data block.
*/
error = xfs_dir2_shrink_inode(args, db, dbp);
if (error == 0) {
dblk->bp = NULL;
- data = NULL;
+ hdr = NULL;
}
/*
* We can get ENOSPC if there's no space reservation.
@@ -1036,7 +958,7 @@ xfs_dir2_leafn_remove(
* If we got rid of the data block, we can eliminate that entry
* in the free block.
*/
- if (data == NULL) {
+ if (hdr == NULL) {
/*
* One less used entry in the free table.
*/
@@ -1052,7 +974,8 @@ xfs_dir2_leafn_remove(
int i; /* free entry index */
for (i = findex - 1;
- i >= 0 && be16_to_cpu(free->bests[i]) == NULLDATAOFF;
+ i >= 0 &&
+ free->bests[i] == cpu_to_be16(NULLDATAOFF);
i--)
continue;
free->hdr.nvalid = cpu_to_be32(i + 1);
@@ -1209,7 +1132,7 @@ xfs_dir2_leafn_toosmall(
*/
blk = &state->path.blk[state->path.active - 1];
info = blk->bp->data;
- ASSERT(be16_to_cpu(info->magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
leaf = (xfs_dir2_leaf_t *)info;
count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes = (uint)sizeof(leaf->hdr) + count * (uint)sizeof(leaf->ents[0]);
@@ -1268,7 +1191,7 @@ xfs_dir2_leafn_toosmall(
count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes = state->blocksize - (state->blocksize >> 2);
leaf = bp->data;
- ASSERT(be16_to_cpu(leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes -= count * (uint)sizeof(leaf->ents[0]);
/*
@@ -1327,8 +1250,8 @@ xfs_dir2_leafn_unbalance(
ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
drop_leaf = drop_blk->bp->data;
save_leaf = save_blk->bp->data;
- ASSERT(be16_to_cpu(drop_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
- ASSERT(be16_to_cpu(save_leaf->hdr.info.magic) == XFS_DIR2_LEAFN_MAGIC);
+ ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
+ ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
* If there are any stale leaf entries, take this opportunity
* to purge them.
@@ -1432,7 +1355,7 @@ xfs_dir2_node_addname_int(
xfs_da_args_t *args, /* operation arguments */
xfs_da_state_blk_t *fblk) /* optional freespace block */
{
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t dbno; /* data block number */
xfs_dabuf_t *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
@@ -1469,7 +1392,7 @@ xfs_dir2_node_addname_int(
*/
ifbno = fblk->blkno;
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
findex = fblk->index;
/*
* This means the free entry showed that the data block had
@@ -1553,7 +1476,7 @@ xfs_dir2_node_addname_int(
continue;
}
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
findex = 0;
}
/*
@@ -1641,7 +1564,7 @@ xfs_dir2_node_addname_int(
if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) {
xfs_alert(mp,
- "%s: dir ino " "%llu needed freesp block %lld for\n"
+ "%s: dir ino %llu needed freesp block %lld for\n"
" data block %lld, got %lld ifbno %llu lastfbno %d",
__func__, (unsigned long long)dp->i_ino,
(long long)xfs_dir2_db_to_fdb(mp, dbno),
@@ -1680,12 +1603,12 @@ xfs_dir2_node_addname_int(
free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
free->hdr.firstdb = cpu_to_be32(
(fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
- XFS_DIR2_MAX_FREE_BESTS(mp));
+ xfs_dir2_free_max_bests(mp));
free->hdr.nvalid = 0;
free->hdr.nused = 0;
} else {
free = fbp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
}
/*
@@ -1697,7 +1620,7 @@ xfs_dir2_node_addname_int(
* freespace block, extend that table.
*/
if (findex >= be32_to_cpu(free->hdr.nvalid)) {
- ASSERT(findex < XFS_DIR2_MAX_FREE_BESTS(mp));
+ ASSERT(findex < xfs_dir2_free_max_bests(mp));
free->hdr.nvalid = cpu_to_be32(findex + 1);
/*
* Tag new entry so nused will go up.
@@ -1708,7 +1631,7 @@ xfs_dir2_node_addname_int(
* If this entry was for an empty data block
* (this should always be true) then update the header.
*/
- if (be16_to_cpu(free->bests[findex]) == NULLDATAOFF) {
+ if (free->bests[findex] == cpu_to_be16(NULLDATAOFF)) {
be32_add_cpu(&free->hdr.nused, 1);
xfs_dir2_free_log_header(tp, fbp);
}
@@ -1717,8 +1640,8 @@ xfs_dir2_node_addname_int(
* We haven't allocated the data entry yet so this will
* change again.
*/
- data = dbp->data;
- free->bests[findex] = data->hdr.bestfree[0].length;
+ hdr = dbp->data;
+ free->bests[findex] = hdr->bestfree[0].length;
logfree = 1;
}
/*
@@ -1743,21 +1666,21 @@ xfs_dir2_node_addname_int(
xfs_da_buf_done(fbp);
return error;
}
- data = dbp->data;
+ hdr = dbp->data;
logfree = 0;
}
- ASSERT(be16_to_cpu(data->hdr.bestfree[0].length) >= length);
+ ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length);
/*
* Point to the existing unused space.
*/
dup = (xfs_dir2_data_unused_t *)
- ((char *)data + be16_to_cpu(data->hdr.bestfree[0].offset));
+ ((char *)hdr + be16_to_cpu(hdr->bestfree[0].offset));
needscan = needlog = 0;
/*
 * Mark the first part of the unused space in use for us.
*/
xfs_dir2_data_use_free(tp, dbp, dup,
- (xfs_dir2_data_aoff_t)((char *)dup - (char *)data), length,
+ (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,
&needlog, &needscan);
/*
* Fill in the new entry and log it.
@@ -1767,13 +1690,13 @@ xfs_dir2_node_addname_int(
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
tagp = xfs_dir2_data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)data);
+ *tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(tp, dbp, dep);
/*
* Rescan the block for bestfree if needed.
*/
if (needscan)
- xfs_dir2_data_freescan(mp, data, &needlog);
+ xfs_dir2_data_freescan(mp, hdr, &needlog);
/*
* Log the data block header if needed.
*/
@@ -1782,8 +1705,8 @@ xfs_dir2_node_addname_int(
/*
* If the freespace entry is now wrong, update it.
*/
- if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(data->hdr.bestfree[0].length)) {
- free->bests[findex] = data->hdr.bestfree[0].length;
+ if (be16_to_cpu(free->bests[findex]) != be16_to_cpu(hdr->bestfree[0].length)) {
+ free->bests[findex] = hdr->bestfree[0].length;
logfree = 1;
}
/*
@@ -1933,7 +1856,7 @@ xfs_dir2_node_replace(
xfs_da_args_t *args) /* operation arguments */
{
xfs_da_state_blk_t *blk; /* leaf block */
- xfs_dir2_data_t *data; /* data block structure */
+ xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry changed */
int error; /* error return value */
int i; /* btree level */
@@ -1977,10 +1900,10 @@ xfs_dir2_node_replace(
/*
* Point to the data entry.
*/
- data = state->extrablk.bp->data;
- ASSERT(be32_to_cpu(data->hdr.magic) == XFS_DIR2_DATA_MAGIC);
+ hdr = state->extrablk.bp->data;
+ ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
dep = (xfs_dir2_data_entry_t *)
- ((char *)data +
+ ((char *)hdr +
xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address)));
ASSERT(inum != be64_to_cpu(dep->inumber));
/*
@@ -2044,7 +1967,7 @@ xfs_dir2_node_trim_free(
return 0;
}
free = bp->data;
- ASSERT(be32_to_cpu(free->hdr.magic) == XFS_DIR2_FREE_MAGIC);
+ ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
/*
* If there are used entries, there's nothing to do.
*/
diff --git a/fs/xfs/xfs_dir2_node.h b/fs/xfs/xfs_dir2_node.h
deleted file mode 100644
index 82dfe71..0000000
--- a/fs/xfs/xfs_dir2_node.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2000,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_NODE_H__
-#define __XFS_DIR2_NODE_H__
-
-/*
- * Directory version 2, btree node format structures
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_da_state;
-struct xfs_da_state_blk;
-struct xfs_inode;
-struct xfs_trans;
-
-/*
- * Offset of the freespace index.
- */
-#define XFS_DIR2_FREE_SPACE 2
-#define XFS_DIR2_FREE_OFFSET (XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE)
-#define XFS_DIR2_FREE_FIRSTDB(mp) \
- xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)
-
-#define XFS_DIR2_FREE_MAGIC 0x58443246 /* XD2F */
-
-typedef struct xfs_dir2_free_hdr {
- __be32 magic; /* XFS_DIR2_FREE_MAGIC */
- __be32 firstdb; /* db of first entry */
- __be32 nvalid; /* count of valid entries */
- __be32 nused; /* count of used entries */
-} xfs_dir2_free_hdr_t;
-
-typedef struct xfs_dir2_free {
- xfs_dir2_free_hdr_t hdr; /* block header */
- __be16 bests[1]; /* best free counts */
- /* unused entries are -1 */
-} xfs_dir2_free_t;
-
-#define XFS_DIR2_MAX_FREE_BESTS(mp) \
- (((mp)->m_dirblksize - (uint)sizeof(xfs_dir2_free_hdr_t)) / \
- (uint)sizeof(xfs_dir2_data_off_t))
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static inline xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return (XFS_DIR2_FREE_FIRSTDB(mp) + (db) / XFS_DIR2_MAX_FREE_BESTS(mp));
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static inline int
-xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db)
-{
- return ((db) % XFS_DIR2_MAX_FREE_BESTS(mp));
-}
-
-extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
- struct xfs_dabuf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
-extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
- struct xfs_da_args *args, int *indexp,
- struct xfs_da_state *state);
-extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
- struct xfs_dabuf *leaf2_bp);
-extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
- struct xfs_da_state_blk *oldblk,
- struct xfs_da_state_blk *newblk);
-extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
-extern void xfs_dir2_leafn_unbalance(struct xfs_da_state *state,
- struct xfs_da_state_blk *drop_blk,
- struct xfs_da_state_blk *save_blk);
-extern int xfs_dir2_node_addname(struct xfs_da_args *args);
-extern int xfs_dir2_node_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_node_removename(struct xfs_da_args *args);
-extern int xfs_dir2_node_replace(struct xfs_da_args *args);
-extern int xfs_dir2_node_trim_free(struct xfs_da_args *args, xfs_fileoff_t fo,
- int *rvalp);
-
-#endif /* __XFS_DIR2_NODE_H__ */
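
The db-to-freespace mapping removed with this header is a plain div/mod
over XFS_DIR2_MAX_FREE_BESTS(mp). With the 16-byte free header and 2-byte
bests shown above and an assumed 4096-byte directory block, each free
block indexes 2040 data blocks; a quick sketch (FREE_FIRSTDB left
symbolic):

	#include <stdio.h>

	#define MAX_FREE_BESTS	((4096 - 16) / 2)	/* 2040 */

	int main(void)
	{
		unsigned db = 5000;	/* hypothetical data block number */

		/* 5000 / 2040 = 2, 5000 % 2040 = 920 */
		printf("fdb = FREE_FIRSTDB + %u, fdindex = %u\n",
		       db / MAX_FREE_BESTS, db % MAX_FREE_BESTS);
		return 0;
	}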
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index b1bae6b..79d05e8 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -23,18 +23,16 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_mount.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_error.h"
-#include "xfs_dir2_data.h"
-#include "xfs_dir2_leaf.h"
-#include "xfs_dir2_block.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_format.h"
+#include "xfs_dir2_priv.h"
#include "xfs_trace.h"
/*
@@ -60,6 +58,82 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
#endif /* XFS_BIG_INUMS */
/*
+ * Inode numbers in short-form directories can come in two versions,
+ * either 4 bytes or 8 bytes wide. These helpers deal with the
+ * two forms transparently by looking at the header's i8count field.
+ *
+ * For 64-bit inode numbers the most significant byte must be zero.
+ */
+static xfs_ino_t
+xfs_dir2_sf_get_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_dir2_inou_t *from)
+{
+ if (hdr->i8count)
+ return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL;
+ else
+ return get_unaligned_be32(&from->i4.i);
+}
+
+static void
+xfs_dir2_sf_put_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_dir2_inou_t *to,
+ xfs_ino_t ino)
+{
+ ASSERT((ino & 0xff00000000000000ULL) == 0);
+
+ if (hdr->i8count)
+ put_unaligned_be64(ino, &to->i8.i);
+ else
+ put_unaligned_be32(ino, &to->i4.i);
+}
+
+xfs_ino_t
+xfs_dir2_sf_get_parent_ino(
+ struct xfs_dir2_sf_hdr *hdr)
+{
+ return xfs_dir2_sf_get_ino(hdr, &hdr->parent);
+}
+
+static void
+xfs_dir2_sf_put_parent_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_ino_t ino)
+{
+ xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino);
+}
+
+/*
+ * In short-form directory entries the inode numbers are stored at a variable
+ * offset after the entry name. The inode numbers may only be accessed
+ * through the helpers below.
+ */
+static xfs_dir2_inou_t *
+xfs_dir2_sfe_inop(
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return (xfs_dir2_inou_t *)&sfep->name[sfep->namelen];
+}
+
+xfs_ino_t
+xfs_dir2_sfe_get_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return xfs_dir2_sf_get_ino(hdr, xfs_dir2_sfe_inop(sfep));
+}
+
+static void
+xfs_dir2_sfe_put_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep,
+ xfs_ino_t ino)
+{
+ xfs_dir2_sf_put_ino(hdr, xfs_dir2_sfe_inop(sfep), ino);
+}
+
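
A stand-alone sketch of the two encodings the helpers above switch
between: i8count selects a 4- or 8-byte big-endian field, and 8-byte
values must leave the top byte zero. Plain byte shuffling stands in for
the kernel's {get,put}_unaligned_be*() helpers; purely illustrative:

	#include <stdint.h>
	#include <stdio.h>

	static void put_be(uint8_t *p, uint64_t v, int len)
	{
		for (int i = len - 1; i >= 0; i--, v >>= 8)
			p[i] = v & 0xff;
	}

	static uint64_t get_be(const uint8_t *p, int len)
	{
		uint64_t v = 0;

		for (int i = 0; i < len; i++)
			v = (v << 8) | p[i];
		return v;
	}

	int main(void)
	{
		uint8_t buf[8];
		uint64_t ino = 0x00123456789abcdeULL;	/* top byte zero */
		int len = 1 /* i8count */ ? 8 : 4;

		put_be(buf, ino, len);
		/* mask mirrors the 56-bit clamp in xfs_dir2_sf_get_ino() */
		printf("%llx\n", (unsigned long long)
		       (get_be(buf, len) & 0x00ffffffffffffffULL));
		return 0;
	}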
+/*
* Given a block directory (dp/block), calculate its size as a shortform (sf)
 * directory and a header for the sf directory, if it will fit in the
* space currently present in the inode. If it won't fit, the output
@@ -68,7 +142,7 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
int /* size for sf form */
xfs_dir2_block_sfsize(
xfs_inode_t *dp, /* incore inode pointer */
- xfs_dir2_block_t *block, /* block directory data */
+ xfs_dir2_data_hdr_t *hdr, /* block directory data */
xfs_dir2_sf_hdr_t *sfhp) /* output: header for sf form */
{
xfs_dir2_dataptr_t addr; /* data entry address */
@@ -88,7 +162,7 @@ xfs_dir2_block_sfsize(
mp = dp->i_mount;
count = i8count = namelen = 0;
- btp = xfs_dir2_block_tail_p(mp, block);
+ btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
@@ -101,7 +175,7 @@ xfs_dir2_block_sfsize(
* Calculate the pointer to the entry at hand.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)block + xfs_dir2_dataptr_to_off(mp, addr));
+ ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr));
/*
* Detect . and .., so we can special-case them.
* . is not included in sf directories.
@@ -138,7 +212,7 @@ xfs_dir2_block_sfsize(
*/
sfhp->count = count;
sfhp->i8count = i8count;
- xfs_dir2_sf_put_inumber((xfs_dir2_sf_t *)sfhp, &parent, &sfhp->parent);
+ xfs_dir2_sf_put_parent_ino(sfhp, parent);
return size;
}
@@ -153,7 +227,7 @@ xfs_dir2_block_to_sf(
int size, /* shortform directory size */
xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */
{
- xfs_dir2_block_t *block; /* block structure */
+ xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_block_tail_t *btp; /* block tail pointer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
xfs_inode_t *dp; /* incore directory inode */
@@ -164,8 +238,7 @@ xfs_dir2_block_to_sf(
xfs_mount_t *mp; /* filesystem mount point */
char *ptr; /* current data pointer */
xfs_dir2_sf_entry_t *sfep; /* shortform entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
- xfs_ino_t temp;
+ xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */
trace_xfs_dir2_block_to_sf(args);
@@ -176,13 +249,14 @@ xfs_dir2_block_to_sf(
* Make a copy of the block data, so we can shrink the inode
* and add local data.
*/
- block = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
- memcpy(block, bp->data, mp->m_dirblksize);
+ hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
+ memcpy(hdr, bp->data, mp->m_dirblksize);
logflags = XFS_ILOG_CORE;
if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
ASSERT(error != ENOSPC);
goto out;
}
+
/*
* The buffer is now unconditionally gone, whether
* xfs_dir2_shrink_inode worked or not.
@@ -198,14 +272,14 @@ xfs_dir2_block_to_sf(
/*
* Copy the header into the newly allocate local space.
*/
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
dp->i_d.di_size = size;
/*
* Set up to loop over the block's entries.
*/
- btp = xfs_dir2_block_tail_p(mp, block);
- ptr = (char *)block->u;
+ btp = xfs_dir2_block_tail_p(mp, hdr);
+ ptr = (char *)(hdr + 1);
endptr = (char *)xfs_dir2_block_leaf_p(btp);
sfep = xfs_dir2_sf_firstentry(sfp);
/*
@@ -233,7 +307,7 @@ xfs_dir2_block_to_sf(
else if (dep->namelen == 2 &&
dep->name[0] == '.' && dep->name[1] == '.')
ASSERT(be64_to_cpu(dep->inumber) ==
- xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent));
+ xfs_dir2_sf_get_parent_ino(sfp));
/*
* Normal entry, copy it into shortform.
*/
@@ -241,11 +315,11 @@ xfs_dir2_block_to_sf(
sfep->namelen = dep->namelen;
xfs_dir2_sf_put_offset(sfep,
(xfs_dir2_data_aoff_t)
- ((char *)dep - (char *)block));
+ ((char *)dep - (char *)hdr));
memcpy(sfep->name, dep->name, dep->namelen);
- temp = be64_to_cpu(dep->inumber);
- xfs_dir2_sf_put_inumber(sfp, &temp,
- xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep,
+ be64_to_cpu(dep->inumber));
+
sfep = xfs_dir2_sf_nextentry(sfp, sfep);
}
ptr += xfs_dir2_data_entsize(dep->namelen);
@@ -254,7 +328,7 @@ xfs_dir2_block_to_sf(
xfs_dir2_sf_check(args);
out:
xfs_trans_log_inode(args->trans, dp, logflags);
- kmem_free(block);
+ kmem_free(hdr);
return error;
}
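
Each entry copied out above shrinks to the short-form layout: a namelen
byte, a 2-byte offset, the name bytes, then the packed inode number whose
width follows i8count. Field widths here are assumed from the on-disk
format and are illustrative:

	#include <stdio.h>

	static int sf_entsize(int i8count, int namelen)
	{
		return 1 /* namelen */ + 2 /* offset */ + namelen +
		       (i8count ? 8 : 4);	/* packed inode number */
	}

	int main(void)
	{
		/* "foo": 10 bytes with 4-byte inodes, 14 with 8-byte */
		printf("%d %d\n", sf_entsize(0, 3), sf_entsize(1, 3));
		return 0;
	}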
@@ -277,7 +351,7 @@ xfs_dir2_sf_addname(
xfs_dir2_data_aoff_t offset = 0; /* offset for new entry */
int old_isize; /* di_size before adding name */
int pick; /* which algorithm to use */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
xfs_dir2_sf_entry_t *sfep = NULL; /* shortform entry */
trace_xfs_dir2_sf_addname(args);
@@ -294,19 +368,19 @@ xfs_dir2_sf_addname(
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
/*
* Compute entry (and change in) size.
*/
- add_entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
+ add_entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
incr_isize = add_entsize;
objchange = 0;
#if XFS_BIG_INUMS
/*
* Do we have to change to 8 byte inodes?
*/
- if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
/*
* Yes, adjust the entry size and the total size.
*/
@@ -314,7 +388,7 @@ xfs_dir2_sf_addname(
(uint)sizeof(xfs_dir2_ino8_t) -
(uint)sizeof(xfs_dir2_ino4_t);
incr_isize +=
- (sfp->hdr.count + 2) *
+ (sfp->count + 2) *
((uint)sizeof(xfs_dir2_ino8_t) -
(uint)sizeof(xfs_dir2_ino4_t));
objchange = 1;
@@ -384,21 +458,21 @@ xfs_dir2_sf_addname_easy(
{
int byteoff; /* byte offset in sf dir */
xfs_inode_t *dp; /* incore directory inode */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
dp = args->dp;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
byteoff = (int)((char *)sfep - (char *)sfp);
/*
* Grow the in-inode space.
*/
- xfs_idata_realloc(dp, xfs_dir2_sf_entsize_byname(sfp, args->namelen),
+ xfs_idata_realloc(dp, xfs_dir2_sf_entsize(sfp, args->namelen),
XFS_DATA_FORK);
/*
* Need to set up again due to realloc of the inode data.
*/
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
sfep = (xfs_dir2_sf_entry_t *)((char *)sfp + byteoff);
/*
* Fill in the new entry.
@@ -406,15 +480,14 @@ xfs_dir2_sf_addname_easy(
sfep->namelen = args->namelen;
xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, args->name, sfep->namelen);
- xfs_dir2_sf_put_inumber(sfp, &args->inumber,
- xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
/*
* Update the header and inode.
*/
- sfp->hdr.count++;
+ sfp->count++;
#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM)
- sfp->hdr.i8count++;
+ sfp->i8count++;
#endif
dp->i_d.di_size = new_isize;
xfs_dir2_sf_check(args);
@@ -444,19 +517,19 @@ xfs_dir2_sf_addname_hard(
xfs_dir2_data_aoff_t offset; /* current offset value */
int old_isize; /* previous di_size */
xfs_dir2_sf_entry_t *oldsfep; /* entry in original dir */
- xfs_dir2_sf_t *oldsfp; /* original shortform dir */
+ xfs_dir2_sf_hdr_t *oldsfp; /* original shortform dir */
xfs_dir2_sf_entry_t *sfep; /* entry in new dir */
- xfs_dir2_sf_t *sfp; /* new shortform dir */
+ xfs_dir2_sf_hdr_t *sfp; /* new shortform dir */
/*
* Copy the old directory to the stack buffer.
*/
dp = args->dp;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
old_isize = (int)dp->i_d.di_size;
buf = kmem_alloc(old_isize, KM_SLEEP);
- oldsfp = (xfs_dir2_sf_t *)buf;
+ oldsfp = (xfs_dir2_sf_hdr_t *)buf;
memcpy(oldsfp, sfp, old_isize);
/*
* Loop over the old directory finding the place we're going
@@ -485,7 +558,7 @@ xfs_dir2_sf_addname_hard(
/*
* Reset the pointer since the buffer was reallocated.
*/
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
/*
* Copy the first part of the directory, including the header.
*/
@@ -498,12 +571,11 @@ xfs_dir2_sf_addname_hard(
sfep->namelen = args->namelen;
xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, args->name, sfep->namelen);
- xfs_dir2_sf_put_inumber(sfp, &args->inumber,
- xfs_dir2_sf_inumberp(sfep));
- sfp->hdr.count++;
+ xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
+ sfp->count++;
#if XFS_BIG_INUMS
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
- sfp->hdr.i8count++;
+ sfp->i8count++;
#endif
/*
* If there's more left to copy, do that.
@@ -537,14 +609,14 @@ xfs_dir2_sf_addname_pick(
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_data_aoff_t offset; /* data block offset */
xfs_dir2_sf_entry_t *sfep; /* shortform entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
int size; /* entry's data size */
int used; /* data bytes used */
dp = args->dp;
mp = dp->i_mount;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
size = xfs_dir2_data_entsize(args->namelen);
offset = XFS_DIR2_DATA_FIRST_OFFSET;
sfep = xfs_dir2_sf_firstentry(sfp);
@@ -554,7 +626,7 @@ xfs_dir2_sf_addname_pick(
* Keep track of data offset and whether we've seen a place
* to insert the new entry.
*/
- for (i = 0; i < sfp->hdr.count; i++) {
+ for (i = 0; i < sfp->count; i++) {
if (!holefit)
holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
offset = xfs_dir2_sf_get_offset(sfep) +
@@ -566,7 +638,7 @@ xfs_dir2_sf_addname_pick(
* was a data block (block form directory).
*/
used = offset +
- (sfp->hdr.count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (sfp->count + 3) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
(uint)sizeof(xfs_dir2_block_tail_t);
/*
* If it won't fit in a block form then we can't insert it,
@@ -612,30 +684,30 @@ xfs_dir2_sf_check(
xfs_ino_t ino; /* entry inode number */
int offset; /* data offset */
xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
dp = args->dp;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
offset = XFS_DIR2_DATA_FIRST_OFFSET;
- ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
+ i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
- ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
+ ino = xfs_dir2_sfe_get_ino(sfp, sfep);
i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
offset =
xfs_dir2_sf_get_offset(sfep) +
xfs_dir2_data_entsize(sfep->namelen);
}
- ASSERT(i8count == sfp->hdr.i8count);
+ ASSERT(i8count == sfp->i8count);
ASSERT(XFS_BIG_INUMS || i8count == 0);
ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
ASSERT(offset +
- (sfp->hdr.count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
+ (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) +
(uint)sizeof(xfs_dir2_block_tail_t) <=
dp->i_mount->m_dirblksize);
}
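xfs_dir2_sf_check() re-derives i8count and the byte count from the entries and asserts they agree with the header and di_size. The arithmetic behind that invariant, as a self-contained userspace sketch; the per-field sizes come from the packed layout in the deleted xfs_dir2_sf.h below:

#include <assert.h>

/* entry size for a given name length, mirroring xfs_dir2_sf_entsize() */
static int sf_entsize(int i8count, int namelen)
{
	/* 1 namelen byte + 2 offset bytes + name + 4- or 8-byte inumber */
	return 1 + 2 + namelen + (i8count ? 8 : 4);
}

/* header size, mirroring xfs_dir2_sf_hdr_size() */
static int sf_hdr_size(int i8count)
{
	/* count + i8count bytes + 4- or 8-byte parent inumber */
	return 2 + (i8count ? 8 : 4);
}

int main(void)
{
	/* two entries, "a" and "bc", all inode numbers fit in 32 bits */
	int size = sf_hdr_size(0) + sf_entsize(0, 1) + sf_entsize(0, 2);

	assert(size == 6 + 8 + 9);	/* di_size would be 23 bytes */
	return 0;
}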
@@ -651,7 +723,7 @@ xfs_dir2_sf_create(
{
xfs_inode_t *dp; /* incore directory inode */
int i8count; /* parent inode is an 8-byte number */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
int size; /* directory size */
trace_xfs_dir2_sf_create(args);
@@ -681,13 +753,13 @@ xfs_dir2_sf_create(
/*
* Fill in the header.
*/
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- sfp->hdr.i8count = i8count;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ sfp->i8count = i8count;
/*
* Now can put in the inode number, since i8count is set.
*/
- xfs_dir2_sf_put_inumber(sfp, &pino, &sfp->hdr.parent);
- sfp->hdr.count = 0;
+ xfs_dir2_sf_put_parent_ino(sfp, pino);
+ sfp->count = 0;
dp->i_d.di_size = size;
xfs_dir2_sf_check(args);
xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
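Note the ordering in sf_create: xfs_dir2_sf_put_parent_ino() consults i8count to pick the 4- or 8-byte encoding, so i8count must be stored before the parent inumber is written. A hedged sketch of the accessor pair; the names are taken from the + lines, the bodies are assumptions built on the old XFS_GET/PUT_DIR_INO macros:

/* sketch: read/write the dedicated parent field in the header */
static xfs_ino_t
xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp)
{
	return sfp->i8count ? XFS_GET_DIR_INO8(sfp->parent.i8)
			    : XFS_GET_DIR_INO4(sfp->parent.i4);
}

static void
xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp, xfs_ino_t ino)
{
	if (sfp->i8count)
		XFS_PUT_DIR_INO8(ino, sfp->parent.i8);
	else
		XFS_PUT_DIR_INO4(ino, sfp->parent.i4);
}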
@@ -705,7 +777,7 @@ xfs_dir2_sf_getdents(
xfs_mount_t *mp; /* filesystem mount point */
xfs_dir2_dataptr_t off; /* current entry's offset */
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
xfs_dir2_dataptr_t dot_offset;
xfs_dir2_dataptr_t dotdot_offset;
xfs_ino_t ino;
@@ -724,9 +796,9 @@ xfs_dir2_sf_getdents(
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
/*
* If the block number in the offset is out of range, we're done.
@@ -759,7 +831,7 @@ xfs_dir2_sf_getdents(
* Put .. entry unless we're starting past it.
*/
if (*offset <= dotdot_offset) {
- ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) {
*offset = dotdot_offset & 0x7fffffff;
return 0;
@@ -770,7 +842,7 @@ xfs_dir2_sf_getdents(
* Loop while there are more entries and put'ing works.
*/
sfep = xfs_dir2_sf_firstentry(sfp);
- for (i = 0; i < sfp->hdr.count; i++) {
+ for (i = 0; i < sfp->count; i++) {
off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk,
xfs_dir2_sf_get_offset(sfep));
@@ -779,7 +851,7 @@ xfs_dir2_sf_getdents(
continue;
}
- ino = xfs_dir2_sf_get_inumber(sfp, xfs_dir2_sf_inumberp(sfep));
+ ino = xfs_dir2_sfe_get_ino(sfp, sfep);
if (filldir(dirent, (char *)sfep->name, sfep->namelen,
off & 0x7fffffff, ino, DT_UNKNOWN)) {
*offset = off & 0x7fffffff;
@@ -805,7 +877,7 @@ xfs_dir2_sf_lookup(
int i; /* entry index */
int error;
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
enum xfs_dacmp cmp; /* comparison result */
xfs_dir2_sf_entry_t *ci_sfep; /* case-insens. entry */
@@ -824,8 +896,8 @@ xfs_dir2_sf_lookup(
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
/*
* Special case for .
*/
@@ -839,7 +911,7 @@ xfs_dir2_sf_lookup(
*/
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
- args->inumber = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
args->cmpresult = XFS_CMP_EXACT;
return XFS_ERROR(EEXIST);
}
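A convention worth flagging for readers new to this file: a successful lookup returns EEXIST (a positive errno, via XFS_ERROR) with args->inumber and args->cmpresult filled in, while ENOENT means the name is absent. A caller-side sketch of consuming that inverted return value:

/* sketch: typical consumption of xfs_dir2_sf_lookup() */
error = xfs_dir2_sf_lookup(args);
if (error == EEXIST) {
	ino = args->inumber;		/* found */
	error = 0;
} else if (error != ENOENT) {
	return error;			/* real failure */
}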
@@ -847,7 +919,7 @@ xfs_dir2_sf_lookup(
* Loop over all the entries trying to match ours.
*/
ci_sfep = NULL;
- for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
/*
* Compare name and if it's an exact match, return the inode
@@ -858,8 +930,7 @@ xfs_dir2_sf_lookup(
sfep->namelen);
if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
args->cmpresult = cmp;
- args->inumber = xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep));
+ args->inumber = xfs_dir2_sfe_get_ino(sfp, sfep);
if (cmp == XFS_CMP_EXACT)
return XFS_ERROR(EEXIST);
ci_sfep = sfep;
@@ -891,7 +962,7 @@ xfs_dir2_sf_removename(
int newsize; /* new inode size */
int oldsize; /* old inode size */
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
trace_xfs_dir2_sf_removename(args);
@@ -908,32 +979,31 @@ xfs_dir2_sf_removename(
}
ASSERT(dp->i_df.if_bytes == oldsize);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(oldsize >= xfs_dir2_sf_hdr_size(sfp->i8count));
/*
* Loop over the old directory entries.
* Find the one we're deleting.
*/
- for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->hdr.count;
+ for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
- ASSERT(xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep)) ==
- args->inumber);
+ ASSERT(xfs_dir2_sfe_get_ino(sfp, sfep) ==
+ args->inumber);
break;
}
}
/*
* Didn't find it.
*/
- if (i == sfp->hdr.count)
+ if (i == sfp->count)
return XFS_ERROR(ENOENT);
/*
* Calculate sizes.
*/
byteoff = (int)((char *)sfep - (char *)sfp);
- entsize = xfs_dir2_sf_entsize_byname(sfp, args->namelen);
+ entsize = xfs_dir2_sf_entsize(sfp, args->namelen);
newsize = oldsize - entsize;
/*
* Copy the part if any after the removed entry, sliding it down.
@@ -944,22 +1014,22 @@ xfs_dir2_sf_removename(
/*
* Fix up the header and file size.
*/
- sfp->hdr.count--;
+ sfp->count--;
dp->i_d.di_size = newsize;
/*
* Reallocate, making it smaller.
*/
xfs_idata_realloc(dp, newsize - oldsize, XFS_DATA_FORK);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
#if XFS_BIG_INUMS
/*
* Are we changing inode number size?
*/
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM) {
- if (sfp->hdr.i8count == 1)
+ if (sfp->i8count == 1)
xfs_dir2_sf_toino4(args);
else
- sfp->hdr.i8count--;
+ sfp->i8count--;
}
#endif
xfs_dir2_sf_check(args);
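The #if XFS_BIG_INUMS block is the inverse of the addname paths above: removing the directory's last large inode number (i8count == 1) triggers a wholesale xfs_dir2_sf_toino4() conversion instead of a simple decrement. Worked example: a directory holds three entries, one with an inode number above 32 bits, so every inumber field is 8 bytes wide; deleting that entry leaves no large numbers, and toino4 shrinks the parent field plus the two surviving entries by (2 + 1) * (8 - 4) = 12 bytes on top of the removed entry itself.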
@@ -983,7 +1053,7 @@ xfs_dir2_sf_replace(
int i8elevated; /* sf_toino8 set i8count=1 */
#endif
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
- xfs_dir2_sf_t *sfp; /* shortform structure */
+ xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
trace_xfs_dir2_sf_replace(args);
@@ -999,19 +1069,19 @@ xfs_dir2_sf_replace(
}
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->hdr.i8count));
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(dp->i_d.di_size >= xfs_dir2_sf_hdr_size(sfp->i8count));
#if XFS_BIG_INUMS
/*
* The new inode number is large, so we need to convert to 8-byte inodes.
*/
- if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->hdr.i8count == 0) {
+ if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
int error; /* error return value */
int newsize; /* new inode size */
newsize =
dp->i_df.if_bytes +
- (sfp->hdr.count + 1) *
+ (sfp->count + 1) *
((uint)sizeof(xfs_dir2_ino8_t) -
(uint)sizeof(xfs_dir2_ino4_t));
/*
@@ -1029,7 +1099,7 @@ xfs_dir2_sf_replace(
*/
xfs_dir2_sf_toino8(args);
i8elevated = 1;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
} else
i8elevated = 0;
#endif
@@ -1040,34 +1110,32 @@ xfs_dir2_sf_replace(
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
#if XFS_BIG_INUMS || defined(DEBUG)
- ino = xfs_dir2_sf_get_inumber(sfp, &sfp->hdr.parent);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
ASSERT(args->inumber != ino);
#endif
- xfs_dir2_sf_put_inumber(sfp, &args->inumber, &sfp->hdr.parent);
+ xfs_dir2_sf_put_parent_ino(sfp, args->inumber);
}
/*
* Normal entry, look for the name.
*/
else {
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
- i < sfp->hdr.count;
+ i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
#if XFS_BIG_INUMS || defined(DEBUG)
- ino = xfs_dir2_sf_get_inumber(sfp,
- xfs_dir2_sf_inumberp(sfep));
+ ino = xfs_dir2_sfe_get_ino(sfp, sfep);
ASSERT(args->inumber != ino);
#endif
- xfs_dir2_sf_put_inumber(sfp, &args->inumber,
- xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep, args->inumber);
break;
}
}
/*
* Didn't find it.
*/
- if (i == sfp->hdr.count) {
+ if (i == sfp->count) {
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
#if XFS_BIG_INUMS
if (i8elevated)
@@ -1085,10 +1153,10 @@ xfs_dir2_sf_replace(
/*
* And the old count was one, so we need to convert back to 4-byte inodes.
*/
- if (sfp->hdr.i8count == 1)
+ if (sfp->i8count == 1)
xfs_dir2_sf_toino4(args);
else
- sfp->hdr.i8count--;
+ sfp->i8count--;
}
/*
* See if the old number was small, the new number is large.
@@ -1099,9 +1167,9 @@ xfs_dir2_sf_replace(
* add to the i8count unless we just converted to 8-byte
* inodes (which does an implied i8count = 1)
*/
- ASSERT(sfp->hdr.i8count != 0);
+ ASSERT(sfp->i8count != 0);
if (!i8elevated)
- sfp->hdr.i8count++;
+ sfp->i8count++;
}
#endif
xfs_dir2_sf_check(args);
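The i8elevated flag threads through all three outcomes of sf_replace. A concrete trace, assuming a 4-byte-form directory (i8count == 0) and a replacement inode number of 2^33: xfs_dir2_sf_toino8() runs speculatively up front (the conversion implies i8count = 1) and i8elevated is set; if the name is then found and rewritten, i8count is not bumped again, because the conversion already accounted for exactly one large number; if the name turns out to be missing, xfs_dir2_sf_toino4() undoes the speculative conversion before the ENOENT return.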
@@ -1121,13 +1189,12 @@ xfs_dir2_sf_toino4(
char *buf; /* old dir's buffer */
xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
- xfs_ino_t ino; /* entry inode number */
int newsize; /* new inode size */
xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
- xfs_dir2_sf_t *oldsfp; /* old sf directory */
+ xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
int oldsize; /* old inode size */
xfs_dir2_sf_entry_t *sfep; /* new sf entry */
- xfs_dir2_sf_t *sfp; /* new sf directory */
+ xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
trace_xfs_dir2_sf_toino4(args);
@@ -1140,44 +1207,42 @@ xfs_dir2_sf_toino4(
*/
oldsize = dp->i_df.if_bytes;
buf = kmem_alloc(oldsize, KM_SLEEP);
- oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(oldsfp->hdr.i8count == 1);
+ oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(oldsfp->i8count == 1);
memcpy(buf, oldsfp, oldsize);
/*
* Compute the new inode size.
*/
newsize =
oldsize -
- (oldsfp->hdr.count + 1) *
+ (oldsfp->count + 1) *
((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
/*
* Reset our pointers, the data has moved.
*/
- oldsfp = (xfs_dir2_sf_t *)buf;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
/*
* Fill in the new header.
*/
- sfp->hdr.count = oldsfp->hdr.count;
- sfp->hdr.i8count = 0;
- ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
- xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
+ sfp->count = oldsfp->count;
+ sfp->i8count = 0;
+ xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
/*
* Copy the entries field by field.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp);
- i < sfp->hdr.count;
+ i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
sfep->namelen = oldsfep->namelen;
sfep->offset = oldsfep->offset;
memcpy(sfep->name, oldsfep->name, sfep->namelen);
- ino = xfs_dir2_sf_get_inumber(oldsfp,
- xfs_dir2_sf_inumberp(oldsfep));
- xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep,
+ xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
}
/*
* Clean up the inode.
@@ -1199,13 +1264,12 @@ xfs_dir2_sf_toino8(
char *buf; /* old dir's buffer */
xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
- xfs_ino_t ino; /* entry inode number */
int newsize; /* new inode size */
xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
- xfs_dir2_sf_t *oldsfp; /* old sf directory */
+ xfs_dir2_sf_hdr_t *oldsfp; /* old sf directory */
int oldsize; /* old inode size */
xfs_dir2_sf_entry_t *sfep; /* new sf entry */
- xfs_dir2_sf_t *sfp; /* new sf directory */
+ xfs_dir2_sf_hdr_t *sfp; /* new sf directory */
trace_xfs_dir2_sf_toino8(args);
@@ -1218,44 +1282,42 @@ xfs_dir2_sf_toino8(
*/
oldsize = dp->i_df.if_bytes;
buf = kmem_alloc(oldsize, KM_SLEEP);
- oldsfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
- ASSERT(oldsfp->hdr.i8count == 0);
+ oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
+ ASSERT(oldsfp->i8count == 0);
memcpy(buf, oldsfp, oldsize);
/*
* Compute the new inode size.
*/
newsize =
oldsize +
- (oldsfp->hdr.count + 1) *
+ (oldsfp->count + 1) *
((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t));
xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);
xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);
/*
* Reset our pointers, the data has moved.
*/
- oldsfp = (xfs_dir2_sf_t *)buf;
- sfp = (xfs_dir2_sf_t *)dp->i_df.if_u1.if_data;
+ oldsfp = (xfs_dir2_sf_hdr_t *)buf;
+ sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
/*
* Fill in the new header.
*/
- sfp->hdr.count = oldsfp->hdr.count;
- sfp->hdr.i8count = 1;
- ino = xfs_dir2_sf_get_inumber(oldsfp, &oldsfp->hdr.parent);
- xfs_dir2_sf_put_inumber(sfp, &ino, &sfp->hdr.parent);
+ sfp->count = oldsfp->count;
+ sfp->i8count = 1;
+ xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
/*
* Copy the entries field by field.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp);
- i < sfp->hdr.count;
+ i < sfp->count;
i++, sfep = xfs_dir2_sf_nextentry(sfp, sfep),
oldsfep = xfs_dir2_sf_nextentry(oldsfp, oldsfep)) {
sfep->namelen = oldsfep->namelen;
sfep->offset = oldsfep->offset;
memcpy(sfep->name, oldsfep->name, sfep->namelen);
- ino = xfs_dir2_sf_get_inumber(oldsfp,
- xfs_dir2_sf_inumberp(oldsfep));
- xfs_dir2_sf_put_inumber(sfp, &ino, xfs_dir2_sf_inumberp(sfep));
+ xfs_dir2_sfe_put_ino(sfp, sfep,
+ xfs_dir2_sfe_get_ino(oldsfp, oldsfep));
}
/*
* Clean up the inode.
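Both converters share the same shape, which is easy to lose across the hunks: snapshot the old image, resize the inline fork, rebuild into it. Condensed, with error handling and the entry-copy loop elided (kmem_free() takes just the pointer in this era of the tree):

buf = kmem_alloc(oldsize, KM_SLEEP);		/* 1. snapshot the old image */
memcpy(buf, dp->i_df.if_u1.if_data, oldsize);
xfs_idata_realloc(dp, -oldsize, XFS_DATA_FORK);	/* 2. drop the old fork data */
xfs_idata_realloc(dp, newsize, XFS_DATA_FORK);	/* 3. size the new fork */
oldsfp = (xfs_dir2_sf_hdr_t *)buf;		/* 4. re-derive both pointers; */
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; /*    the data has moved */
/* 5. rebuild the header, then copy the entries field by field */
kmem_free(buf);					/* 6. free the snapshot */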
diff --git a/fs/xfs/xfs_dir2_sf.h b/fs/xfs/xfs_dir2_sf.h
deleted file mode 100644
index 6ac44b5..0000000
--- a/fs/xfs/xfs_dir2_sf.h
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Copyright (c) 2000-2001,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DIR2_SF_H__
-#define __XFS_DIR2_SF_H__
-
-/*
- * Directory layout when stored internal to an inode.
- *
- * Small directories are packed as tightly as possible so as to
- * fit into the literal area of the inode.
- */
-
-struct uio;
-struct xfs_dabuf;
-struct xfs_da_args;
-struct xfs_dir2_block;
-struct xfs_inode;
-struct xfs_mount;
-struct xfs_trans;
-
-/*
- * Inode number stored as 8 8-bit values.
- */
-typedef struct { __uint8_t i[8]; } xfs_dir2_ino8_t;
-
-/*
- * Inode number stored as 4 8-bit values.
- * Works a lot of the time, when all the inode numbers in a directory
- * fit in 32 bits.
- */
-typedef struct { __uint8_t i[4]; } xfs_dir2_ino4_t;
-
-typedef union {
- xfs_dir2_ino8_t i8;
- xfs_dir2_ino4_t i4;
-} xfs_dir2_inou_t;
-#define XFS_DIR2_MAX_SHORT_INUM ((xfs_ino_t)0xffffffffULL)
-
-/*
- * Normalized offset (in a data block) of the entry, really xfs_dir2_data_off_t.
- * Only need 16 bits, this is the byte offset into the single block form.
- */
-typedef struct { __uint8_t i[2]; } __arch_pack xfs_dir2_sf_off_t;
-
-/*
- * The parent directory has a dedicated field, and the self-pointer must
- * be calculated on the fly.
- *
- * Entries are packed toward the top as tightly as possible. The header
- * and the elements must be memcpy'd out into a work area to get correct
- * alignment for the inode number fields.
- */
-typedef struct xfs_dir2_sf_hdr {
- __uint8_t count; /* count of entries */
- __uint8_t i8count; /* count of 8-byte inode #s */
- xfs_dir2_inou_t parent; /* parent dir inode number */
-} __arch_pack xfs_dir2_sf_hdr_t;
-
-typedef struct xfs_dir2_sf_entry {
- __uint8_t namelen; /* actual name length */
- xfs_dir2_sf_off_t offset; /* saved offset */
- __uint8_t name[1]; /* name, variable size */
- xfs_dir2_inou_t inumber; /* inode number, var. offset */
-} __arch_pack xfs_dir2_sf_entry_t;
-
-typedef struct xfs_dir2_sf {
- xfs_dir2_sf_hdr_t hdr; /* shortform header */
- xfs_dir2_sf_entry_t list[1]; /* shortform entries */
-} xfs_dir2_sf_t;
-
-static inline int xfs_dir2_sf_hdr_size(int i8count)
-{
- return ((uint)sizeof(xfs_dir2_sf_hdr_t) - \
- ((i8count) == 0) * \
- ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline xfs_dir2_inou_t *xfs_dir2_sf_inumberp(xfs_dir2_sf_entry_t *sfep)
-{
- return (xfs_dir2_inou_t *)&(sfep)->name[(sfep)->namelen];
-}
-
-static inline xfs_intino_t
-xfs_dir2_sf_get_inumber(xfs_dir2_sf_t *sfp, xfs_dir2_inou_t *from)
-{
- return ((sfp)->hdr.i8count == 0 ? \
- (xfs_intino_t)XFS_GET_DIR_INO4((from)->i4) : \
- (xfs_intino_t)XFS_GET_DIR_INO8((from)->i8));
-}
-
-static inline void xfs_dir2_sf_put_inumber(xfs_dir2_sf_t *sfp, xfs_ino_t *from,
- xfs_dir2_inou_t *to)
-{
- if ((sfp)->hdr.i8count == 0)
- XFS_PUT_DIR_INO4(*(from), (to)->i4);
- else
- XFS_PUT_DIR_INO8(*(from), (to)->i8);
-}
-
-static inline xfs_dir2_data_aoff_t
-xfs_dir2_sf_get_offset(xfs_dir2_sf_entry_t *sfep)
-{
- return INT_GET_UNALIGNED_16_BE(&(sfep)->offset.i);
-}
-
-static inline void
-xfs_dir2_sf_put_offset(xfs_dir2_sf_entry_t *sfep, xfs_dir2_data_aoff_t off)
-{
- INT_SET_UNALIGNED_16_BE(&(sfep)->offset.i, off);
-}
-
-static inline int xfs_dir2_sf_entsize_byname(xfs_dir2_sf_t *sfp, int len)
-{
- return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (len) - \
- ((sfp)->hdr.i8count == 0) * \
- ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline int
-xfs_dir2_sf_entsize_byentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
-{
- return ((uint)sizeof(xfs_dir2_sf_entry_t) - 1 + (sfep)->namelen - \
- ((sfp)->hdr.i8count == 0) * \
- ((uint)sizeof(xfs_dir2_ino8_t) - (uint)sizeof(xfs_dir2_ino4_t)));
-}
-
-static inline xfs_dir2_sf_entry_t *xfs_dir2_sf_firstentry(xfs_dir2_sf_t *sfp)
-{
- return ((xfs_dir2_sf_entry_t *) \
- ((char *)(sfp) + xfs_dir2_sf_hdr_size(sfp->hdr.i8count)));
-}
-
-static inline xfs_dir2_sf_entry_t *
-xfs_dir2_sf_nextentry(xfs_dir2_sf_t *sfp, xfs_dir2_sf_entry_t *sfep)
-{
- return ((xfs_dir2_sf_entry_t *) \
- ((char *)(sfep) + xfs_dir2_sf_entsize_byentry(sfp,sfep)));
-}
-
-/*
- * Functions.
- */
-extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
- struct xfs_dir2_block *block,
- xfs_dir2_sf_hdr_t *sfhp);
-extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
- int size, xfs_dir2_sf_hdr_t *sfhp);
-extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
-extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
-extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent,
- xfs_off_t *offset, filldir_t filldir);
-extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
-extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
-extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
-
-#endif /* __XFS_DIR2_SF_H__ */
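With the wrapper struct gone, a compact reminder of what a shortform image actually looks like on disk; the format itself is unchanged by this patch, only the C-level view of it (xfs_dir2_sf_t) is dead. For the i8count == 0 case:

/*
 * offset 0:  count      1 byte   number of entries
 * offset 1:  i8count    1 byte   entries needing 8-byte inumbers
 * offset 2:  parent     4 bytes  (8 bytes when i8count != 0)
 * then per entry, packed with no padding (__arch_pack):
 *            namelen    1 byte
 *            offset     2 bytes  big-endian offset in the data block
 *            name       namelen bytes
 *            inumber    4 bytes  (8 bytes when i8count != 0)
 */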
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index d22e626..35c2aff 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -217,7 +217,7 @@ xfs_efi_item_committing(
/*
* This is the ops vector shared by all efi log items.
*/
-static struct xfs_item_ops xfs_efi_item_ops = {
+static const struct xfs_item_ops xfs_efi_item_ops = {
.iop_size = xfs_efi_item_size,
.iop_format = xfs_efi_item_format,
.iop_pin = xfs_efi_item_pin,
@@ -477,7 +477,7 @@ xfs_efd_item_committing(
/*
* This is the ops vector shared by all efd log items.
*/
-static struct xfs_item_ops xfs_efd_item_ops = {
+static const struct xfs_item_ops xfs_efd_item_ops = {
.iop_size = xfs_efd_item_size,
.iop_format = xfs_efd_item_format,
.iop_pin = xfs_efd_item_pin,
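Constifying the shared ops vectors moves them into read-only data and lets the compiler reject stray writes through them. A self-contained illustration of the pattern (names hypothetical, not from XFS):

#include <stdio.h>

struct item_ops {
	void (*fmt)(void);
};

static void fmt_impl(void) { puts("format"); }

/* const: the vector lands in .rodata and cannot be patched at runtime */
static const struct item_ops demo_ops = {
	.fmt = fmt_impl,
};

int main(void)
{
	demo_ops.fmt();
	/* demo_ops.fmt = NULL; would now fail to compile */
	return 0;
}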
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9124425..5170306 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -344,9 +344,9 @@ _xfs_filestream_update_ag(
* Either ip is a regular file and pip is a directory, or ip is a
* directory and pip is NULL.
*/
- ASSERT(ip && (((ip->i_d.di_mode & S_IFREG) && pip &&
- (pip->i_d.di_mode & S_IFDIR)) ||
- ((ip->i_d.di_mode & S_IFDIR) && !pip)));
+ ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip &&
+ S_ISDIR(pip->i_d.di_mode)) ||
+ (S_ISDIR(ip->i_d.di_mode) && !pip)));
mp = ip->i_mount;
cache = mp->m_filestream;
@@ -537,7 +537,7 @@ xfs_filestream_lookup_ag(
xfs_agnumber_t ag;
int ref;
- if (!(ip->i_d.di_mode & (S_IFREG | S_IFDIR))) {
+ if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) {
ASSERT(0);
return NULLAGNUMBER;
}
@@ -579,9 +579,9 @@ xfs_filestream_associate(
xfs_agnumber_t ag, rotorstep, startag;
int err = 0;
- ASSERT(pip->i_d.di_mode & S_IFDIR);
- ASSERT(ip->i_d.di_mode & S_IFREG);
- if (!(pip->i_d.di_mode & S_IFDIR) || !(ip->i_d.di_mode & S_IFREG))
+ ASSERT(S_ISDIR(pip->i_d.di_mode));
+ ASSERT(S_ISREG(ip->i_d.di_mode));
+ if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode))
return -EINVAL;
mp = pip->i_mount;
@@ -682,7 +682,7 @@ xfs_filestream_new_ag(
ip = ap->ip;
mp = ip->i_mount;
cache = mp->m_filestream;
- minlen = ap->alen;
+ minlen = ap->length;
*agp = NULLAGNUMBER;
/*
@@ -761,7 +761,7 @@ xfs_filestream_new_ag(
*/
ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;
flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |
- (ap->low ? XFS_PICK_LOWSPACE : 0);
+ (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0);
err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen);
if (err || *agp == NULLAGNUMBER)
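The mode-check conversions in this file are not just cosmetic. S_IFMT is a 4-bit field, not a set of independent flag bits, so a bitwise test such as mode & S_IFREG also fires for symlinks (S_IFLNK = 0120000 shares the 0100000 bit with S_IFREG) and sockets. A self-contained demonstration:

#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>

int main(void)
{
	mode_t lnk = S_IFLNK | 0777;	/* a symlink's st_mode */

	/* the bitwise test misfires: S_IFLNK overlaps S_IFREG */
	assert(lnk & S_IFREG);		/* wrongly looks "regular" */

	/* S_ISREG compares the whole S_IFMT field and gets it right */
	assert(!S_ISREG(lnk));
	assert(S_ISLNK(lnk));
	return 0;
}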
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h
index 8f6fc1a..c13fed8 100644
--- a/fs/xfs/xfs_fs.h
+++ b/fs/xfs/xfs_fs.h
@@ -249,6 +249,11 @@ typedef struct xfs_fsop_resblks {
#define XFS_MAX_LOG_BYTES \
((2 * 1024 * 1024 * 1024ULL) - XFS_MIN_LOG_BYTES)
+/* Used for sanity checks on superblock */
+#define XFS_MAX_DBLOCKS(s) ((xfs_drfsbno_t)(s)->sb_agcount * (s)->sb_agblocks)
+#define XFS_MIN_DBLOCKS(s) ((xfs_drfsbno_t)((s)->sb_agcount - 1) * \
+ (s)->sb_agblocks + XFS_MIN_AG_BLOCKS)
+
/*
* Structures for XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG & XFS_IOC_FSGROWFSRT
*/
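The new macros bound sb_dblocks by the AG geometry: no more than agcount full AGs, and no less than agcount - 1 full AGs plus a minimally sized final AG. A worked example with hypothetical numbers (XFS_MIN_AG_BLOCKS taken as 64 purely for illustration):

/*
 *   sb_agcount  = 4
 *   sb_agblocks = 1000
 *
 *   XFS_MAX_DBLOCKS = 4 * 1000      = 4000
 *   XFS_MIN_DBLOCKS = 3 * 1000 + 64 = 3064
 *
 * a superblock claiming sb_dblocks outside [3064, 4000] fails the
 * sanity check
 */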
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 9153d2c..1c6fdeb 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -194,6 +194,10 @@ xfs_growfs_data_private(
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
+ if (!bp) {
+ error = ENOMEM;
+ goto error0;
+ }
agf = XFS_BUF_TO_AGF(bp);
memset(agf, 0, mp->m_sb.sb_sectsize);
agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
@@ -216,16 +220,21 @@ xfs_growfs_data_private(
tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
agf->agf_freeblks = cpu_to_be32(tmpsize);
agf->agf_longest = cpu_to_be32(tmpsize);
- error = xfs_bwrite(mp, bp);
- if (error) {
+ error = xfs_bwrite(bp);
+ xfs_buf_relse(bp);
+ if (error)
goto error0;
- }
+
/*
* AG inode header block
*/
bp = xfs_buf_get(mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), XBF_LOCK | XBF_MAPPED);
+ if (!bp) {
+ error = ENOMEM;
+ goto error0;
+ }
agi = XFS_BUF_TO_AGI(bp);
memset(agi, 0, mp->m_sb.sb_sectsize);
agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
@@ -240,10 +249,11 @@ xfs_growfs_data_private(
agi->agi_dirino = cpu_to_be32(NULLAGINO);
for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
- error = xfs_bwrite(mp, bp);
- if (error) {
+ error = xfs_bwrite(bp);
+ xfs_buf_relse(bp);
+ if (error)
goto error0;
- }
+
/*
* BNO btree root block
*/
@@ -251,6 +261,10 @@ xfs_growfs_data_private(
XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize),
XBF_LOCK | XBF_MAPPED);
+ if (!bp) {
+ error = ENOMEM;
+ goto error0;
+ }
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTB_MAGIC);
@@ -262,10 +276,11 @@ xfs_growfs_data_private(
arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp));
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
- error = xfs_bwrite(mp, bp);
- if (error) {
+ error = xfs_bwrite(bp);
+ xfs_buf_relse(bp);
+ if (error)
goto error0;
- }
+
/*
* CNT btree root block
*/
@@ -273,6 +288,10 @@ xfs_growfs_data_private(
XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize),
XBF_LOCK | XBF_MAPPED);
+ if (!bp) {
+ error = ENOMEM;
+ goto error0;
+ }
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_ABTC_MAGIC);
@@ -285,10 +304,11 @@ xfs_growfs_data_private(
arec->ar_blockcount = cpu_to_be32(
agsize - be32_to_cpu(arec->ar_startblock));
nfree += be32_to_cpu(arec->ar_blockcount);
- error = xfs_bwrite(mp, bp);
- if (error) {
+ error = xfs_bwrite(bp);
+ xfs_buf_relse(bp);
+ if (error)
goto error0;
- }
+
/*
* INO btree root block
*/
@@ -296,6 +316,10 @@ xfs_growfs_data_private(
XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
BTOBB(mp->m_sb.sb_blocksize),
XBF_LOCK | XBF_MAPPED);
+ if (!bp) {
+ error = ENOMEM;
+ goto error0;
+ }
block = XFS_BUF_TO_BLOCK(bp);
memset(block, 0, mp->m_sb.sb_blocksize);
block->bb_magic = cpu_to_be32(XFS_IBT_MAGIC);
@@ -303,10 +327,10 @@ xfs_growfs_data_private(
block->bb_numrecs = 0;
block->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
block->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
- error = xfs_bwrite(mp, bp);
- if (error) {
+ error = xfs_bwrite(bp);
+ xfs_buf_relse(bp);
+ if (error)
goto error0;
- }
}
xfs_trans_agblocks_delta(tp, nfree);
/*
@@ -396,9 +420,9 @@ xfs_growfs_data_private(
* just issue a warning and continue. The real work is
* already done and committed.
*/
- if (!(error = xfs_bwrite(mp, bp))) {
- continue;
- } else {
+ error = xfs_bwrite(bp);
+ xfs_buf_relse(bp);
+ if (error) {
xfs_warn(mp,
"write error %d updating secondary superblock for ag %d",
error, agno);
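The recurring change through xfs_growfs_data_private(): xfs_bwrite() loses its mount argument and no longer consumes the buffer reference, so every call site now pairs it with an explicit xfs_buf_relse(), taken on success and failure alike. The new idiom, extracted:

error = xfs_bwrite(bp);		/* synchronous write; bp stays ours */
xfs_buf_relse(bp);		/* always release, even on error */
if (error)
	goto error0;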
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 84ebeec..169380e 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -150,7 +150,7 @@ xfs_check_agi_freecount(
/*
* Initialise a new set of inodes.
*/
-STATIC void
+STATIC int
xfs_ialloc_inode_init(
struct xfs_mount *mp,
struct xfs_trans *tp,
@@ -202,9 +202,8 @@ xfs_ialloc_inode_init(
fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
mp->m_bsize * blks_per_cluster,
XBF_LOCK);
- ASSERT(fbuf);
- ASSERT(!XFS_BUF_GETERROR(fbuf));
-
+ if (!fbuf)
+ return ENOMEM;
/*
* Initialize all inodes in this buffer and then log them.
*
@@ -226,6 +225,7 @@ xfs_ialloc_inode_init(
}
xfs_trans_inode_alloc_buf(tp, fbuf);
}
+ return 0;
}
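Same theme as the fsops changes above: xfs_trans_get_buf() can legitimately return NULL under memory pressure, so the old ASSERTs become a real error path. Note the positive errno; internal XFS routines in this tree return positive error codes, not -ENOMEM. The pattern, extracted:

fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
			 mp->m_bsize * blks_per_cluster, XBF_LOCK);
if (!fbuf)
	return ENOMEM;	/* propagated by xfs_ialloc_ag_alloc() below */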
/*
@@ -370,9 +370,11 @@ xfs_ialloc_ag_alloc(
* rather than a linear progression to prevent the next generation
* number from being easily guessable.
*/
- xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno, args.len,
- random32());
+ error = xfs_ialloc_inode_init(args.mp, tp, agno, args.agbno,
+ args.len, random32());
+ if (error)
+ return error;
/*
* Convert the results.
*/
@@ -683,7 +685,7 @@ xfs_dialloc(
return 0;
}
agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
} else {
/*
* Continue where we left off before. In this case, we
@@ -691,7 +693,7 @@ xfs_dialloc(
*/
agbp = *IO_agbp;
agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
}
mp = tp->t_mountp;
@@ -775,7 +777,7 @@ nextag:
if (error)
goto nextag;
agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
}
/*
* Here with an allocation group that has a free inode.
@@ -944,7 +946,7 @@ nextag:
* See if the most recently allocated block has any free.
*/
newino:
- if (be32_to_cpu(agi->agi_newino) != NULLAGINO) {
+ if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
XFS_LOOKUP_EQ, &i);
if (error)
@@ -1085,7 +1087,7 @@ xfs_difree(
return error;
}
agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
ASSERT(agbno < be32_to_cpu(agi->agi_length));
/*
* Initialize the cursor.
@@ -1438,7 +1440,7 @@ xfs_ialloc_log_agi(
xfs_agi_t *agi; /* allocation group header */
agi = XFS_BUF_TO_AGI(bp);
- ASSERT(be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC);
+ ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
#endif
/*
* Compute byte offsets for the first and last fields.
@@ -1486,13 +1488,13 @@ xfs_read_agi(
if (error)
return error;
- ASSERT(*bpp && !XFS_BUF_GETERROR(*bpp));
+ ASSERT(!xfs_buf_geterror(*bpp));
agi = XFS_BUF_TO_AGI(*bpp);
/*
* Validate the magic number of the agi block.
*/
- agi_ok = be32_to_cpu(agi->agi_magicnum) == XFS_AGI_MAGIC &&
+ agi_ok = agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC) &&
XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)) &&
be32_to_cpu(agi->agi_seqno) == agno;
if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI,
@@ -1503,7 +1505,7 @@ xfs_read_agi(
return XFS_ERROR(EFSCORRUPTED);
}
- XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGI, XFS_AGI_REF);
+ xfs_buf_set_ref(*bpp, XFS_AGI_REF);
xfs_check_agi_unlinked(agi);
return 0;
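A pattern repeated across this hunk: compare the raw on-disk value against cpu_to_be32(CONSTANT) rather than byte-swapping the disk value with be32_to_cpu(). The two are semantically identical, but swapping a compile-time constant folds away entirely, so the runtime comparison does no swapping at all. A userspace illustration (little-endian host assumed; XFS_AGI_MAGIC is "XAGI", 0x58414749):

#include <assert.h>
#include <stdint.h>

/* minimal stand-in for the kernel helper on a little-endian host */
static uint32_t cpu_to_be32(uint32_t x)
{
	return ((x & 0xffu) << 24) | ((x & 0xff00u) << 8) |
	       ((x >> 8) & 0xff00u) | (x >> 24);
}
#define be32_to_cpu cpu_to_be32		/* the byte swap is an involution */

int main(void)
{
	uint32_t disk_magic = cpu_to_be32(0x58414749);	/* as read from disk */

	/* old style: swap the disk value, compare in CPU order */
	assert(be32_to_cpu(disk_magic) == 0x58414749);

	/* new style: swap the constant, compare in disk order; with a
	 * constant argument the swap is folded at compile time */
	assert(disk_magic == cpu_to_be32(0x58414749));
	return 0;
}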
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c
index 16921f5..c6a7581 100644
--- a/fs/xfs/xfs_ialloc_btree.c
+++ b/fs/xfs/xfs_ialloc_btree.c
@@ -31,7 +31,6 @@
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
@@ -205,72 +204,6 @@ xfs_inobt_recs_inorder(
}
#endif /* DEBUG */
-#ifdef XFS_BTREE_TRACE
-ktrace_t *xfs_inobt_trace_buf;
-
-STATIC void
-xfs_inobt_trace_enter(
- struct xfs_btree_cur *cur,
- const char *func,
- char *s,
- int type,
- int line,
- __psunsigned_t a0,
- __psunsigned_t a1,
- __psunsigned_t a2,
- __psunsigned_t a3,
- __psunsigned_t a4,
- __psunsigned_t a5,
- __psunsigned_t a6,
- __psunsigned_t a7,
- __psunsigned_t a8,
- __psunsigned_t a9,
- __psunsigned_t a10)
-{
- ktrace_enter(xfs_inobt_trace_buf, (void *)(__psint_t)type,
- (void *)func, (void *)s, NULL, (void *)cur,
- (void *)a0, (void *)a1, (void *)a2, (void *)a3,
- (void *)a4, (void *)a5, (void *)a6, (void *)a7,
- (void *)a8, (void *)a9, (void *)a10);
-}
-
-STATIC void
-xfs_inobt_trace_cursor(
- struct xfs_btree_cur *cur,
- __uint32_t *s0,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *s0 = cur->bc_private.a.agno;
- *l0 = cur->bc_rec.i.ir_startino;
- *l1 = cur->bc_rec.i.ir_free;
-}
-
-STATIC void
-xfs_inobt_trace_key(
- struct xfs_btree_cur *cur,
- union xfs_btree_key *key,
- __uint64_t *l0,
- __uint64_t *l1)
-{
- *l0 = be32_to_cpu(key->inobt.ir_startino);
- *l1 = 0;
-}
-
-STATIC void
-xfs_inobt_trace_record(
- struct xfs_btree_cur *cur,
- union xfs_btree_rec *rec,
- __uint64_t *l0,
- __uint64_t *l1,
- __uint64_t *l2)
-{
- *l0 = be32_to_cpu(rec->inobt.ir_startino);
- *l1 = be32_to_cpu(rec->inobt.ir_freecount);
- *l2 = be64_to_cpu(rec->inobt.ir_free);
-}
-#endif /* XFS_BTREE_TRACE */
-
static const struct xfs_btree_ops xfs_inobt_ops = {
.rec_len = sizeof(xfs_inobt_rec_t),
.key_len = sizeof(xfs_inobt_key_t),
@@ -286,18 +219,10 @@ static const struct xfs_btree_ops xfs_inobt_ops = {
.init_rec_from_cur = xfs_inobt_init_rec_from_cur,
.init_ptr_from_cur = xfs_inobt_init_ptr_from_cur,
.key_diff = xfs_inobt_key_diff,
-
#ifdef DEBUG
.keys_inorder = xfs_inobt_keys_inorder,
.recs_inorder = xfs_inobt_recs_inorder,
#endif
-
-#ifdef XFS_BTREE_TRACE
- .trace_enter = xfs_inobt_trace_enter,
- .trace_cursor = xfs_inobt_trace_cursor,
- .trace_key = xfs_inobt_trace_key,
- .trace_record = xfs_inobt_trace_record,
-#endif
};
/*
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index ca752f0..cfc4277 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -38,7 +38,6 @@
#include "xfs_trans_priv.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
-#include "xfs_btree_trace.h"
#include "xfs_trace.h"
@@ -76,7 +75,6 @@ xfs_inode_alloc(
return NULL;
}
- ASSERT(atomic_read(&ip->i_iocount) == 0);
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
@@ -151,7 +149,6 @@ xfs_inode_free(
}
/* asserts to verify all state is correct here */
- ASSERT(atomic_read(&ip->i_iocount) == 0);
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 5715279..755ee81 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -37,7 +37,6 @@
#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
#include "xfs_btree.h"
-#include "xfs_btree_trace.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_bmap.h"
@@ -52,7 +51,7 @@ kmem_zone_t *xfs_ifork_zone;
kmem_zone_t *xfs_inode_zone;
/*
- * Used in xfs_itruncate(). This is the maximum number of extents
+ * Used in xfs_itruncate_extents(). This is the maximum number of extents
* freed from a file in a single transaction.
*/
#define XFS_ITRUNC_MAX_EXTENTS 2
@@ -167,7 +166,7 @@ xfs_imap_to_bp(
dip = (xfs_dinode_t *)xfs_buf_offset(bp,
(i << mp->m_sb.sb_inodelog));
- di_ok = be16_to_cpu(dip->di_magic) == XFS_DINODE_MAGIC &&
+ di_ok = dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) &&
XFS_DINODE_GOOD_VERSION(dip->di_version);
if (unlikely(XFS_TEST_ERROR(!di_ok, mp,
XFS_ERRTAG_ITOBP_INOTOBP,
@@ -191,12 +190,6 @@ xfs_imap_to_bp(
}
xfs_inobp_check(mp, bp);
-
- /*
- * Mark the buffer as an inode buffer now that it looks good
- */
- XFS_BUF_SET_VTYPE(bp, B_FS_INO);
-
*bpp = bp;
return 0;
}
@@ -369,7 +362,7 @@ xfs_iformat(
/*
* no local regular files yet
*/
- if (unlikely((be16_to_cpu(dip->di_mode) & S_IFMT) == S_IFREG)) {
+ if (unlikely(S_ISREG(be16_to_cpu(dip->di_mode)))) {
xfs_warn(ip->i_mount,
"corrupt inode %Lu (local format for regular file).",
(unsigned long long) ip->i_ino);
@@ -802,7 +795,7 @@ xfs_iread(
* If we got something that isn't an inode it means someone
* (nfs or dmi) has a stale handle.
*/
- if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) {
+ if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC)) {
#ifdef DEBUG
xfs_alert(mp,
"%s: dip->di_magic (0x%x) != XFS_DINODE_MAGIC (0x%x)",
@@ -1041,7 +1034,7 @@ xfs_ialloc(
if (pip && XFS_INHERIT_GID(pip)) {
ip->i_d.di_gid = pip->i_d.di_gid;
- if ((pip->i_d.di_mode & S_ISGID) && (mode & S_IFMT) == S_IFDIR) {
+ if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
ip->i_d.di_mode |= S_ISGID;
}
}
@@ -1098,14 +1091,14 @@ xfs_ialloc(
if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {
uint di_flags = 0;
- if ((mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
ip->i_d.di_extsize = pip->i_d.di_extsize;
}
- } else if ((mode & S_IFMT) == S_IFREG) {
+ } else if (S_ISREG(mode)) {
if (pip->i_d.di_flags & XFS_DIFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_REALTIME;
if (pip->i_d.di_flags & XFS_DIFLAG_EXTSZINHERIT) {
@@ -1153,7 +1146,7 @@ xfs_ialloc(
/*
* Log the new values stuffed into the inode.
*/
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, flags);
/* now that we have an i_mode we can setup inode ops and unlock */
@@ -1179,17 +1172,18 @@ xfs_ialloc(
* at least do it for regular files.
*/
#ifdef DEBUG
-void
+STATIC void
xfs_isize_check(
- xfs_mount_t *mp,
- xfs_inode_t *ip,
- xfs_fsize_t isize)
+ struct xfs_inode *ip,
+ xfs_fsize_t isize)
{
- xfs_fileoff_t map_first;
- int nimaps;
- xfs_bmbt_irec_t imaps[2];
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t map_first;
+ int nimaps;
+ xfs_bmbt_irec_t imaps[2];
+ int error;
- if ((ip->i_d.di_mode & S_IFMT) != S_IFREG)
+ if (!S_ISREG(ip->i_d.di_mode))
return;
if (XFS_IS_REALTIME_INODE(ip))
@@ -1204,178 +1198,23 @@ xfs_isize_check(
* The filesystem could be shutting down, so bmapi may return
* an error.
*/
- if (xfs_bmapi(NULL, ip, map_first,
+ error = xfs_bmapi_read(ip, map_first,
(XFS_B_TO_FSB(mp,
- (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) -
- map_first),
- XFS_BMAPI_ENTIRE, NULL, 0, imaps, &nimaps,
- NULL))
- return;
+ (xfs_ufsize_t)XFS_MAXIOFFSET(mp)) - map_first),
+ imaps, &nimaps, XFS_BMAPI_ENTIRE);
+ if (error)
+ return;
ASSERT(nimaps == 1);
ASSERT(imaps[0].br_startblock == HOLESTARTBLOCK);
}
+#else /* DEBUG */
+#define xfs_isize_check(ip, isize)
#endif /* DEBUG */
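The #else arm turns xfs_isize_check() into an empty macro so that the call sites further down need no #ifdef of their own. One caveat with a bare empty expansion: any side effects in the arguments silently vanish in non-DEBUG builds. A slightly safer variant (not what this patch uses) keeps the expansion a valid expression:

#ifdef DEBUG
STATIC void xfs_isize_check(struct xfs_inode *ip, xfs_fsize_t isize);
#else
#define xfs_isize_check(ip, isize)	((void)0)
#endif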
/*
- * Calculate the last possible buffered byte in a file. This must
- * include data that was buffered beyond the EOF by the write code.
- * This also needs to deal with overflowing the xfs_fsize_t type
- * which can happen for sizes near the limit.
- *
- * We also need to take into account any blocks beyond the EOF. It
- * may be the case that they were buffered by a write which failed.
- * In that case the pages will still be in memory, but the inode size
- * will never have been updated.
- */
-STATIC xfs_fsize_t
-xfs_file_last_byte(
- xfs_inode_t *ip)
-{
- xfs_mount_t *mp;
- xfs_fsize_t last_byte;
- xfs_fileoff_t last_block;
- xfs_fileoff_t size_last_block;
- int error;
-
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL|XFS_IOLOCK_SHARED));
-
- mp = ip->i_mount;
- /*
- * Only check for blocks beyond the EOF if the extents have
- * been read in. This eliminates the need for the inode lock,
- * and it also saves us from looking when it really isn't
- * necessary.
- */
- if (ip->i_df.if_flags & XFS_IFEXTENTS) {
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_bmap_last_offset(NULL, ip, &last_block,
- XFS_DATA_FORK);
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
- if (error) {
- last_block = 0;
- }
- } else {
- last_block = 0;
- }
- size_last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)ip->i_size);
- last_block = XFS_FILEOFF_MAX(last_block, size_last_block);
-
- last_byte = XFS_FSB_TO_B(mp, last_block);
- if (last_byte < 0) {
- return XFS_MAXIOFFSET(mp);
- }
- last_byte += (1 << mp->m_writeio_log);
- if (last_byte < 0) {
- return XFS_MAXIOFFSET(mp);
- }
- return last_byte;
-}
-
-/*
- * Start the truncation of the file to new_size. The new size
- * must be smaller than the current size. This routine will
- * clear the buffer and page caches of file data in the removed
- * range, and xfs_itruncate_finish() will remove the underlying
- * disk blocks.
- *
- * The inode must have its I/O lock locked EXCLUSIVELY, and it
- * must NOT have the inode lock held at all. This is because we're
- * calling into the buffer/page cache code and we can't hold the
- * inode lock when we do so.
- *
- * We need to wait for any direct I/Os in flight to complete before we
- * proceed with the truncate. This is needed to prevent the extents
- * being read or written by the direct I/Os from being removed while the
- * I/O is in flight as there is no other method of synchronising
- * direct I/O with the truncate operation. Also, because we hold
- * the IOLOCK in exclusive mode, we prevent new direct I/Os from being
- * started until the truncate completes and drops the lock. Essentially,
- * the xfs_ioend_wait() call forms an I/O barrier that provides strict
- * ordering between direct I/Os and the truncate operation.
- *
- * The flags parameter can have either the value XFS_ITRUNC_DEFINITE
- * or XFS_ITRUNC_MAYBE. The XFS_ITRUNC_MAYBE value should be used
- * in the case that the caller is locking things out of order and
- * may not be able to call xfs_itruncate_finish() with the inode lock
- * held without dropping the I/O lock. If the caller must drop the
- * I/O lock before calling xfs_itruncate_finish(), then xfs_itruncate_start()
- * must be called again with all the same restrictions as the initial
- * call.
- */
-int
-xfs_itruncate_start(
- xfs_inode_t *ip,
- uint flags,
- xfs_fsize_t new_size)
-{
- xfs_fsize_t last_byte;
- xfs_off_t toss_start;
- xfs_mount_t *mp;
- int error = 0;
-
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
- ASSERT((new_size == 0) || (new_size <= ip->i_size));
- ASSERT((flags == XFS_ITRUNC_DEFINITE) ||
- (flags == XFS_ITRUNC_MAYBE));
-
- mp = ip->i_mount;
-
- /* wait for the completion of any pending DIOs */
- if (new_size == 0 || new_size < ip->i_size)
- xfs_ioend_wait(ip);
-
- /*
- * Call toss_pages or flushinval_pages to get rid of pages
- * overlapping the region being removed. We have to use
- * the less efficient flushinval_pages in the case that the
- * caller may not be able to finish the truncate without
- * dropping the inode's I/O lock. Make sure
- * to catch any pages brought in by buffers overlapping
- * the EOF by searching out beyond the isize by our
- * block size. We round new_size up to a block boundary
- * so that we don't toss things on the same block as
- * new_size but before it.
- *
- * Before calling toss_page or flushinval_pages, make sure to
- * call remapf() over the same region if the file is mapped.
- * This frees up mapped file references to the pages in the
- * given range and for the flushinval_pages case it ensures
- * that we get the latest mapped changes flushed out.
- */
- toss_start = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- toss_start = XFS_FSB_TO_B(mp, toss_start);
- if (toss_start < 0) {
- /*
- * The place to start tossing is beyond our maximum
- * file size, so there is no way that the data extended
- * out there.
- */
- return 0;
- }
- last_byte = xfs_file_last_byte(ip);
- trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte);
- if (last_byte > toss_start) {
- if (flags & XFS_ITRUNC_DEFINITE) {
- xfs_tosspages(ip, toss_start,
- -1, FI_REMAPF_LOCKED);
- } else {
- error = xfs_flushinval_pages(ip, toss_start,
- -1, FI_REMAPF_LOCKED);
- }
- }
-
-#ifdef DEBUG
- if (new_size == 0) {
- ASSERT(VN_CACHED(VFS_I(ip)) == 0);
- }
-#endif
- return error;
-}
-
-/*
- * Shrink the file to the given new_size. The new size must be smaller than
- * the current size. This will free up the underlying blocks in the removed
- * range after a call to xfs_itruncate_start() or xfs_atruncate_start().
+ * Free up the underlying blocks past new_size. The new size must be smaller
+ * than the current size. This routine can be used both for the attribute and
+ * data fork, and does not modify the inode size, which is left to the caller.
*
* The transaction passed to this routine must have made a permanent log
* reservation of at least XFS_ITRUNCATE_LOG_RES. This routine may commit the
@@ -1387,31 +1226,6 @@ xfs_itruncate_start(
* will be "held" within the returned transaction. This routine does NOT
* require any disk space to be reserved for it within the transaction.
*
- * The fork parameter must be either xfs_attr_fork or xfs_data_fork, and it
- * indicates the fork which is to be truncated. For the attribute fork we only
- * support truncation to size 0.
- *
- * We use the sync parameter to indicate whether or not the first transaction
- * we perform might have to be synchronous. For the attr fork, it needs to be
- * so if the unlink of the inode is not yet known to be permanent in the log.
- * This keeps us from freeing and reusing the blocks of the attribute fork
- * before the unlink of the inode becomes permanent.
- *
- * For the data fork, we normally have to run synchronously if we're being
- * called out of the inactive path or we're being called out of the create path
- * where we're truncating an existing file. Either way, the truncate needs to
- * be sync so blocks don't reappear in the file with altered data in case of a
- * crash. wsync filesystems can run the first case async because anything that
- * shrinks the inode has to run sync so by the time we're called here from
- * inactive, the inode size is permanently set to 0.
- *
- * Calls from the truncate path always need to be sync unless we're in a wsync
- * filesystem and the file has already been unlinked.
- *
- * The caller is responsible for correctly setting the sync parameter. It gets
- * too hard for us to guess here which path we're being called out of just
- * based on inode state.
- *
* If we get an error, we must return with the inode locked and linked into the
* current transaction. This keeps things simple for the higher level code,
* because it always knows that the inode is locked and held in the transaction
@@ -1419,116 +1233,30 @@ xfs_itruncate_start(
* dirty on error so that transactions can be easily aborted if possible.
*/
int
-xfs_itruncate_finish(
- xfs_trans_t **tp,
- xfs_inode_t *ip,
- xfs_fsize_t new_size,
- int fork,
- int sync)
+xfs_itruncate_extents(
+ struct xfs_trans **tpp,
+ struct xfs_inode *ip,
+ int whichfork,
+ xfs_fsize_t new_size)
{
- xfs_fsblock_t first_block;
- xfs_fileoff_t first_unmap_block;
- xfs_fileoff_t last_block;
- xfs_filblks_t unmap_len=0;
- xfs_mount_t *mp;
- xfs_trans_t *ntp;
- int done;
- int committed;
- xfs_bmap_free_t free_list;
- int error;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp = *tpp;
+ struct xfs_trans *ntp;
+ xfs_bmap_free_t free_list;
+ xfs_fsblock_t first_block;
+ xfs_fileoff_t first_unmap_block;
+ xfs_fileoff_t last_block;
+ xfs_filblks_t unmap_len;
+ int committed;
+ int error = 0;
+ int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
- ASSERT((new_size == 0) || (new_size <= ip->i_size));
- ASSERT(*tp != NULL);
- ASSERT((*tp)->t_flags & XFS_TRANS_PERM_LOG_RES);
- ASSERT(ip->i_transp == *tp);
+ ASSERT(new_size <= ip->i_size);
+ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
ASSERT(ip->i_itemp->ili_lock_flags == 0);
-
-
- ntp = *tp;
- mp = (ntp)->t_mountp;
- ASSERT(! XFS_NOT_DQATTACHED(mp, ip));
-
- /*
- * We only support truncating the entire attribute fork.
- */
- if (fork == XFS_ATTR_FORK) {
- new_size = 0LL;
- }
- first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- trace_xfs_itruncate_finish_start(ip, new_size);
-
- /*
- * The first thing we do is set the size to new_size permanently
- * on disk. This way we don't have to worry about anyone ever
- * being able to look at the data being freed even in the face
- * of a crash. What we're getting around here is the case where
- * we free a block, it is allocated to another file, it is written
- * to, and then we crash. If the new data gets written to the
- * file but the log buffers containing the free and reallocation
- * don't, then we'd end up with garbage in the blocks being freed.
- * As long as we make the new_size permanent before actually
- * freeing any blocks it doesn't matter if they get written to.
- *
- * The callers must signal into us whether or not the size
- * setting here must be synchronous. There are a few cases
- * where it doesn't have to be synchronous. Those cases
- * occur if the file is unlinked and we know the unlink is
- * permanent or if the blocks being truncated are guaranteed
- * to be beyond the inode eof (regardless of the link count)
- * and the eof value is permanent. Both of these cases occur
- * only on wsync-mounted filesystems. In those cases, we're
- * guaranteed that no user will ever see the data in the blocks
- * that are being truncated so the truncate can run async.
- * In the free beyond eof case, the file may wind up with
- * more blocks allocated to it than it needs if we crash
- * and that won't get fixed until the next time the file
- * is re-opened and closed but that's ok as that shouldn't
- * be too many blocks.
- *
- * However, we can't just make all wsync xactions run async
- * because there's one call out of the create path that needs
- * to run sync where it's truncating an existing file to size
- * 0 whose size is > 0.
- *
- * It's probably possible to come up with a test in this
- * routine that would correctly distinguish all the above
- * cases from the values of the function parameters and the
- * inode state but for sanity's sake, I've decided to let the
- * layers above just tell us. It's simpler to correctly figure
- * out in the layer above exactly under what conditions we
- * can run async and I think it's easier for others read and
- * follow the logic in case something has to be changed.
- * cscope is your friend -- rcc.
- *
- * The attribute fork is much simpler.
- *
- * For the attribute fork we allow the caller to tell us whether
- * the unlink of the inode that led to this call is yet permanent
- * in the on disk log. If it is not and we will be freeing extents
- * in this inode then we make the first transaction synchronous
- * to make sure that the unlink is permanent by the time we free
- * the blocks.
- */
- if (fork == XFS_DATA_FORK) {
- if (ip->i_d.di_nextents > 0) {
- /*
- * If we are not changing the file size then do
- * not update the on-disk file size - we may be
- * called from xfs_inactive_free_eofblocks(). If we
- * update the on-disk file size and then the system
- * crashes before the contents of the file are
- * flushed to disk then the files may be full of
- * holes (ie NULL files bug).
- */
- if (ip->i_size != new_size) {
- ip->i_d.di_size = new_size;
- ip->i_size = new_size;
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
- }
- }
- }
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
/*
* Since it is possible for space to become allocated beyond
@@ -1539,128 +1267,142 @@ xfs_itruncate_finish(
* beyond the maximum file size (ie it is the same as last_block),
* then there is nothing to do.
*/
+ first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
- ASSERT(first_unmap_block <= last_block);
- done = 0;
- if (last_block == first_unmap_block) {
- done = 1;
- } else {
- unmap_len = last_block - first_unmap_block + 1;
- }
+ if (first_unmap_block == last_block)
+ return 0;
+
+ ASSERT(first_unmap_block < last_block);
+ unmap_len = last_block - first_unmap_block + 1;
while (!done) {
- /*
- * Free up up to XFS_ITRUNC_MAX_EXTENTS. xfs_bunmapi()
- * will tell us whether it freed the entire range or
- * not. If this is a synchronous mount (wsync),
- * then we can tell bunmapi to keep all the
- * transactions asynchronous since the unlink
- * transaction that made this inode inactive has
- * already hit the disk. There's no danger of
- * the freed blocks being reused, there being a
- * crash, and the reused blocks suddenly reappearing
- * in this file with garbage in them once recovery
- * runs.
- */
xfs_bmap_init(&free_list, &first_block);
- error = xfs_bunmapi(ntp, ip,
+ error = xfs_bunmapi(tp, ip,
first_unmap_block, unmap_len,
- xfs_bmapi_aflag(fork),
+ xfs_bmapi_aflag(whichfork),
XFS_ITRUNC_MAX_EXTENTS,
&first_block, &free_list,
&done);
- if (error) {
- /*
- * If the bunmapi call encounters an error,
- * return to the caller where the transaction
- * can be properly aborted. We just need to
- * make sure we're not holding any resources
- * that we were not when we came in.
- */
- xfs_bmap_cancel(&free_list);
- return error;
- }
+ if (error)
+ goto out_bmap_cancel;
/*
* Duplicate the transaction that has the permanent
* reservation and commit the old transaction.
*/
- error = xfs_bmap_finish(tp, &free_list, &committed);
- ntp = *tp;
+ error = xfs_bmap_finish(&tp, &free_list, &committed);
if (committed)
- xfs_trans_ijoin(ntp, ip);
-
- if (error) {
- /*
- * If the bmap finish call encounters an error, return
- * to the caller where the transaction can be properly
- * aborted. We just need to make sure we're not
- * holding any resources that we were not when we came
- * in.
- *
- * Aborting from this point might lose some blocks in
- * the file system, but oh well.
- */
- xfs_bmap_cancel(&free_list);
- return error;
- }
+ xfs_trans_ijoin(tp, ip, 0);
+ if (error)
+ goto out_bmap_cancel;
if (committed) {
/*
* Mark the inode dirty so it will be logged and
* moved forward in the log as part of every commit.
*/
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
- ntp = xfs_trans_dup(ntp);
- error = xfs_trans_commit(*tp, 0);
- *tp = ntp;
+ ntp = xfs_trans_dup(tp);
+ error = xfs_trans_commit(tp, 0);
+ tp = ntp;
- xfs_trans_ijoin(ntp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
if (error)
- return error;
+ goto out;
+
/*
- * transaction commit worked ok so we can drop the extra ticket
+ * Transaction commit worked ok so we can drop the extra ticket
* reference that we gained in xfs_trans_dup()
*/
- xfs_log_ticket_put(ntp->t_ticket);
- error = xfs_trans_reserve(ntp, 0,
+ xfs_log_ticket_put(tp->t_ticket);
+ error = xfs_trans_reserve(tp, 0,
XFS_ITRUNCATE_LOG_RES(mp), 0,
XFS_TRANS_PERM_LOG_RES,
XFS_ITRUNCATE_LOG_COUNT);
if (error)
- return error;
+ goto out;
}
+
+out:
+ *tpp = tp;
+ return error;
+out_bmap_cancel:
/*
- * Only update the size in the case of the data fork, but
- * always re-log the inode so that our permanent transaction
- * can keep on rolling it forward in the log.
+ * If the bunmapi call encounters an error, return to the caller where
+ * the transaction can be properly aborted. We just need to make sure
+ * we're not holding any resources that we were not when we came in.
*/
- if (fork == XFS_DATA_FORK) {
- xfs_isize_check(mp, ip, new_size);
+ xfs_bmap_cancel(&free_list);
+ goto out;
+}
+
+int
+xfs_itruncate_data(
+ struct xfs_trans **tpp,
+ struct xfs_inode *ip,
+ xfs_fsize_t new_size)
+{
+ int error;
+
+ trace_xfs_itruncate_data_start(ip, new_size);
+
+ /*
+ * The first thing we do is set the size to new_size permanently on
+ * disk. This way we don't have to worry about anyone ever being able
+ * to look at the data being freed even in the face of a crash.
+ * What we're getting around here is the case where we free a block, it
+ * is allocated to another file, it is written to, and then we crash.
+ * If the new data gets written to the file but the log buffers
+ * containing the free and reallocation don't, then we'd end up with
+ * garbage in the blocks being freed. As long as we make the new_size
+ * permanent before actually freeing any blocks it doesn't matter if
+ * they get written to.
+ */
+ if (ip->i_d.di_nextents > 0) {
/*
- * If we are not changing the file size then do
- * not update the on-disk file size - we may be
- * called from xfs_inactive_free_eofblocks(). If we
- * update the on-disk file size and then the system
- * crashes before the contents of the file are
- * flushed to disk then the files may be full of
- * holes (ie NULL files bug).
+ * If we are not changing the file size then do not update
+ * the on-disk file size - we may be called from
+ * xfs_inactive_free_eofblocks(). If we update the on-disk
+ * file size and then the system crashes before the contents
+ * of the file are flushed to disk then the files may be
+ * full of holes (ie NULL files bug).
*/
if (ip->i_size != new_size) {
ip->i_d.di_size = new_size;
ip->i_size = new_size;
+ xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
}
}
- xfs_trans_log_inode(ntp, ip, XFS_ILOG_CORE);
- ASSERT((new_size != 0) ||
- (fork == XFS_ATTR_FORK) ||
- (ip->i_delayed_blks == 0));
- ASSERT((new_size != 0) ||
- (fork == XFS_ATTR_FORK) ||
- (ip->i_d.di_nextents == 0));
- trace_xfs_itruncate_finish_end(ip, new_size);
+
+ error = xfs_itruncate_extents(tpp, ip, XFS_DATA_FORK, new_size);
+ if (error)
+ return error;
+
+ /*
+ * If we are not changing the file size then do not update the on-disk
+ * file size - we may be called from xfs_inactive_free_eofblocks().
+ * If we update the on-disk file size and then the system crashes
+ * before the contents of the file are flushed to disk then the files
+ * may be full of holes (i.e. NULL files bug).
+ */
+ xfs_isize_check(ip, new_size);
+ if (ip->i_size != new_size) {
+ ip->i_d.di_size = new_size;
+ ip->i_size = new_size;
+ }
+
+ ASSERT(new_size != 0 || ip->i_delayed_blks == 0);
+ ASSERT(new_size != 0 || ip->i_d.di_nextents == 0);
+
+ /*
+ * Always re-log the inode so that our permanent transaction can keep
+ * on rolling it forward in the log.
+ */
+ xfs_trans_log_inode(*tpp, ip, XFS_ILOG_CORE);
+
+ trace_xfs_itruncate_data_end(ip, new_size);
return 0;
}
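
The loop above rolls a permanent transaction: xfs_trans_dup() opens a follow-on transaction before the old one commits, xfs_trans_reserve() tops the reservation back up, and the inode is rejoined, so *tpp always hands a live transaction back to the caller whether we leave through out or out_bmap_cancel. A minimal user-space sketch of that double-pointer roll (illustrative only; roll_trans() below is a hypothetical stand-in, not kernel API):

#include <stdio.h>
#include <stdlib.h>

struct trans { int seq; };

/* Stand-in for the xfs_trans_dup/commit/reserve/ijoin sequence: the
 * caller's handle is swapped for a fresh transaction while work
 * continues under the same permanent reservation. */
static int roll_trans(struct trans **tpp)
{
	struct trans *ntp = malloc(sizeof(*ntp));

	if (!ntp)
		return -1;
	ntp->seq = (*tpp)->seq + 1;	/* "dup" the old transaction */
	free(*tpp);			/* "commit" it */
	*tpp = ntp;			/* caller continues with the new one */
	return 0;
}

int main(void)
{
	struct trans *tp = malloc(sizeof(*tp));
	int chunk;

	tp->seq = 0;
	/* Unmap extents a chunk at a time, rolling between chunks so no
	 * single commit pins too much log space. */
	for (chunk = 0; chunk < 3; chunk++) {
		printf("chunk %d freed in trans %d\n", chunk, tp->seq);
		if (roll_trans(&tp))
			return 1;
	}
	free(tp);
	return 0;
}

Taking a struct trans ** lets the callee swap in the new transaction without the caller re-reading any state, which is why both xfs_itruncate_extents() and xfs_itruncate_data() take **tpp.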
@@ -1686,7 +1428,6 @@ xfs_iunlink(
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_mode != 0);
- ASSERT(ip->i_transp == tp);
mp = tp->t_mountp;
@@ -1709,7 +1450,7 @@ xfs_iunlink(
ASSERT(agi->agi_unlinked[bucket_index]);
ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != agino);
- if (be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO) {
+ if (agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO)) {
/*
* There is already another inode in the bucket we need
* to add ourselves to. Add us at the front of the list.
@@ -1720,8 +1461,7 @@ xfs_iunlink(
if (error)
return error;
- ASSERT(be32_to_cpu(dip->di_next_unlinked) == NULLAGINO);
- /* both on-disk, don't endian flip twice */
+ ASSERT(dip->di_next_unlinked == cpu_to_be32(NULLAGINO));
dip->di_next_unlinked = agi->agi_unlinked[bucket_index];
offset = ip->i_imap.im_boffset +
offsetof(xfs_dinode_t, di_next_unlinked);
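
Note the pattern in the comparisons here: be32_to_cpu(x) != CONST becomes x != cpu_to_be32(CONST), so the byte swap lands on the compile-time constant rather than on the value loaded from disk. A user-space analogue, with glibc's htobe32()/be32toh() standing in for cpu_to_be32()/be32_to_cpu():

#include <endian.h>
#include <stdint.h>
#include <stdio.h>

#define MAGIC 0xfeedbabe

int main(void)
{
	uint32_t on_disk = htobe32(MAGIC);	/* field as stored on disk */

	/* Swaps the loaded value at run time on little-endian CPUs: */
	if (be32toh(on_disk) == MAGIC)
		puts("match (runtime swap of the loaded value)");

	/* Swaps the constant instead; it folds at compile time: */
	if (on_disk == htobe32(MAGIC))
		puts("match (swap folded into the constant)");
	return 0;
}

Both forms test the same condition; on a little-endian CPU only the first one does any work at run time.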
@@ -1786,7 +1526,7 @@ xfs_iunlink_remove(
agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
ASSERT(agino != 0);
bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS;
- ASSERT(be32_to_cpu(agi->agi_unlinked[bucket_index]) != NULLAGINO);
+ ASSERT(agi->agi_unlinked[bucket_index] != cpu_to_be32(NULLAGINO));
ASSERT(agi->agi_unlinked[bucket_index]);
if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
@@ -1898,7 +1638,7 @@ xfs_iunlink_remove(
* inodes that are in memory - they all must be marked stale and attached to
* the cluster buffer.
*/
-STATIC void
+STATIC int
xfs_ifree_cluster(
xfs_inode_t *free_ip,
xfs_trans_t *tp,
@@ -1944,6 +1684,8 @@ xfs_ifree_cluster(
mp->m_bsize * blks_per_cluster,
XBF_LOCK);
+ if (!bp)
+ return ENOMEM;
/*
* Walk the inodes already attached to the buffer and mark them
* stale. These will all have the flush locks held, so an
@@ -1951,7 +1693,7 @@ xfs_ifree_cluster(
* stale first, we will not attempt to lock them in the loop
* below as the XFS_ISTALE flag will be set.
*/
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ lip = bp->b_fspriv;
while (lip) {
if (lip->li_type == XFS_LI_INODE) {
iip = (xfs_inode_log_item_t *)lip;
@@ -2053,6 +1795,7 @@ retry:
}
xfs_perag_put(pag);
+ return 0;
}
/*
@@ -2078,12 +1821,11 @@ xfs_ifree(
xfs_buf_t *ibp;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_transp == tp);
ASSERT(ip->i_d.di_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0);
ASSERT((ip->i_d.di_size == 0 && ip->i_size == 0) ||
- ((ip->i_d.di_mode & S_IFMT) != S_IFREG));
+ (!S_ISREG(ip->i_d.di_mode)));
ASSERT(ip->i_d.di_nblocks == 0);
/*
@@ -2133,10 +1875,10 @@ xfs_ifree(
dip->di_mode = 0;
if (delete) {
- xfs_ifree_cluster(ip, tp, first_ino);
+ error = xfs_ifree_cluster(ip, tp, first_ino);
}
- return 0;
+ return error;
}
/*
@@ -2725,13 +2467,13 @@ cluster_corrupt_out:
* mark the buffer as an error and call them. Otherwise
* mark it as stale and brelse.
*/
- if (XFS_BUF_IODONE_FUNC(bp)) {
+ if (bp->b_iodone) {
XFS_BUF_UNDONE(bp);
- XFS_BUF_STALE(bp);
- XFS_BUF_ERROR(bp,EIO);
+ xfs_buf_stale(bp);
+ xfs_buf_ioerror(bp, EIO);
xfs_buf_ioend(bp, 0);
} else {
- XFS_BUF_STALE(bp);
+ xfs_buf_stale(bp);
xfs_buf_relse(bp);
}
}
@@ -2840,7 +2582,7 @@ xfs_iflush(
* If the buffer is pinned then push on the log now so we won't
* get stuck waiting in the write for too long.
*/
- if (XFS_BUF_ISPINNED(bp))
+ if (xfs_buf_ispinned(bp))
xfs_log_force(mp, 0);
/*
@@ -2852,9 +2594,11 @@ xfs_iflush(
goto cluster_corrupt_out;
if (flags & SYNC_WAIT)
- error = xfs_bwrite(mp, bp);
+ error = xfs_bwrite(bp);
else
- xfs_bdwrite(mp, bp);
+ xfs_buf_delwri_queue(bp);
+
+ xfs_buf_relse(bp);
return error;
corrupt_out:
@@ -2912,7 +2656,7 @@ xfs_iflush_int(
*/
xfs_synchronize_times(ip);
- if (XFS_TEST_ERROR(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC,
+ if (XFS_TEST_ERROR(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC),
mp, XFS_ERRTAG_IFLUSH_1, XFS_RANDOM_IFLUSH_1)) {
xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
"%s: Bad inode %Lu magic number 0x%x, ptr 0x%p",
@@ -2926,7 +2670,7 @@ xfs_iflush_int(
__func__, ip->i_ino, ip, ip->i_d.di_magic);
goto corrupt_out;
}
- if ((ip->i_d.di_mode & S_IFMT) == S_IFREG) {
+ if (S_ISREG(ip->i_d.di_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -2936,7 +2680,7 @@ xfs_iflush_int(
__func__, ip->i_ino, ip);
goto corrupt_out;
}
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ } else if (S_ISDIR(ip->i_d.di_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
@@ -3065,8 +2809,8 @@ xfs_iflush_int(
*/
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
- ASSERT(XFS_BUF_IODONE_FUNC(bp) != NULL);
+ ASSERT(bp->b_fspriv != NULL);
+ ASSERT(bp->b_iodone != NULL);
} else {
/*
* We're flushing an inode which is not in the AIL and has
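
With xfs_bdwrite() gone, xfs_iflush() makes the two writeback modes explicit: a synchronous xfs_bwrite() when SYNC_WAIT is set, otherwise xfs_buf_delwri_queue(), and in both cases the buffer reference is dropped with xfs_buf_relse() before returning. A rough sketch of that control flow (all helpers below are stand-ins, not the real buffer API):

#include <stdio.h>

#define SYNC_WAIT 1

struct buf { int id; };

static int write_now(struct buf *bp)
{
	printf("bwrite %d\n", bp->id);	/* synchronous, error known now */
	return 0;
}

static void queue_delwri(struct buf *bp)
{
	printf("queue %d\n", bp->id);	/* write happens later */
}

static void release(struct buf *bp)
{
	printf("relse %d\n", bp->id);	/* drop our reference */
}

/* Mirrors the tail of xfs_iflush(): only the synchronous path can
 * report an I/O error immediately; the delayed path returns 0 and any
 * error surfaces at I/O completion. The reference is dropped either
 * way. */
static int flush(struct buf *bp, int flags)
{
	int error = 0;

	if (flags & SYNC_WAIT)
		error = write_now(bp);
	else
		queue_delwri(bp);
	release(bp);
	return error;
}

int main(void)
{
	struct buf b = { 1 };

	flush(&b, SYNC_WAIT);
	flush(&b, 0);
	return 0;
}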
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 28b3596..b4cd473 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -241,7 +241,6 @@ typedef struct xfs_inode {
xfs_ifork_t i_df; /* data fork */
/* Transaction and locking information. */
- struct xfs_trans *i_transp; /* ptr to owning transaction*/
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
mrlock_t i_iolock; /* inode IO lock */
@@ -258,13 +257,12 @@ typedef struct xfs_inode {
xfs_fsize_t i_size; /* in-memory size */
xfs_fsize_t i_new_size; /* size when write completes */
- atomic_t i_iocount; /* outstanding I/O count */
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
} xfs_inode_t;
-#define XFS_ISIZE(ip) (((ip)->i_d.di_mode & S_IFMT) == S_IFREG) ? \
+#define XFS_ISIZE(ip) S_ISREG((ip)->i_d.di_mode) ? \
(ip)->i_size : (ip)->i_d.di_size;
/* Convert from vfs inode to xfs inode */
@@ -458,16 +456,6 @@ static inline void xfs_ifunlock(xfs_inode_t *ip)
extern struct lock_class_key xfs_iolock_reclaimable;
/*
- * Flags for xfs_itruncate_start().
- */
-#define XFS_ITRUNC_DEFINITE 0x1
-#define XFS_ITRUNC_MAYBE 0x2
-
-#define XFS_ITRUNC_FLAGS \
- { XFS_ITRUNC_DEFINITE, "DEFINITE" }, \
- { XFS_ITRUNC_MAYBE, "MAYBE" }
-
-/*
* For multiple groups support: if S_ISGID bit is set in the parent
* directory, group of new file is set to that of the parent, and
* new subdirectory gets S_ISGID bit from parent.
@@ -501,9 +489,10 @@ uint xfs_ip2xflags(struct xfs_inode *);
uint xfs_dic2xflags(struct xfs_dinode *);
int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
struct xfs_bmap_free *);
-int xfs_itruncate_start(xfs_inode_t *, uint, xfs_fsize_t);
-int xfs_itruncate_finish(struct xfs_trans **, xfs_inode_t *,
- xfs_fsize_t, int, int);
+int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
+ int, xfs_fsize_t);
+int xfs_itruncate_data(struct xfs_trans **, struct xfs_inode *,
+ xfs_fsize_t);
int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
void xfs_iext_realloc(xfs_inode_t *, int, int);
@@ -580,13 +569,6 @@ void xfs_iext_irec_update_extoffs(xfs_ifork_t *, int, int);
#define xfs_ipincount(ip) ((unsigned int) atomic_read(&ip->i_pincount))
-#ifdef DEBUG
-void xfs_isize_check(struct xfs_mount *, struct xfs_inode *,
- xfs_fsize_t);
-#else /* DEBUG */
-#define xfs_isize_check(mp, ip, isize)
-#endif /* DEBUG */
-
#if defined(DEBUG)
void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
#else
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 391044c..abaafdb 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -632,13 +632,8 @@ xfs_inode_item_unlock(
struct xfs_inode *ip = iip->ili_inode;
unsigned short lock_flags;
- ASSERT(iip->ili_inode->i_itemp != NULL);
- ASSERT(xfs_isilocked(iip->ili_inode, XFS_ILOCK_EXCL));
-
- /*
- * Clear the transaction pointer in the inode.
- */
- ip->i_transp = NULL;
+ ASSERT(ip->i_itemp != NULL);
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
/*
* If the inode needed a separate buffer with which to log
@@ -663,10 +658,8 @@ xfs_inode_item_unlock(
lock_flags = iip->ili_lock_flags;
iip->ili_lock_flags = 0;
- if (lock_flags) {
- xfs_iunlock(iip->ili_inode, lock_flags);
- IRELE(iip->ili_inode);
- }
+ if (lock_flags)
+ xfs_iunlock(ip, lock_flags);
}
/*
@@ -742,7 +735,7 @@ xfs_inode_item_pushbuf(
return true;
if (XFS_BUF_ISDELAYWRITE(bp))
xfs_buf_delwri_promote(bp);
- if (XFS_BUF_ISPINNED(bp))
+ if (xfs_buf_ispinned(bp))
ret = false;
xfs_buf_relse(bp);
return ret;
@@ -802,7 +795,7 @@ xfs_inode_item_committing(
/*
* This is the ops vector shared by all buf log items.
*/
-static struct xfs_item_ops xfs_inode_item_ops = {
+static const struct xfs_item_ops xfs_inode_item_ops = {
.iop_size = xfs_inode_item_size,
.iop_format = xfs_inode_item_format,
.iop_pin = xfs_inode_item_pin,
@@ -883,7 +876,7 @@ xfs_iflush_done(
* Scan the buffer IO completions for other inodes being completed and
* attach them to the current inode log item.
*/
- blip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
+ blip = bp->b_fspriv;
prev = NULL;
while (blip != NULL) {
if (lip->li_cb != xfs_iflush_done) {
@@ -895,7 +888,7 @@ xfs_iflush_done(
/* remove from list */
next = blip->li_bio_list;
if (!prev) {
- XFS_BUF_SET_FSPRIVATE(bp, next);
+ bp->b_fspriv = next;
} else {
prev->li_bio_list = next;
}
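
Constifying the ops vector (xfs_inode_item_ops above, and struct xfs_item_ops generally) lets the compiler place the function-pointer table in read-only data, where it cannot be overwritten at run time; xfs_log_item_init() only needs a const-qualified parameter to match. The idiom in miniature:

#include <stdio.h>

struct item_ops {
	void (*format)(void);
};

static void fmt(void)
{
	puts("format");
}

/* A const ops vector lands in .rodata, so the function pointers
 * cannot be patched after initialization. */
static const struct item_ops inode_item_ops = {
	.format = fmt,
};

int main(void)
{
	inode_item_ops.format();
	return 0;
}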
diff --git a/fs/xfs/xfs_inum.h b/fs/xfs/xfs_inum.h
index b8e4ee4..b253c0e 100644
--- a/fs/xfs/xfs_inum.h
+++ b/fs/xfs/xfs_inum.h
@@ -28,17 +28,6 @@
typedef __uint32_t xfs_agino_t; /* within allocation grp inode number */
-/*
- * Useful inode bits for this kernel.
- * Used in some places where having 64-bits in the 32-bit kernels
- * costs too much.
- */
-#if XFS_BIG_INUMS
-typedef xfs_ino_t xfs_intino_t;
-#else
-typedef __uint32_t xfs_intino_t;
-#endif
-
#define NULLFSINO ((xfs_ino_t)-1)
#define NULLAGINO ((xfs_agino_t)-1)
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 091d82b..9afa282 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -208,22 +208,20 @@ xfs_iomap_write_direct(
if (error)
goto error1;
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
- bmapi_flag = XFS_BMAPI_WRITE;
+ bmapi_flag = 0;
if (offset < ip->i_size || extsz)
bmapi_flag |= XFS_BMAPI_PREALLOC;
/*
- * Issue the xfs_bmapi() call to allocate the blocks.
- *
* From this point onwards we overwrite the imap pointer that the
* caller gave to us.
*/
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
- error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
- &firstfsb, 0, imap, &nimaps, &free_list);
+ error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag,
+ &firstfsb, 0, imap, &nimaps, &free_list);
if (error)
goto error0;
@@ -300,8 +298,8 @@ xfs_iomap_eof_want_preallocate(
while (count_fsb > 0) {
imaps = nimaps;
firstblock = NULLFSBLOCK;
- error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
- &firstblock, 0, imap, &imaps, NULL);
+ error = xfs_bmapi_read(ip, start_fsb, count_fsb, imap, &imaps,
+ 0);
if (error)
return error;
for (n = 0; n < imaps; n++) {
@@ -381,7 +379,6 @@ xfs_iomap_write_delay(
xfs_fileoff_t last_fsb;
xfs_off_t aligned_offset;
xfs_fileoff_t ioalign;
- xfs_fsblock_t firstblock;
xfs_extlen_t extsz;
int nimaps;
xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
@@ -425,12 +422,8 @@ retry:
}
nimaps = XFS_WRITE_IMAPS;
- firstblock = NULLFSBLOCK;
- error = xfs_bmapi(NULL, ip, offset_fsb,
- (xfs_filblks_t)(last_fsb - offset_fsb),
- XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
- XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
- &nimaps, NULL);
+ error = xfs_bmapi_delay(ip, offset_fsb, last_fsb - offset_fsb,
+ imap, &nimaps, XFS_BMAPI_ENTIRE);
switch (error) {
case 0:
case ENOSPC:
@@ -535,7 +528,7 @@ xfs_iomap_write_allocate(
return XFS_ERROR(error);
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
xfs_bmap_init(&free_list, &first_block);
@@ -587,14 +580,12 @@ xfs_iomap_write_allocate(
}
/*
- * Go get the actual blocks.
- *
* From this point onwards we overwrite the imap
* pointer that the caller gave to us.
*/
- error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
- XFS_BMAPI_WRITE, &first_block, 1,
- imap, &nimaps, &free_list);
+ error = xfs_bmapi_write(tp, ip, map_start_fsb,
+ count_fsb, 0, &first_block, 1,
+ imap, &nimaps, &free_list);
if (error)
goto trans_cancel;
@@ -701,15 +692,15 @@ xfs_iomap_write_unwritten(
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
/*
* Modify the unwritten extent state of the buffer.
*/
xfs_bmap_init(&free_list, &firstfsb);
nimaps = 1;
- error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
- XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
+ error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
+ XFS_BMAPI_CONVERT, &firstfsb,
1, &imap, &nimaps, &free_list);
if (error)
goto error_on_bmapi_transaction;
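
All of these call sites follow the split of the old flag-multiplexed xfs_bmapi() into intent-specific helpers: xfs_bmapi_read() for lookups (no transaction, no free list), xfs_bmapi_write() for allocations, and xfs_bmapi_delay() for delalloc reservations. The shape of that refactor, sketched outside the kernel with hypothetical names:

#include <stdio.h>

/* Before: one entry point, behaviour steered by a WRITE flag, and
 * read-only callers still had to pass allocation-only arguments. */
static int mapi(int write, int blk, int *firstblock)
{
	(void)firstblock;	/* unused on the read path */
	return write ? blk + 100 : blk;
}

/* After: intent is explicit at the call site and each variant takes
 * only the arguments it uses. */
static int mapi_read(int blk)
{
	return blk;
}

static int mapi_write(int blk, int *firstblock)
{
	(void)firstblock;
	return blk + 100;
}

int main(void)
{
	int first = -1;

	printf("%d %d\n", mapi(0, 7, &first), mapi_read(7));
	printf("%d %d\n", mapi(1, 7, &first), mapi_write(7, &first));
	return 0;
}

Read-only callers no longer pass a transaction or firstblock at all, which is visible above in xfs_iomap_eof_want_preallocate().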
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 41d5b8f..10ca5e5 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -150,6 +150,117 @@ xlog_grant_add_space(
} while (head_val != old);
}
+STATIC bool
+xlog_reserveq_wake(
+ struct log *log,
+ int *free_bytes)
+{
+ struct xlog_ticket *tic;
+ int need_bytes;
+
+ list_for_each_entry(tic, &log->l_reserveq, t_queue) {
+ if (tic->t_flags & XLOG_TIC_PERM_RESERV)
+ need_bytes = tic->t_unit_res * tic->t_cnt;
+ else
+ need_bytes = tic->t_unit_res;
+
+ if (*free_bytes < need_bytes)
+ return false;
+ *free_bytes -= need_bytes;
+
+ trace_xfs_log_grant_wake_up(log, tic);
+ wake_up(&tic->t_wait);
+ }
+
+ return true;
+}
+
+STATIC bool
+xlog_writeq_wake(
+ struct log *log,
+ int *free_bytes)
+{
+ struct xlog_ticket *tic;
+ int need_bytes;
+
+ list_for_each_entry(tic, &log->l_writeq, t_queue) {
+ ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
+
+ need_bytes = tic->t_unit_res;
+
+ if (*free_bytes < need_bytes)
+ return false;
+ *free_bytes -= need_bytes;
+
+ trace_xfs_log_regrant_write_wake_up(log, tic);
+ wake_up(&tic->t_wait);
+ }
+
+ return true;
+}
+
+STATIC int
+xlog_reserveq_wait(
+ struct log *log,
+ struct xlog_ticket *tic,
+ int need_bytes)
+{
+ list_add_tail(&tic->t_queue, &log->l_reserveq);
+
+ do {
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto shutdown;
+ xlog_grant_push_ail(log, need_bytes);
+
+ XFS_STATS_INC(xs_sleep_logspace);
+ trace_xfs_log_grant_sleep(log, tic);
+
+ xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+ trace_xfs_log_grant_wake(log, tic);
+
+ spin_lock(&log->l_grant_reserve_lock);
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto shutdown;
+ } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes);
+
+ list_del_init(&tic->t_queue);
+ return 0;
+shutdown:
+ list_del_init(&tic->t_queue);
+ return XFS_ERROR(EIO);
+}
+
+STATIC int
+xlog_writeq_wait(
+ struct log *log,
+ struct xlog_ticket *tic,
+ int need_bytes)
+{
+ list_add_tail(&tic->t_queue, &log->l_writeq);
+
+ do {
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto shutdown;
+ xlog_grant_push_ail(log, need_bytes);
+
+ XFS_STATS_INC(xs_sleep_logspace);
+ trace_xfs_log_regrant_write_sleep(log, tic);
+
+ xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
+ trace_xfs_log_regrant_write_wake(log, tic);
+
+ spin_lock(&log->l_grant_write_lock);
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto shutdown;
+ } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes);
+
+ list_del_init(&tic->t_queue);
+ return 0;
+shutdown:
+ list_del_init(&tic->t_queue);
+ return XFS_ERROR(EIO);
+}
+
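
The four new helpers factor the grant machinery into a wake sweep (xlog_reserveq_wake()/xlog_writeq_wake()) and a sleep loop (xlog_reserveq_wait()/xlog_writeq_wait()). The wait side queues the ticket, then loops: bail on shutdown, push the AIL, and sleep; xlog_wait() drops the grant lock for the sleep, exactly like a condition-variable wait. A pthread analogue of the loop (user-space sketch, not kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t grant_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  grant_cond = PTHREAD_COND_INITIALIZER;
static int free_bytes = 64;
static int shutdown;

/* Same shape as xlog_reserveq_wait(): called with grant_lock held,
 * sleeps (lock dropped, as in xlog_wait()) until enough space is
 * available or the log shuts down. */
static int reserveq_wait(int need_bytes)
{
	while (free_bytes < need_bytes) {
		if (shutdown)
			return -1;		/* EIO in the kernel */
		pthread_cond_wait(&grant_cond, &grant_lock);
	}
	free_bytes -= need_bytes;
	return 0;
}

int main(void)
{
	pthread_mutex_lock(&grant_lock);
	free_bytes = 128;
	printf("grant: %d\n", reserveq_wait(100));
	pthread_mutex_unlock(&grant_lock);
	return 0;
}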
static void
xlog_tic_reset_res(xlog_ticket_t *tic)
{
@@ -350,8 +461,19 @@ xfs_log_reserve(
retval = xlog_grant_log_space(log, internal_ticket);
}
+ if (unlikely(retval)) {
+ /*
+ * If we are failing, make sure the ticket doesn't have any
+ * current reservations. We don't want to add this back
+ * when the ticket/transaction gets cancelled.
+ */
+ internal_ticket->t_curr_res = 0;
+ /* ungrant will give back unit_res * t_cnt. */
+ internal_ticket->t_cnt = 0;
+ }
+
return retval;
-} /* xfs_log_reserve */
+}
/*
@@ -531,8 +653,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
.lv_iovecp = &reg,
};
- /* remove inited flag */
+ /* remove inited flag, and account for space used */
tic->t_flags = 0;
+ tic->t_curr_res -= sizeof(magic);
error = xlog_write(log, &vec, tic, &lsn,
NULL, XLOG_UNMOUNT_TRANS);
/*
@@ -626,7 +749,7 @@ xfs_log_item_init(
struct xfs_mount *mp,
struct xfs_log_item *item,
int type,
- struct xfs_item_ops *ops)
+ const struct xfs_item_ops *ops)
{
item->li_mountp = mp;
item->li_ailp = mp->m_ail;
@@ -871,23 +994,17 @@ xlog_space_left(
void
xlog_iodone(xfs_buf_t *bp)
{
- xlog_in_core_t *iclog;
- xlog_t *l;
- int aborted;
-
- iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
- ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long) 2);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
- aborted = 0;
- l = iclog->ic_log;
+ xlog_in_core_t *iclog = bp->b_fspriv;
+ xlog_t *l = iclog->ic_log;
+ int aborted = 0;
/*
* Race to shutdown the filesystem if we see an error.
*/
- if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
+ if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp,
XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
- xfs_ioerror_alert("xlog_iodone", l->l_mp, bp, XFS_BUF_ADDR(bp));
- XFS_BUF_STALE(bp);
+ xfs_buf_ioerror_alert(bp, __func__);
+ xfs_buf_stale(bp);
xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
/*
* This flag will be propagated to the trans-committed
@@ -1053,13 +1170,11 @@ xlog_alloc_log(xfs_mount_t *mp,
xlog_get_iclog_buffer_size(mp, log);
error = ENOMEM;
- bp = xfs_buf_get_empty(log->l_iclog_size, mp->m_logdev_targp);
+ bp = xfs_buf_alloc(mp->m_logdev_targp, 0, log->l_iclog_size, 0);
if (!bp)
goto out_free_log;
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
+ bp->b_iodone = xlog_iodone;
+ ASSERT(xfs_buf_islocked(bp));
log->l_xbuf = bp;
spin_lock_init(&log->l_icloglock);
@@ -1090,10 +1205,8 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_iclog_size, 0);
if (!bp)
goto out_free_iclog;
- if (!XFS_BUF_CPSEMA(bp))
- ASSERT(0);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_iodone);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)1);
+
+ bp->b_iodone = xlog_iodone;
iclog->ic_bp = bp;
iclog->ic_data = bp->b_addr;
#ifdef DEBUG
@@ -1117,8 +1230,7 @@ xlog_alloc_log(xfs_mount_t *mp,
iclog->ic_callback_tail = &(iclog->ic_callback);
iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
- ASSERT(XFS_BUF_ISBUSY(iclog->ic_bp));
- ASSERT(XFS_BUF_VALUSEMA(iclog->ic_bp) <= 0);
+ ASSERT(xfs_buf_islocked(iclog->ic_bp));
init_waitqueue_head(&iclog->ic_force_wait);
init_waitqueue_head(&iclog->ic_write_wait);
@@ -1254,12 +1366,11 @@ STATIC int
xlog_bdstrat(
struct xfs_buf *bp)
{
- struct xlog_in_core *iclog;
+ struct xlog_in_core *iclog = bp->b_fspriv;
- iclog = XFS_BUF_FSPRIVATE(bp, xlog_in_core_t *);
if (iclog->ic_state & XLOG_STATE_IOERROR) {
- XFS_BUF_ERROR(bp, EIO);
- XFS_BUF_STALE(bp);
+ xfs_buf_ioerror(bp, EIO);
+ xfs_buf_stale(bp);
xfs_buf_ioend(bp, 0);
/*
* It would seem logical to return EIO here, but we rely on
@@ -1269,7 +1380,6 @@ xlog_bdstrat(
return 0;
}
- bp->b_flags |= _XBF_RUN_QUEUES;
xfs_buf_iorequest(bp);
return 0;
}
@@ -1351,8 +1461,6 @@ xlog_sync(xlog_t *log,
}
bp = iclog->ic_bp;
- ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) == (unsigned long)1);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
XFS_STATS_ADD(xs_log_blocks, BTOBB(count));
@@ -1366,22 +1474,27 @@ xlog_sync(xlog_t *log,
iclog->ic_bwritecnt = 1;
}
XFS_BUF_SET_COUNT(bp, count);
- XFS_BUF_SET_FSPRIVATE(bp, iclog); /* save for later */
+ bp->b_fspriv = iclog;
XFS_BUF_ZEROFLAGS(bp);
- XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
- bp->b_flags |= XBF_LOG_BUFFER;
+ bp->b_flags |= XBF_SYNCIO;
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
+ bp->b_flags |= XBF_FUA;
+
/*
- * If we have an external log device, flush the data device
- * before flushing the log to make sure all meta data
- * written back from the AIL actually made it to disk
- * before writing out the new log tail LSN in the log buffer.
+ * Flush the data device before flushing the log to make
+ * sure all meta data written back from the AIL actually made
+ * it to disk before stamping the new log tail LSN into the
+ * log buffer. For an external log we need to issue the
+ * flush explicitly, and unfortunately synchronously here;
+ * for an internal log we can simply use the block layer
+ * state machine for preflushes.
*/
if (log->l_mp->m_logdev_targp != log->l_mp->m_ddev_targp)
xfs_blkdev_issue_flush(log->l_mp->m_ddev_targp);
- XFS_BUF_ORDERED(bp);
+ else
+ bp->b_flags |= XBF_FLUSH;
}
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
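
The barrier path changes meaning here: rather than a fully ordered write (XBF_ORDERED), the log buffer is marked FUA, and the required cache flush is issued explicitly and synchronously for an external log device, or delegated to the block layer's preflush (XBF_FLUSH) for an internal one. The invariant being preserved, as a user-space analogue with fsync() standing in for both the flush and the FUA semantics:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Flush the data device first, then make the log record itself
 * durable: metadata written back from the AIL must be on disk before
 * the record stamping the new tail LSN is. */
static int write_log_record(int datafd, int logfd, const char *rec)
{
	if (fsync(datafd) < 0)		/* data stable before new tail LSN */
		return -1;
	if (write(logfd, rec, strlen(rec)) < 0)
		return -1;
	return fsync(logfd);		/* record durable before completion */
}

int main(void)
{
	int datafd = open("data.tmp", O_RDWR | O_CREAT, 0600);
	int logfd  = open("log.tmp",  O_RDWR | O_CREAT, 0600);

	if (datafd < 0 || logfd < 0)
		return 1;
	printf("%d\n", write_log_record(datafd, logfd, "tail-lsn\n"));
	return 0;
}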
@@ -1397,27 +1510,23 @@ xlog_sync(xlog_t *log,
*/
XFS_BUF_WRITE(bp);
- if ((error = xlog_bdstrat(bp))) {
- xfs_ioerror_alert("xlog_sync", log->l_mp, bp,
- XFS_BUF_ADDR(bp));
+ error = xlog_bdstrat(bp);
+ if (error) {
+ xfs_buf_ioerror_alert(bp, "xlog_sync");
return error;
}
if (split) {
bp = iclog->ic_log->l_xbuf;
- ASSERT(XFS_BUF_FSPRIVATE2(bp, unsigned long) ==
- (unsigned long)1);
- XFS_BUF_SET_FSPRIVATE2(bp, (unsigned long)2);
XFS_BUF_SET_ADDR(bp, 0); /* logical 0 */
- XFS_BUF_SET_PTR(bp, (xfs_caddr_t)((__psint_t)&(iclog->ic_header)+
- (__psint_t)count), split);
- XFS_BUF_SET_FSPRIVATE(bp, iclog);
+ xfs_buf_associate_memory(bp,
+ (char *)&iclog->ic_header + count, split);
+ bp->b_fspriv = iclog;
XFS_BUF_ZEROFLAGS(bp);
- XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
- bp->b_flags |= XBF_LOG_BUFFER;
+ bp->b_flags |= XBF_SYNCIO;
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
- XFS_BUF_ORDERED(bp);
- dptr = XFS_BUF_PTR(bp);
+ bp->b_flags |= XBF_FUA;
+ dptr = bp->b_addr;
/*
* Bump the cycle numbers at the start of each block
* since this part of the buffer is at the start of
@@ -1437,9 +1546,9 @@ xlog_sync(xlog_t *log,
/* account for internal log which doesn't start at block #0 */
XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
XFS_BUF_WRITE(bp);
- if ((error = xlog_bdstrat(bp))) {
- xfs_ioerror_alert("xlog_sync (split)", log->l_mp,
- bp, XFS_BUF_ADDR(bp));
+ error = xlog_bdstrat(bp);
+ if (error) {
+ xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
return error;
}
}
@@ -2495,8 +2604,8 @@ restart:
/*
* Atomically get the log space required for a log ticket.
*
- * Once a ticket gets put onto the reserveq, it will only return after
- * the needed reservation is satisfied.
+ * Once a ticket gets put onto the reserveq, it will only return after the
+ * needed reservation is satisfied.
*
* This function is structured so that it has a lock free fast path. This is
* necessary because every new transaction reservation will come through this
@@ -2504,113 +2613,53 @@ restart:
* every pass.
*
* As tickets are only ever moved on and off the reserveq under the
- * l_grant_reserve_lock, we only need to take that lock if we are going
- * to add the ticket to the queue and sleep. We can avoid taking the lock if the
- * ticket was never added to the reserveq because the t_queue list head will be
- * empty and we hold the only reference to it so it can safely be checked
- * unlocked.
+ * l_grant_reserve_lock, we only need to take that lock if we are going to add
+ * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
+ * was never added to the reserveq because the t_queue list head will be empty
+ * and we hold the only reference to it so it can safely be checked unlocked.
*/
STATIC int
-xlog_grant_log_space(xlog_t *log,
- xlog_ticket_t *tic)
+xlog_grant_log_space(
+ struct log *log,
+ struct xlog_ticket *tic)
{
- int free_bytes;
- int need_bytes;
+ int free_bytes, need_bytes;
+ int error = 0;
-#ifdef DEBUG
- if (log->l_flags & XLOG_ACTIVE_RECOVERY)
- panic("grant Recovery problem");
-#endif
+ ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
trace_xfs_log_grant_enter(log, tic);
+ /*
+ * If there are other waiters on the queue then give them a chance at
+ * logspace before us. Wake up the first waiters; if we do not wake
+ * up all the waiters then go to sleep waiting for more free space,
+ * otherwise try to get some space for this transaction.
+ */
need_bytes = tic->t_unit_res;
if (tic->t_flags & XFS_LOG_PERM_RESERV)
need_bytes *= tic->t_ocnt;
-
- /* something is already sleeping; insert new transaction at end */
- if (!list_empty_careful(&log->l_reserveq)) {
- spin_lock(&log->l_grant_reserve_lock);
- /* recheck the queue now we are locked */
- if (list_empty(&log->l_reserveq)) {
- spin_unlock(&log->l_grant_reserve_lock);
- goto redo;
- }
- list_add_tail(&tic->t_queue, &log->l_reserveq);
-
- trace_xfs_log_grant_sleep1(log, tic);
-
- /*
- * Gotta check this before going to sleep, while we're
- * holding the grant lock.
- */
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- XFS_STATS_INC(xs_sleep_logspace);
- xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
-
- /*
- * If we got an error, and the filesystem is shutting down,
- * we'll catch it down below. So just continue...
- */
- trace_xfs_log_grant_wake1(log, tic);
- }
-
-redo:
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return_unlocked;
-
free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
- if (free_bytes < need_bytes) {
+ if (!list_empty_careful(&log->l_reserveq)) {
spin_lock(&log->l_grant_reserve_lock);
- if (list_empty(&tic->t_queue))
- list_add_tail(&tic->t_queue, &log->l_reserveq);
-
- trace_xfs_log_grant_sleep2(log, tic);
-
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- xlog_grant_push_ail(log, need_bytes);
-
- XFS_STATS_INC(xs_sleep_logspace);
- xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
-
- trace_xfs_log_grant_wake2(log, tic);
- goto redo;
- }
-
- if (!list_empty(&tic->t_queue)) {
+ if (!xlog_reserveq_wake(log, &free_bytes) ||
+ free_bytes < need_bytes)
+ error = xlog_reserveq_wait(log, tic, need_bytes);
+ spin_unlock(&log->l_grant_reserve_lock);
+ } else if (free_bytes < need_bytes) {
spin_lock(&log->l_grant_reserve_lock);
- list_del_init(&tic->t_queue);
+ error = xlog_reserveq_wait(log, tic, need_bytes);
spin_unlock(&log->l_grant_reserve_lock);
}
+ if (error)
+ return error;
- /* we've got enough space */
xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_grant_exit(log, tic);
xlog_verify_grant_tail(log);
return 0;
-
-error_return_unlocked:
- spin_lock(&log->l_grant_reserve_lock);
-error_return:
- list_del_init(&tic->t_queue);
- spin_unlock(&log->l_grant_reserve_lock);
- trace_xfs_log_grant_error(log, tic);
-
- /*
- * If we are failing, make sure the ticket doesn't have any
- * current reservations. We don't want to add this back when
- * the ticket/transaction gets cancelled.
- */
- tic->t_curr_res = 0;
- tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- return XFS_ERROR(EIO);
-} /* xlog_grant_log_space */
-
+}
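
The rewritten xlog_grant_log_space() keeps the lock-free fast path the comment above describes: list_empty_careful() peeks at the reserve queue without the lock, and l_grant_reserve_lock is taken only to run the wake sweep or to sleep. The check-unlocked-then-recheck-locked shape, as a small sketch (the unlocked peek is advisory only; the kernel re-validates under the lock and manipulates the grant heads atomically):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int nwaiters;		/* peeked without the lock, advisory */
static int free_bytes = 1024;

static int grant(int need)
{
	/* Fast path: nobody queued and enough space, no lock taken.
	 * (The kernel updates the grant head with atomic compare and
	 * exchange; plain arithmetic here keeps the sketch short.) */
	if (nwaiters == 0 && free_bytes >= need) {
		free_bytes -= need;
		return 0;
	}

	/* Slow path: recheck and queue under the lock. */
	pthread_mutex_lock(&lock);
	/* ... wake earlier waiters, sleep if still short of space ... */
	pthread_mutex_unlock(&lock);
	return 0;
}

int main(void)
{
	printf("%d\n", grant(128));
	return 0;
}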
/*
* Replenish the byte reservation required by moving the grant write head.
@@ -2619,10 +2668,12 @@ error_return:
* free fast path.
*/
STATIC int
-xlog_regrant_write_log_space(xlog_t *log,
- xlog_ticket_t *tic)
+xlog_regrant_write_log_space(
+ struct log *log,
+ struct xlog_ticket *tic)
{
- int free_bytes, need_bytes;
+ int free_bytes, need_bytes;
+ int error = 0;
tic->t_curr_res = tic->t_unit_res;
xlog_tic_reset_res(tic);
@@ -2630,104 +2681,38 @@ xlog_regrant_write_log_space(xlog_t *log,
if (tic->t_cnt > 0)
return 0;
-#ifdef DEBUG
- if (log->l_flags & XLOG_ACTIVE_RECOVERY)
- panic("regrant Recovery problem");
-#endif
+ ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
trace_xfs_log_regrant_write_enter(log, tic);
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return_unlocked;
- /* If there are other waiters on the queue then give them a
- * chance at logspace before us. Wake up the first waiters,
- * if we do not wake up all the waiters then go to sleep waiting
- * for more free space, otherwise try to get some space for
- * this transaction.
+ /*
+ * If there are other waiters on the queue then give them a chance at
+ * logspace before us. Wake up the first waiters; if we do not wake
+ * up all the waiters then go to sleep waiting for more free space,
+ * otherwise try to get some space for this transaction.
*/
need_bytes = tic->t_unit_res;
- if (!list_empty_careful(&log->l_writeq)) {
- struct xlog_ticket *ntic;
-
- spin_lock(&log->l_grant_write_lock);
- free_bytes = xlog_space_left(log, &log->l_grant_write_head);
- list_for_each_entry(ntic, &log->l_writeq, t_queue) {
- ASSERT(ntic->t_flags & XLOG_TIC_PERM_RESERV);
-
- if (free_bytes < ntic->t_unit_res)
- break;
- free_bytes -= ntic->t_unit_res;
- wake_up(&ntic->t_wait);
- }
-
- if (ntic != list_first_entry(&log->l_writeq,
- struct xlog_ticket, t_queue)) {
- if (list_empty(&tic->t_queue))
- list_add_tail(&tic->t_queue, &log->l_writeq);
- trace_xfs_log_regrant_write_sleep1(log, tic);
-
- xlog_grant_push_ail(log, need_bytes);
-
- XFS_STATS_INC(xs_sleep_logspace);
- xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
- trace_xfs_log_regrant_write_wake1(log, tic);
- } else
- spin_unlock(&log->l_grant_write_lock);
- }
-
-redo:
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return_unlocked;
-
free_bytes = xlog_space_left(log, &log->l_grant_write_head);
- if (free_bytes < need_bytes) {
+ if (!list_empty_careful(&log->l_writeq)) {
spin_lock(&log->l_grant_write_lock);
- if (list_empty(&tic->t_queue))
- list_add_tail(&tic->t_queue, &log->l_writeq);
-
- if (XLOG_FORCED_SHUTDOWN(log))
- goto error_return;
-
- xlog_grant_push_ail(log, need_bytes);
-
- XFS_STATS_INC(xs_sleep_logspace);
- trace_xfs_log_regrant_write_sleep2(log, tic);
- xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
-
- trace_xfs_log_regrant_write_wake2(log, tic);
- goto redo;
- }
-
- if (!list_empty(&tic->t_queue)) {
+ if (!xlog_writeq_wake(log, &free_bytes) ||
+ free_bytes < need_bytes)
+ error = xlog_writeq_wait(log, tic, need_bytes);
+ spin_unlock(&log->l_grant_write_lock);
+ } else if (free_bytes < need_bytes) {
spin_lock(&log->l_grant_write_lock);
- list_del_init(&tic->t_queue);
+ error = xlog_writeq_wait(log, tic, need_bytes);
spin_unlock(&log->l_grant_write_lock);
}
- /* we've got enough space */
+ if (error)
+ return error;
+
xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
trace_xfs_log_regrant_write_exit(log, tic);
xlog_verify_grant_tail(log);
return 0;
-
-
- error_return_unlocked:
- spin_lock(&log->l_grant_write_lock);
- error_return:
- list_del_init(&tic->t_queue);
- spin_unlock(&log->l_grant_write_lock);
- trace_xfs_log_regrant_write_error(log, tic);
-
- /*
- * If we are failing, make sure the ticket doesn't have any
- * current reservations. We don't want to add this back when
- * the ticket/transaction gets cancelled.
- */
- tic->t_curr_res = 0;
- tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
- return XFS_ERROR(EIO);
-} /* xlog_regrant_write_log_space */
-
+}
/* The first cnt-1 times through here we don't need to
* move the grant write head because the permanent
@@ -3521,13 +3506,13 @@ xlog_verify_iclog(xlog_t *log,
spin_unlock(&log->l_icloglock);
/* check log magic numbers */
- if (be32_to_cpu(iclog->ic_header.h_magicno) != XLOG_HEADER_MAGIC_NUM)
+ if (iclog->ic_header.h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: invalid magic num", __func__);
ptr = (xfs_caddr_t) &iclog->ic_header;
for (ptr += BBSIZE; ptr < ((xfs_caddr_t)&iclog->ic_header) + count;
ptr += BBSIZE) {
- if (be32_to_cpu(*(__be32 *)ptr) == XLOG_HEADER_MAGIC_NUM)
+ if (*(__be32 *)ptr == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
xfs_emerg(log->l_mp, "%s: unexpected magic num",
__func__);
}
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 78c9039..3f7bf45 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -137,7 +137,7 @@ struct xfs_trans;
void xfs_log_item_init(struct xfs_mount *mp,
struct xfs_log_item *item,
int type,
- struct xfs_item_ops *ops);
+ const struct xfs_item_ops *ops);
xfs_lsn_t xfs_log_done(struct xfs_mount *mp,
struct xlog_ticket *ticket,
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 6a5a1af..86ca506 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -91,6 +91,8 @@ xlog_get_bp(
xlog_t *log,
int nbblks)
{
+ struct xfs_buf *bp;
+
if (!xlog_buf_bbcount_valid(log, nbblks)) {
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
@@ -118,8 +120,10 @@ xlog_get_bp(
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return xfs_buf_get_uncached(log->l_mp->m_logdev_targp,
- BBTOB(nbblks), 0);
+ bp = xfs_buf_get_uncached(log->l_mp->m_logdev_targp, BBTOB(nbblks), 0);
+ if (bp)
+ xfs_buf_unlock(bp);
+ return bp;
}
STATIC void
@@ -143,7 +147,7 @@ xlog_align(
xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
ASSERT(BBTOB(offset + nbblks) <= XFS_BUF_SIZE(bp));
- return XFS_BUF_PTR(bp) + BBTOB(offset);
+ return bp->b_addr + BBTOB(offset);
}
@@ -174,15 +178,12 @@ xlog_bread_noalign(
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
XFS_BUF_READ(bp);
- XFS_BUF_BUSY(bp);
XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
- XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
xfsbdstrat(log->l_mp, bp);
error = xfs_buf_iowait(bp);
if (error)
- xfs_ioerror_alert("xlog_bread", log->l_mp,
- bp, XFS_BUF_ADDR(bp));
+ xfs_buf_ioerror_alert(bp, __func__);
return error;
}
@@ -216,18 +217,18 @@ xlog_bread_offset(
xfs_buf_t *bp,
xfs_caddr_t offset)
{
- xfs_caddr_t orig_offset = XFS_BUF_PTR(bp);
+ xfs_caddr_t orig_offset = bp->b_addr;
int orig_len = bp->b_buffer_length;
int error, error2;
- error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks));
+ error = xfs_buf_associate_memory(bp, offset, BBTOB(nbblks));
if (error)
return error;
error = xlog_bread_noalign(log, blk_no, nbblks, bp);
/* must reset buffer pointer even on error */
- error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len);
+ error2 = xfs_buf_associate_memory(bp, orig_offset, orig_len);
if (error)
return error;
return error2;
@@ -262,15 +263,14 @@ xlog_bwrite(
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
XFS_BUF_ZEROFLAGS(bp);
- XFS_BUF_BUSY(bp);
- XFS_BUF_HOLD(bp);
- XFS_BUF_PSEMA(bp, PRIBIO);
+ xfs_buf_hold(bp);
+ xfs_buf_lock(bp);
XFS_BUF_SET_COUNT(bp, BBTOB(nbblks));
- XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp);
- if ((error = xfs_bwrite(log->l_mp, bp)))
- xfs_ioerror_alert("xlog_bwrite", log->l_mp,
- bp, XFS_BUF_ADDR(bp));
+ error = xfs_bwrite(bp);
+ if (error)
+ xfs_buf_ioerror_alert(bp, __func__);
+ xfs_buf_relse(bp);
return error;
}
@@ -300,14 +300,14 @@ xlog_header_check_recover(
xfs_mount_t *mp,
xlog_rec_header_t *head)
{
- ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
+ ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
/*
* IRIX doesn't write the h_fmt field and leaves it zeroed
* (XLOG_FMT_UNKNOWN). This stops us from trying to recover
* a dirty log created in IRIX.
*/
- if (unlikely(be32_to_cpu(head->h_fmt) != XLOG_FMT)) {
+ if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
xfs_warn(mp,
"dirty log written in incompatible format - can't recover");
xlog_header_check_dump(mp, head);
@@ -333,7 +333,7 @@ xlog_header_check_mount(
xfs_mount_t *mp,
xlog_rec_header_t *head)
{
- ASSERT(be32_to_cpu(head->h_magicno) == XLOG_HEADER_MAGIC_NUM);
+ ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM));
if (uuid_is_nil(&head->h_fs_uuid)) {
/*
@@ -356,18 +356,16 @@ STATIC void
xlog_recover_iodone(
struct xfs_buf *bp)
{
- if (XFS_BUF_GETERROR(bp)) {
+ if (bp->b_error) {
/*
* We're not going to bother about retrying
* this during recovery. One strike!
*/
- xfs_ioerror_alert("xlog_recover_iodone",
- bp->b_target->bt_mount, bp,
- XFS_BUF_ADDR(bp));
+ xfs_buf_ioerror_alert(bp, __func__);
xfs_force_shutdown(bp->b_target->bt_mount,
SHUTDOWN_META_IO_ERROR);
}
- XFS_BUF_CLR_IODONE_FUNC(bp);
+ bp->b_iodone = NULL;
xfs_buf_ioend(bp, 0);
}
@@ -534,7 +532,7 @@ xlog_find_verify_log_record(
head = (xlog_rec_header_t *)offset;
- if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(head->h_magicno))
+ if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM))
break;
if (!smallmem)
@@ -916,7 +914,7 @@ xlog_find_tail(
if (error)
goto done;
- if (XLOG_HEADER_MAGIC_NUM == be32_to_cpu(*(__be32 *)offset)) {
+ if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
found = 1;
break;
}
@@ -933,8 +931,8 @@ xlog_find_tail(
if (error)
goto done;
- if (XLOG_HEADER_MAGIC_NUM ==
- be32_to_cpu(*(__be32 *)offset)) {
+ if (*(__be32 *)offset ==
+ cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
found = 2;
break;
}
@@ -1258,7 +1256,7 @@ xlog_write_log_records(
*/
ealign = round_down(end_block, sectbb);
if (j == 0 && (start_block + endcount > ealign)) {
- offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block);
+ offset = bp->b_addr + BBTOB(ealign - start_block);
error = xlog_bread_offset(log, ealign, sectbb,
bp, offset);
if (error)
@@ -1947,7 +1945,7 @@ xfs_qm_dqcheck(
* This is all fine; things are still consistent, and we haven't lost
* any quota information. Just don't complain about bad dquot blks.
*/
- if (be16_to_cpu(ddq->d_magic) != XFS_DQUOT_MAGIC) {
+ if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) {
if (flags & XFS_QMOPT_DOWARN)
xfs_alert(mp,
"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x",
@@ -2131,15 +2129,15 @@ xlog_recover_buffer_pass2(
bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
buf_flags);
- if (XFS_BUF_ISERROR(bp)) {
- xfs_ioerror_alert("xlog_recover_do..(read#1)", mp,
- bp, buf_f->blf_blkno);
- error = XFS_BUF_GETERROR(bp);
+ if (!bp)
+ return XFS_ERROR(ENOMEM);
+ error = bp->b_error;
+ if (error) {
+ xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
xfs_buf_relse(bp);
return error;
}
- error = 0;
if (buf_f->blf_flags & XFS_BLF_INODE_BUF) {
error = xlog_recover_do_inode_buffer(mp, item, bp, buf_f);
} else if (buf_f->blf_flags &
@@ -2170,15 +2168,16 @@ xlog_recover_buffer_pass2(
be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&
(XFS_BUF_COUNT(bp) != MAX(log->l_mp->m_sb.sb_blocksize,
(__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) {
- XFS_BUF_STALE(bp);
- error = xfs_bwrite(mp, bp);
+ xfs_buf_stale(bp);
+ error = xfs_bwrite(bp);
} else {
ASSERT(bp->b_target->bt_mount == mp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
- xfs_bdwrite(mp, bp);
+ bp->b_iodone = xlog_recover_iodone;
+ xfs_buf_delwri_queue(bp);
}
- return (error);
+ xfs_buf_relse(bp);
+ return error;
}
STATIC int
@@ -2223,14 +2222,16 @@ xlog_recover_inode_pass2(
bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
XBF_LOCK);
- if (XFS_BUF_ISERROR(bp)) {
- xfs_ioerror_alert("xlog_recover_do..(read#2)", mp,
- bp, in_f->ilf_blkno);
- error = XFS_BUF_GETERROR(bp);
+ if (!bp) {
+ error = ENOMEM;
+ goto error;
+ }
+ error = bp->b_error;
+ if (error) {
+ xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)");
xfs_buf_relse(bp);
goto error;
}
- error = 0;
ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
@@ -2238,7 +2239,7 @@ xlog_recover_inode_pass2(
* Make sure the place we're flushing out to really looks
* like an inode!
*/
- if (unlikely(be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)) {
+ if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
xfs_buf_relse(bp);
xfs_alert(mp,
"%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
@@ -2279,7 +2280,7 @@ xlog_recover_inode_pass2(
/* Take the opportunity to reset the flush iteration count */
dicp->di_flushiter = 0;
- if (unlikely((dicp->di_mode & S_IFMT) == S_IFREG)) {
+ if (unlikely(S_ISREG(dicp->di_mode))) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
@@ -2292,7 +2293,7 @@ xlog_recover_inode_pass2(
error = EFSCORRUPTED;
goto error;
}
- } else if (unlikely((dicp->di_mode & S_IFMT) == S_IFDIR)) {
+ } else if (unlikely(S_ISDIR(dicp->di_mode))) {
if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
(dicp->di_format != XFS_DINODE_FMT_BTREE) &&
(dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
@@ -2434,8 +2435,9 @@ xlog_recover_inode_pass2(
write_inode_buffer:
ASSERT(bp->b_target->bt_mount == mp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
- xfs_bdwrite(mp, bp);
+ bp->b_iodone = xlog_recover_iodone;
+ xfs_buf_delwri_queue(bp);
+ xfs_buf_relse(bp);
error:
if (need_free)
kmem_free(in_f);
@@ -2533,8 +2535,7 @@ xlog_recover_dquot_pass2(
XFS_FSB_TO_BB(mp, dq_f->qlf_len),
0, &bp);
if (error) {
- xfs_ioerror_alert("xlog_recover_do..(read#3)", mp,
- bp, dq_f->qlf_blkno);
+ xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#3)");
return error;
}
ASSERT(bp);
@@ -2556,8 +2557,9 @@ xlog_recover_dquot_pass2(
ASSERT(dq_f->qlf_size == 2);
ASSERT(bp->b_target->bt_mount == mp);
- XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone);
- xfs_bdwrite(mp, bp);
+ bp->b_iodone = xlog_recover_iodone;
+ xfs_buf_delwri_queue(bp);
+ xfs_buf_relse(bp);
return (0);
}
@@ -3284,7 +3286,7 @@ xlog_valid_rec_header(
{
int hlen;
- if (unlikely(be32_to_cpu(rhead->h_magicno) != XLOG_HEADER_MAGIC_NUM)) {
+ if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
XFS_ERRLEVEL_LOW, log->l_mp);
return XFS_ERROR(EFSCORRUPTED);
@@ -3422,7 +3424,7 @@ xlog_do_recovery_pass(
/*
* Check for header wrapping around physical end-of-log
*/
- offset = XFS_BUF_PTR(hbp);
+ offset = hbp->b_addr;
split_hblks = 0;
wrapped_hblks = 0;
if (blk_no + hblks <= log->l_logBBsize) {
@@ -3482,7 +3484,7 @@ xlog_do_recovery_pass(
} else {
/* This log record is split across the
* physical end of log */
- offset = XFS_BUF_PTR(dbp);
+ offset = dbp->b_addr;
split_bblks = 0;
if (blk_no != log->l_logBBsize) {
/* some data is before the physical
@@ -3641,7 +3643,7 @@ xlog_do_recover(
return error;
}
- XFS_bflush(log->l_mp->m_ddev_targp);
+ xfs_flush_buftarg(log->l_mp->m_ddev_targp, 1);
/*
* If IO errors happened during recovery, bail out.
@@ -3674,8 +3676,7 @@ xlog_do_recover(
xfsbdstrat(log->l_mp, bp);
error = xfs_buf_iowait(bp);
if (error) {
- xfs_ioerror_alert("xlog_do_recover",
- log->l_mp, bp, XFS_BUF_ADDR(bp));
+ xfs_buf_ioerror_alert(bp, __func__);
ASSERT(0);
xfs_buf_relse(bp);
return error;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 9afdd49..d06afbc 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -345,7 +345,7 @@ xfs_mount_validate_sb(
}
/*
- * More sanity checking. These were stolen directly from
+ * More sanity checking. Most of these were stolen directly from
* xfs_repair.
*/
if (unlikely(
@@ -368,23 +368,13 @@ xfs_mount_validate_sb(
(sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) ||
(sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) ||
(sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) ||
- (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */))) {
+ (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */) ||
+ sbp->sb_dblocks == 0 ||
+ sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp) ||
+ sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) {
if (loud)
- xfs_warn(mp, "SB sanity check 1 failed");
- return XFS_ERROR(EFSCORRUPTED);
- }
-
- /*
- * Sanity check AG count, size fields against data size field
- */
- if (unlikely(
- sbp->sb_dblocks == 0 ||
- sbp->sb_dblocks >
- (xfs_drfsbno_t)sbp->sb_agcount * sbp->sb_agblocks ||
- sbp->sb_dblocks < (xfs_drfsbno_t)(sbp->sb_agcount - 1) *
- sbp->sb_agblocks + XFS_MIN_AG_BLOCKS)) {
- if (loud)
- xfs_warn(mp, "SB sanity check 2 failed");
+ XFS_CORRUPTION_ERROR("SB sanity check failed",
+ XFS_ERRLEVEL_LOW, mp, sbp);
return XFS_ERROR(EFSCORRUPTED);
}
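
The two superblock checks are merged, with the data-block bounds now behind XFS_MAX_DBLOCKS()/XFS_MIN_DBLOCKS(). Judging from the removed open-coded test, those bounds are the AG geometry limits; a sketch of what such helpers compute (bodies inferred from the deleted code, and MIN_AG_BLOCKS below is a stand-in for the kernel's XFS_MIN_AG_BLOCKS constant):

#include <stdint.h>
#include <stdio.h>

#define MIN_AG_BLOCKS 64	/* stand-in value */

/* The data-block count must fit the AG geometry: no more than
 * agcount full AGs, and no less than (agcount - 1) full AGs plus a
 * minimally sized last AG. */
static int dblocks_valid(uint64_t dblocks, uint32_t agcount,
			 uint32_t agblocks)
{
	uint64_t max = (uint64_t)agcount * agblocks;
	uint64_t min = (uint64_t)(agcount - 1) * agblocks + MIN_AG_BLOCKS;

	return dblocks != 0 && dblocks <= max && dblocks >= min;
}

int main(void)
{
	printf("%d\n", dblocks_valid(1000, 4, 256));	/* 1 */
	printf("%d\n", dblocks_valid(0, 4, 256));	/* 0 */
	return 0;
}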
@@ -861,7 +851,8 @@ xfs_update_alignment(xfs_mount_t *mp)
if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
(BBTOB(mp->m_swidth) & mp->m_blockmask)) {
if (mp->m_flags & XFS_MOUNT_RETERR) {
- xfs_warn(mp, "alignment check 1 failed");
+ xfs_warn(mp, "alignment check failed: "
+ "(sunit/swidth vs. blocksize)");
return XFS_ERROR(EINVAL);
}
mp->m_dalign = mp->m_swidth = 0;
@@ -872,6 +863,8 @@ xfs_update_alignment(xfs_mount_t *mp)
mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
if (mp->m_flags & XFS_MOUNT_RETERR) {
+ xfs_warn(mp, "alignment check failed: "
+ "(sunit/swidth vs. ag size)");
return XFS_ERROR(EINVAL);
}
xfs_warn(mp,
@@ -886,8 +879,8 @@ xfs_update_alignment(xfs_mount_t *mp)
mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
if (mp->m_flags & XFS_MOUNT_RETERR) {
- xfs_warn(mp,
- "stripe alignment turned off: sunit(%d) less than bsize(%d)",
+ xfs_warn(mp, "alignment check failed: "
+ "sunit(%d) less than bsize(%d)",
mp->m_dalign,
mp->m_blockmask +1);
return XFS_ERROR(EINVAL);
@@ -1093,10 +1086,6 @@ xfs_mount_reset_sbqflags(
if (mp->m_flags & XFS_MOUNT_RDONLY)
return 0;
-#ifdef QUOTADEBUG
- xfs_notice(mp, "Writing superblock quota changes");
-#endif
-
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
XFS_DEFAULT_LOG_COUNT);
@@ -1339,7 +1328,7 @@ xfs_mountfs(
ASSERT(rip != NULL);
- if (unlikely((rip->i_d.di_mode & S_IFMT) != S_IFDIR)) {
+ if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
xfs_warn(mp, "corrupted root inode %llu: not a directory",
(unsigned long long)rip->i_ino);
xfs_iunlock(rip, XFS_ILOCK_EXCL);
@@ -1492,7 +1481,7 @@ xfs_unmountfs(
* state as much as possible.
*/
xfs_reclaim_inodes(mp, 0);
- XFS_bflush(mp->m_ddev_targp);
+ xfs_flush_buftarg(mp->m_ddev_targp, 1);
xfs_reclaim_inodes(mp, SYNC_WAIT);
xfs_qm_unmount(mp);
@@ -1524,7 +1513,7 @@ xfs_unmountfs(
xfs_warn(mp, "Unable to free reserved block pool. "
"Freespace may not be correct on next mount.");
- error = xfs_log_sbcount(mp, 1);
+ error = xfs_log_sbcount(mp);
if (error)
xfs_warn(mp, "Unable to update superblock counters. "
"Freespace may not be correct on next mount.");
@@ -1559,18 +1548,14 @@ xfs_fs_writable(xfs_mount_t *mp)
/*
* xfs_log_sbcount
*
- * Called either periodically to keep the on disk superblock values
- * roughly up to date or from unmount to make sure the values are
- * correct on a clean unmount.
+ * Sync the superblock counters to disk.
*
* Note this code can be called during the process of freezing, so
- * we may need to use the transaction allocator which does not not
+ * we may need to use the transaction allocator which does not
* block when the transaction subsystem is in its frozen state.
*/
int
-xfs_log_sbcount(
- xfs_mount_t *mp,
- uint sync)
+xfs_log_sbcount(xfs_mount_t *mp)
{
xfs_trans_t *tp;
int error;
@@ -1596,8 +1581,7 @@ xfs_log_sbcount(
}
xfs_mod_sb(tp, XFS_SB_IFREE | XFS_SB_ICOUNT | XFS_SB_FDBLOCKS);
- if (sync)
- xfs_trans_set_sync(tp);
+ xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
return error;
}
@@ -1619,15 +1603,14 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
XFS_BUF_UNDONE(sbp);
XFS_BUF_UNREAD(sbp);
- XFS_BUF_UNDELAYWRITE(sbp);
+ xfs_buf_delwri_dequeue(sbp);
XFS_BUF_WRITE(sbp);
XFS_BUF_UNASYNC(sbp);
- ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp);
+ ASSERT(sbp->b_target == mp->m_ddev_targp);
xfsbdstrat(mp, sbp);
error = xfs_buf_iowait(sbp);
if (error)
- xfs_ioerror_alert("xfs_unmountfs_writesb",
- mp, sbp, XFS_BUF_ADDR(sbp));
+ xfs_buf_ioerror_alert(sbp, __func__);
xfs_buf_relse(sbp);
}
return error;
@@ -1932,23 +1915,20 @@ unwind:
* the superblock buffer if it can be locked without sleeping.
* If it can't then we'll return NULL.
*/
-xfs_buf_t *
+struct xfs_buf *
xfs_getsb(
- xfs_mount_t *mp,
- int flags)
+ struct xfs_mount *mp,
+ int flags)
{
- xfs_buf_t *bp;
+ struct xfs_buf *bp = mp->m_sb_bp;
- ASSERT(mp->m_sb_bp != NULL);
- bp = mp->m_sb_bp;
- if (flags & XBF_TRYLOCK) {
- if (!XFS_BUF_CPSEMA(bp)) {
+ if (!xfs_buf_trylock(bp)) {
+ if (flags & XBF_TRYLOCK)
return NULL;
- }
- } else {
- XFS_BUF_PSEMA(bp, PRIBIO);
+ xfs_buf_lock(bp);
}
- XFS_BUF_HOLD(bp);
+
+ xfs_buf_hold(bp);
ASSERT(XFS_BUF_ISDONE(bp));
return bp;
}
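
xfs_getsb() now tries the buffer lock unconditionally and only falls back to a blocking xfs_buf_lock() when the caller did not request XBF_TRYLOCK, which removes a branch from the common, uncontended path. The same try-then-block idiom with a pthread mutex:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t sb_lock = PTHREAD_MUTEX_INITIALIZER;

/* Returns -1 without sleeping when trylock is set and the lock is
 * contended; otherwise blocks until the lock is acquired. */
static int getsb(int trylock)
{
	if (pthread_mutex_trylock(&sb_lock) != 0) {
		if (trylock)
			return -1;	/* caller asked not to sleep */
		pthread_mutex_lock(&sb_lock);
	}
	return 0;
}

int main(void)
{
	printf("%d\n", getsb(1));	/* uncontended: acquires */
	pthread_mutex_unlock(&sb_lock);
	return 0;
}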
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 3d68bb2..bb24dac 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -371,7 +371,7 @@ typedef struct xfs_mod_sb {
int64_t msb_delta; /* Change to make to specified field */
} xfs_mod_sb_t;
-extern int xfs_log_sbcount(xfs_mount_t *, uint);
+extern int xfs_log_sbcount(xfs_mount_t *);
extern __uint64_t xfs_default_resblks(xfs_mount_t *mp);
extern int xfs_mountfs(xfs_mount_t *mp);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index 77a5989..866de27 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -116,7 +116,7 @@ xfs_rename(
trace_xfs_rename(src_dp, target_dp, src_name, target_name);
new_parent = (src_dp != target_dp);
- src_is_directory = ((src_ip->i_d.di_mode & S_IFMT) == S_IFDIR);
+ src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
if (src_is_directory) {
/*
@@ -170,12 +170,12 @@ xfs_rename(
* we can rely on either trans_commit or trans_cancel to unlock
* them.
*/
- xfs_trans_ijoin_ref(tp, src_dp, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, src_dp, XFS_ILOCK_EXCL);
if (new_parent)
- xfs_trans_ijoin_ref(tp, target_dp, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, src_ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, target_dp, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, src_ip, XFS_ILOCK_EXCL);
if (target_ip)
- xfs_trans_ijoin_ref(tp, target_ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, target_ip, XFS_ILOCK_EXCL);
/*
* If we are using project inheritance, we only allow renames
@@ -226,7 +226,7 @@ xfs_rename(
* target and source are directories and that target can be
* destroyed, or that neither is a directory.
*/
- if ((target_ip->i_d.di_mode & S_IFMT) == S_IFDIR) {
+ if (S_ISDIR(target_ip->i_d.di_mode)) {
/*
* Make sure target dir is empty.
*/
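
This file, like the inode and log-recovery changes above, replaces the open-coded (mode & S_IFMT) == S_IFDIR test with the standard S_ISDIR() predicate. A two-line demonstration that the forms are equivalent:

#include <sys/stat.h>
#include <stdio.h>

int main(void)
{
	mode_t mode = S_IFDIR | 0755;

	/* Open-coded test replaced throughout the patch ... */
	printf("%d\n", (mode & S_IFMT) == S_IFDIR);
	/* ... by the standard predicate macro: */
	printf("%d\n", S_ISDIR(mode) != 0);
	return 0;
}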
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 8f76fdf..87323f1 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -112,7 +112,7 @@ xfs_growfs_rt_alloc(
* Lock the inode.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_bmap_init(&flist, &firstblock);
/*
@@ -120,9 +120,9 @@ xfs_growfs_rt_alloc(
*/
nmap = 1;
cancelflags |= XFS_TRANS_ABORT;
- error = xfs_bmapi(tp, ip, oblocks, nblocks - oblocks,
- XFS_BMAPI_WRITE | XFS_BMAPI_METADATA, &firstblock,
- resblks, &map, &nmap, &flist);
+ error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
+ XFS_BMAPI_METADATA, &firstblock,
+ resblks, &map, &nmap, &flist);
if (!error && nmap < 1)
error = XFS_ERROR(ENOSPC);
if (error)
@@ -155,7 +155,7 @@ xfs_growfs_rt_alloc(
* Lock the bitmap inode.
*/
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
* Get a buffer for the block.
*/
@@ -168,7 +168,7 @@ error_cancel:
xfs_trans_cancel(tp, cancelflags);
goto error;
}
- memset(XFS_BUF_PTR(bp), 0, mp->m_sb.sb_blocksize);
+ memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
/*
* Commit the transaction.
@@ -856,34 +856,24 @@ xfs_rtbuf_get(
xfs_buf_t **bpp) /* output: buffer for the block */
{
xfs_buf_t *bp; /* block buffer, result */
- xfs_daddr_t d; /* disk addr of block */
- int error; /* error value */
- xfs_fsblock_t fsb; /* fs block number for block */
xfs_inode_t *ip; /* bitmap or summary inode */
+ xfs_bmbt_irec_t map;
+ int nmap;
+ int error; /* error value */
ip = issum ? mp->m_rsumip : mp->m_rbmip;
- /*
- * Map from the file offset (block) and inode number to the
- * file system block.
- */
- error = xfs_bmapi_single(tp, ip, XFS_DATA_FORK, &fsb, block);
- if (error) {
+
+ error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK);
+ if (error)
return error;
- }
- ASSERT(fsb != NULLFSBLOCK);
- /*
- * Convert to disk address for buffer cache.
- */
- d = XFS_FSB_TO_DADDR(mp, fsb);
- /*
- * Read the buffer.
- */
- error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
+
+ ASSERT(map.br_startblock != NULLFSBLOCK);
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, map.br_startblock),
mp->m_bsize, 0, &bp);
- if (error) {
+ if (error)
return error;
- }
- ASSERT(bp && !XFS_BUF_GETERROR(bp));
+ ASSERT(!xfs_buf_geterror(bp));
*bpp = bp;
return 0;
}
@@ -943,7 +933,7 @@ xfs_rtcheck_range(
if (error) {
return error;
}
- bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ bufp = bp->b_addr;
/*
* Compute the starting word's address, and starting bit.
*/
@@ -994,7 +984,7 @@ xfs_rtcheck_range(
if (error) {
return error;
}
- b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ b = bufp = bp->b_addr;
word = 0;
} else {
/*
@@ -1040,7 +1030,7 @@ xfs_rtcheck_range(
if (error) {
return error;
}
- b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ b = bufp = bp->b_addr;
word = 0;
} else {
/*
@@ -1158,7 +1148,7 @@ xfs_rtfind_back(
if (error) {
return error;
}
- bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ bufp = bp->b_addr;
/*
* Get the first word's index & point to it.
*/
@@ -1210,7 +1200,7 @@ xfs_rtfind_back(
if (error) {
return error;
}
- bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ bufp = bp->b_addr;
word = XFS_BLOCKWMASK(mp);
b = &bufp[word];
} else {
@@ -1256,7 +1246,7 @@ xfs_rtfind_back(
if (error) {
return error;
}
- bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ bufp = bp->b_addr;
word = XFS_BLOCKWMASK(mp);
b = &bufp[word];
} else {
@@ -1333,7 +1323,7 @@ xfs_rtfind_forw(
if (error) {
return error;
}
- bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ bufp = bp->b_addr;
/*
* Get the first word's index & point to it.
*/
@@ -1384,7 +1374,7 @@ xfs_rtfind_forw(
if (error) {
return error;
}
- b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ b = bufp = bp->b_addr;
word = 0;
} else {
/*
@@ -1429,7 +1419,7 @@ xfs_rtfind_forw(
if (error) {
return error;
}
- b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ b = bufp = bp->b_addr;
word = 0;
} else {
/*
@@ -1649,7 +1639,7 @@ xfs_rtmodify_range(
if (error) {
return error;
}
- bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ bufp = bp->b_addr;
/*
* Compute the starting word's address, and starting bit.
*/
@@ -1694,7 +1684,7 @@ xfs_rtmodify_range(
if (error) {
return error;
}
- first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ first = b = bufp = bp->b_addr;
word = 0;
} else {
/*
@@ -1734,7 +1724,7 @@ xfs_rtmodify_range(
if (error) {
return error;
}
- first = b = bufp = (xfs_rtword_t *)XFS_BUF_PTR(bp);
+ first = b = bufp = bp->b_addr;
word = 0;
} else {
/*
@@ -1832,8 +1822,8 @@ xfs_rtmodify_summary(
*/
sp = XFS_SUMPTR(mp, bp, so);
*sp += delta;
- xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)XFS_BUF_PTR(bp)),
- (uint)((char *)sp - (char *)XFS_BUF_PTR(bp) + sizeof(*sp) - 1));
+ xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr),
+ (uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1));
return 0;
}
@@ -1970,7 +1960,7 @@ xfs_growfs_rt(
* Lock out other callers by grabbing the bitmap inode lock.
*/
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
/*
* Update the bitmap inode's size.
*/
@@ -1982,7 +1972,7 @@ xfs_growfs_rt(
* Get the summary inode into the transaction.
*/
xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
* Update the summary inode's size.
*/
@@ -2153,7 +2143,7 @@ xfs_rtfree_extent(
* Synchronize by locking the bitmap inode.
*/
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, mp->m_rbmip, XFS_ILOCK_EXCL);
#if defined(__KERNEL__) && defined(DEBUG)
/*
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 09e1f4f..f7f3a35 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -47,7 +47,7 @@ struct xfs_trans;
#define XFS_SUMOFFSTOBLOCK(mp,s) \
(((s) * (uint)sizeof(xfs_suminfo_t)) >> (mp)->m_sb.sb_blocklog)
#define XFS_SUMPTR(mp,bp,so) \
- ((xfs_suminfo_t *)((char *)XFS_BUF_PTR(bp) + \
+ ((xfs_suminfo_t *)((bp)->b_addr + \
(((so) * (uint)sizeof(xfs_suminfo_t)) & XFS_BLOCKMASK(mp))))
#define XFS_BITTOBLOCK(mp,bi) ((bi) >> (mp)->m_blkbit_log)
diff --git a/fs/xfs/xfs_rw.c b/fs/xfs/xfs_rw.c
index d6d6fdf..597d044 100644
--- a/fs/xfs/xfs_rw.c
+++ b/fs/xfs/xfs_rw.c
@@ -92,24 +92,6 @@ xfs_do_force_shutdown(
}
/*
- * Prints out an ALERT message about I/O error.
- */
-void
-xfs_ioerror_alert(
- char *func,
- struct xfs_mount *mp,
- xfs_buf_t *bp,
- xfs_daddr_t blkno)
-{
- xfs_alert(mp,
- "I/O error occurred: meta-data dev %s block 0x%llx"
- " (\"%s\") error %d buf count %zd",
- XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
- (__uint64_t)blkno, func,
- XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
-}
-
-/*
* This isn't an absolute requirement, but it is
* just a good idea to call xfs_read_buf instead of
* directly doing a read_buf call. For one, we shouldn't
@@ -137,20 +119,19 @@ xfs_read_buf(
bp = xfs_buf_read(target, blkno, len, flags);
if (!bp)
return XFS_ERROR(EIO);
- error = XFS_BUF_GETERROR(bp);
- if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) {
+ error = bp->b_error;
+ if (!error && !XFS_FORCED_SHUTDOWN(mp)) {
*bpp = bp;
} else {
*bpp = NULL;
if (error) {
- xfs_ioerror_alert("xfs_read_buf", mp, bp, XFS_BUF_ADDR(bp));
+ xfs_buf_ioerror_alert(bp, __func__);
} else {
error = XFS_ERROR(EIO);
}
if (bp) {
XFS_BUF_UNDONE(bp);
- XFS_BUF_UNDELAYWRITE(bp);
- XFS_BUF_STALE(bp);
+ xfs_buf_stale(bp);
/*
* brelse clears B_ERROR and b_error
*/
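The xfs_rw.c hunk above shows the new error-reporting pattern: read the buffer's own b_error field, report failures through xfs_buf_ioerror_alert() with the calling function's name, and mark the buffer stale with xfs_buf_stale() instead of the old macro pair. A small standalone model of that control flow, with all names illustrative:

#include <stdio.h>

struct buf { int b_error; int b_flags; };   /* illustrative stand-in */
#define BUF_STALE 0x1

static void buf_ioerror_alert(struct buf *bp, const char *func)
{
	fprintf(stderr, "I/O error %d reported from %s\n", bp->b_error, func);
}

static int read_buf(struct buf *bp, struct buf **bpp)
{
	int error = bp->b_error;            /* models the XFS_BUF_GETERROR() removal */

	if (!error) {
		*bpp = bp;
		return 0;
	}
	*bpp = NULL;
	buf_ioerror_alert(bp, __func__);    /* models xfs_buf_ioerror_alert() */
	bp->b_flags |= BUF_STALE;           /* models xfs_buf_stale() */
	return error;
}

int main(void)
{
	struct buf ok = { 0, 0 }, bad = { 5, 0 }, *out;

	printf("ok:  %d\n", read_buf(&ok, &out));
	printf("bad: %d\n", read_buf(&bad, &out));
	return 0;
}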
diff --git a/fs/xfs/xfs_rw.h b/fs/xfs/xfs_rw.h
index 11c41ec..bbdb9ad 100644
--- a/fs/xfs/xfs_rw.h
+++ b/fs/xfs/xfs_rw.h
@@ -42,8 +42,6 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
extern int xfs_read_buf(struct xfs_mount *mp, xfs_buftarg_t *btp,
xfs_daddr_t blkno, int len, uint flags,
struct xfs_buf **bpp);
-extern void xfs_ioerror_alert(char *func, struct xfs_mount *mp,
- xfs_buf_t *bp, xfs_daddr_t blkno);
extern xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
#endif /* __XFS_RW_H__ */
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h
index 1eb2ba5..cb6ae71 100644
--- a/fs/xfs/xfs_sb.h
+++ b/fs/xfs/xfs_sb.h
@@ -509,7 +509,7 @@ static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp)
#define XFS_SB_DADDR ((xfs_daddr_t)0) /* daddr in filesystem/ag */
#define XFS_SB_BLOCK(mp) XFS_HDR_BLOCK(mp, XFS_SB_DADDR)
-#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)XFS_BUF_PTR(bp))
+#define XFS_BUF_TO_SBP(bp) ((xfs_dsb_t *)((bp)->b_addr))
#define XFS_HDR_BLOCK(mp,d) ((xfs_agblock_t)XFS_BB_TO_FSBT(mp,d))
#define XFS_DADDR_TO_FSB(mp,d) XFS_AGB_TO_FSB(mp, \
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index efc147f..1f35b2f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1790,9 +1790,7 @@ xfs_trans_commit_cil(
}
/*
- * xfs_trans_commit
- *
- * Commit the given transaction to the log a/synchronously.
+ * Commit the given transaction to the log.
*
* XFS disk error handling mechanism is not based on a typical
* transaction abort mechanism. Logically after the filesystem
@@ -1804,10 +1802,9 @@ xfs_trans_commit_cil(
* Do not reference the transaction structure after this call.
*/
int
-_xfs_trans_commit(
+xfs_trans_commit(
struct xfs_trans *tp,
- uint flags,
- int *log_flushed)
+ uint flags)
{
struct xfs_mount *mp = tp->t_mountp;
xfs_lsn_t commit_lsn = -1;
@@ -1866,7 +1863,7 @@ _xfs_trans_commit(
if (sync) {
if (!error) {
error = _xfs_log_force_lsn(mp, commit_lsn,
- XFS_LOG_SYNC, log_flushed);
+ XFS_LOG_SYNC, NULL);
}
XFS_STATS_INC(xs_trans_sync);
} else {
@@ -2021,6 +2018,6 @@ xfs_trans_roll(
if (error)
return error;
- xfs_trans_ijoin(trans, dp);
+ xfs_trans_ijoin(trans, dp, 0);
return 0;
}
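With the log_flushed out-parameter gone, _xfs_trans_commit() collapses into the public two-argument xfs_trans_commit(), and the synchronous log force simply passes NULL. A toy model of the simplification, with stand-in names:

#include <stdio.h>
#include <stddef.h>

static int log_force_lsn(long lsn, int *log_flushed)
{
	if (log_flushed)
		*log_flushed = 1;   /* only the removed caller looked at this */
	printf("forcing log to lsn %ld\n", lsn);
	return 0;
}

/* new public shape: no out-parameter to plumb through */
static int trans_commit(long commit_lsn, int sync)
{
	if (sync)
		return log_force_lsn(commit_lsn, NULL);
	return 0;
}

int main(void)
{
	return trans_commit(100, 1);
}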
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 53597f4..3ae713c 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -326,7 +326,7 @@ typedef struct xfs_log_item {
struct xfs_log_item *);
/* buffer item iodone */
/* callback func */
- struct xfs_item_ops *li_ops; /* function list */
+ const struct xfs_item_ops *li_ops; /* function list */
/* delayed logging */
struct list_head li_cil; /* CIL pointers */
@@ -341,7 +341,7 @@ typedef struct xfs_log_item {
{ XFS_LI_IN_AIL, "IN_AIL" }, \
{ XFS_LI_ABORTED, "ABORTED" }
-typedef struct xfs_item_ops {
+struct xfs_item_ops {
uint (*iop_size)(xfs_log_item_t *);
void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *);
void (*iop_pin)(xfs_log_item_t *);
@@ -352,7 +352,7 @@ typedef struct xfs_item_ops {
void (*iop_push)(xfs_log_item_t *);
bool (*iop_pushbuf)(xfs_log_item_t *);
void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);
-} xfs_item_ops_t;
+};
#define IOP_SIZE(ip) (*(ip)->li_ops->iop_size)(ip)
#define IOP_FORMAT(ip,vp) (*(ip)->li_ops->iop_format)(ip, vp)
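Making li_ops a pointer to const works because the ops vector is a table of function pointers that is initialised once at build time and never written afterwards, so instances can live in read-only data. A self-contained sketch of the pattern:

#include <stdio.h>

struct item;
struct item_ops {
	unsigned int (*iop_size)(struct item *);
	void (*iop_push)(struct item *);
};

struct item {
	const struct item_ops *li_ops;   /* was non-const before the change */
};

static unsigned int my_size(struct item *ip) { (void)ip; return 1; }
static void my_push(struct item *ip) { (void)ip; puts("pushed"); }

/* the table itself can now live in rodata */
static const struct item_ops my_ops = {
	.iop_size = my_size,
	.iop_push = my_push,
};

int main(void)
{
	struct item it = { .li_ops = &my_ops };

	printf("size=%u\n", it.li_ops->iop_size(&it));
	it.li_ops->iop_push(&it);
	return 0;
}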
@@ -470,8 +470,7 @@ void xfs_trans_stale_inode_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
-void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
-void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
+void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *, uint);
void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
void xfs_trans_log_inode(xfs_trans_t *, struct xfs_inode *, uint);
struct xfs_efi_log_item *xfs_trans_get_efi(xfs_trans_t *, uint);
@@ -487,10 +486,7 @@ void xfs_trans_log_efd_extent(xfs_trans_t *,
struct xfs_efd_log_item *,
xfs_fsblock_t,
xfs_extlen_t);
-int _xfs_trans_commit(xfs_trans_t *,
- uint flags,
- int *);
-#define xfs_trans_commit(tp, flags) _xfs_trans_commit(tp, flags, NULL)
+int xfs_trans_commit(xfs_trans_t *, uint flags);
void xfs_trans_cancel(xfs_trans_t *, int);
int xfs_trans_ail_init(struct xfs_mount *);
void xfs_trans_ail_destroy(struct xfs_mount *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index a4c281b..ed9252b 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -26,6 +26,7 @@
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_trans_priv.h"
+#include "xfs_trace.h"
#include "xfs_error.h"
#ifdef DEBUG
@@ -161,17 +162,11 @@ xfs_ail_max_lsn(
}
/*
- * AIL traversal cursor initialisation.
- *
- * The cursor keeps track of where our current traversal is up
- * to by tracking the next item in the list for us. However, for
- * this to be safe, removing an object from the AIL needs to invalidate
- * any cursor that points to it. hence the traversal cursor needs to
- * be linked to the struct xfs_ail so that deletion can search all the
- * active cursors for invalidation.
- *
- * We don't link the push cursor because it is embedded in the struct
- * xfs_ail and hence easily findable.
+ * The cursor keeps track of where our current traversal is up to by tracking
+ * the next item in the list for us. However, for this to be safe, removing an
+ * object from the AIL needs to invalidate any cursor that points to it. Hence
+ * the traversal cursor needs to be linked to the struct xfs_ail so that
+ * deletion can search all the active cursors for invalidation.
*/
STATIC void
xfs_trans_ail_cursor_init(
@@ -179,31 +174,12 @@ xfs_trans_ail_cursor_init(
struct xfs_ail_cursor *cur)
{
cur->item = NULL;
- if (cur == &ailp->xa_cursors)
- return;
-
- cur->next = ailp->xa_cursors.next;
- ailp->xa_cursors.next = cur;
-}
-
-/*
- * Set the cursor to the next item, because when we look
- * up the cursor the current item may have been freed.
- */
-STATIC void
-xfs_trans_ail_cursor_set(
- struct xfs_ail *ailp,
- struct xfs_ail_cursor *cur,
- struct xfs_log_item *lip)
-{
- if (lip)
- cur->item = xfs_ail_next(ailp, lip);
+ list_add_tail(&cur->list, &ailp->xa_cursors);
}
/*
- * Get the next item in the traversal and advance the cursor.
- * If the cursor was invalidated (inidicated by a lip of 1),
- * restart the traversal.
+ * Get the next item in the traversal and advance the cursor. If the cursor
+ * was invalidated (indicated by a lip of 1), restart the traversal.
*/
struct xfs_log_item *
xfs_trans_ail_cursor_next(
@@ -214,45 +190,31 @@ xfs_trans_ail_cursor_next(
if ((__psint_t)lip & 1)
lip = xfs_ail_min(ailp);
- xfs_trans_ail_cursor_set(ailp, cur, lip);
+ if (lip)
+ cur->item = xfs_ail_next(ailp, lip);
return lip;
}
/*
- * Now that the traversal is complete, we need to remove the cursor
- * from the list of traversing cursors. Avoid removing the embedded
- * push cursor, but use the fact it is always present to make the
- * list deletion simple.
+ * When the traversal is complete, we need to remove the cursor from the list
+ * of traversing cursors.
*/
void
xfs_trans_ail_cursor_done(
struct xfs_ail *ailp,
- struct xfs_ail_cursor *done)
+ struct xfs_ail_cursor *cur)
{
- struct xfs_ail_cursor *prev = NULL;
- struct xfs_ail_cursor *cur;
-
- done->item = NULL;
- if (done == &ailp->xa_cursors)
- return;
- prev = &ailp->xa_cursors;
- for (cur = prev->next; cur; prev = cur, cur = prev->next) {
- if (cur == done) {
- prev->next = cur->next;
- break;
- }
- }
- ASSERT(cur);
+ cur->item = NULL;
+ list_del_init(&cur->list);
}
/*
- * Invalidate any cursor that is pointing to this item. This is
- * called when an item is removed from the AIL. Any cursor pointing
- * to this object is now invalid and the traversal needs to be
- * terminated so it doesn't reference a freed object. We set the
- * cursor item to a value of 1 so we can distinguish between an
- * invalidation and the end of the list when getting the next item
- * from the cursor.
+ * Invalidate any cursor that is pointing to this item. This is called when an
+ * item is removed from the AIL. Any cursor pointing to this object is now
+ * invalid and the traversal needs to be terminated so it doesn't reference a
+ * freed object. We set the low bit of the cursor item pointer so we can
+ * distinguish between an invalidation and the end of the list when getting the
+ * next item from the cursor.
*/
STATIC void
xfs_trans_ail_cursor_clear(
@@ -261,8 +223,7 @@ xfs_trans_ail_cursor_clear(
{
struct xfs_ail_cursor *cur;
- /* need to search all cursors */
- for (cur = &ailp->xa_cursors; cur; cur = cur->next) {
+ list_for_each_entry(cur, &ailp->xa_cursors, list) {
if (cur->item == lip)
cur->item = (struct xfs_log_item *)
((__psint_t)cur->item | 1);
@@ -270,9 +231,10 @@ xfs_trans_ail_cursor_clear(
}
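The invalidation scheme relies on log item pointers being at least two-byte aligned, so the low bit is free to act as an "invalidated" tag without any extra storage. A standalone model of the tag-and-restart idea; the types are illustrative:

#include <stdio.h>
#include <stdint.h>

struct log_item { int lsn; };

static struct log_item *invalidate(struct log_item *p)
{
	return (struct log_item *)((uintptr_t)p | 1);
}

static int is_invalid(struct log_item *p)
{
	return ((uintptr_t)p & 1) != 0;
}

int main(void)
{
	struct log_item item = { 42 };
	struct log_item *cur = &item;

	cur = invalidate(cur);   /* the item was removed from the list */
	if (is_invalid(cur)) {
		puts("cursor invalidated, restart traversal from the list head");
		cur = &item;     /* restart stand-in */
	}
	printf("lsn=%d\n", cur->lsn);
	return 0;
}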
/*
- * Initialise the cursor to the first item in the AIL with the given @lsn.
- * This searches the list from lowest LSN to highest. Pass a @lsn of zero
- * to initialise the cursor to the first item in the AIL.
+ * Find the first item in the AIL with the given @lsn by searching in ascending
+ * LSN order and initialise the cursor to point to the next item for an
+ * ascending traversal. Pass a @lsn of zero to initialise the cursor to the
+ * first item in the AIL. Returns NULL if the list is empty.
*/
xfs_log_item_t *
xfs_trans_ail_cursor_first(
@@ -283,26 +245,24 @@ xfs_trans_ail_cursor_first(
xfs_log_item_t *lip;
xfs_trans_ail_cursor_init(ailp, cur);
- lip = xfs_ail_min(ailp);
- if (lsn == 0)
+
+ if (lsn == 0) {
+ lip = xfs_ail_min(ailp);
goto out;
+ }
list_for_each_entry(lip, &ailp->xa_ail, li_ail) {
if (XFS_LSN_CMP(lip->li_lsn, lsn) >= 0)
goto out;
}
- lip = NULL;
+ return NULL;
+
out:
- xfs_trans_ail_cursor_set(ailp, cur, lip);
+ if (lip)
+ cur->item = xfs_ail_next(ailp, lip);
return lip;
}
-/*
- * Initialise the cursor to the last item in the AIL with the given @lsn.
- * This searches the list from highest LSN to lowest. If there is no item with
- * the value of @lsn, then it sets the cursor to the last item with an LSN lower
- * than @lsn.
- */
static struct xfs_log_item *
__xfs_trans_ail_cursor_last(
struct xfs_ail *ailp,
@@ -318,8 +278,10 @@ __xfs_trans_ail_cursor_last(
}
/*
- * Initialise the cursor to the last item in the AIL with the given @lsn.
- * This searches the list from highest LSN to lowest.
+ * Find the last item in the AIL with the given @lsn by searching in descending
+ * LSN order and initialise the cursor to point to that item. If there is no
+ * item with the value of @lsn, then it sets the cursor to the last item with an
+ * LSN lower than @lsn. Returns NULL if the list is empty.
*/
struct xfs_log_item *
xfs_trans_ail_cursor_last(
@@ -333,10 +295,10 @@ xfs_trans_ail_cursor_last(
}
/*
- * splice the log item list into the AIL at the given LSN. We splice to the
+ * Splice the log item list into the AIL at the given LSN. We splice to the
* tail of the given LSN to maintain insert order for push traversals. The
* cursor is optional, allowing repeated updates to the same LSN to avoid
- * repeated traversals.
+ * repeated traversals. This should not be called with an empty list.
*/
static void
xfs_ail_splice(
@@ -345,50 +307,39 @@ xfs_ail_splice(
struct list_head *list,
xfs_lsn_t lsn)
{
- struct xfs_log_item *lip = cur ? cur->item : NULL;
- struct xfs_log_item *next_lip;
+ struct xfs_log_item *lip;
+
+ ASSERT(!list_empty(list));
/*
- * Get a new cursor if we don't have a placeholder or the existing one
- * has been invalidated.
+ * Use the cursor to determine the insertion point if one is
+ * provided. If not, or if the one we got is not valid,
+ * find the place in the AIL where the items belong.
*/
- if (!lip || (__psint_t)lip & 1) {
+ lip = cur ? cur->item : NULL;
+ if (!lip || (__psint_t) lip & 1)
lip = __xfs_trans_ail_cursor_last(ailp, lsn);
- if (!lip) {
- /* The list is empty, so just splice and return. */
- if (cur)
- cur->item = NULL;
- list_splice(list, &ailp->xa_ail);
- return;
- }
- }
+ /*
+ * If a cursor is provided, we know we're processing the AIL
+ * in lsn order, and future items to be spliced in will
+ * follow the last one being inserted now. Update the
+ * cursor to point to that last item, now while we have a
+ * reliable pointer to it.
+ */
+ if (cur)
+ cur->item = list_entry(list->prev, struct xfs_log_item, li_ail);
/*
- * Our cursor points to the item we want to insert _after_, so we have
- * to update the cursor to point to the end of the list we are splicing
- * in so that it points to the correct location for the next splice.
- * i.e. before the splice
- *
- * lsn -> lsn -> lsn + x -> lsn + x ...
- * ^
- * | cursor points here
- *
- * After the splice we have:
- *
- * lsn -> lsn -> lsn -> lsn -> .... -> lsn -> lsn + x -> lsn + x ...
- * ^ ^
- * | cursor points here | needs to move here
- *
- * So we set the cursor to the last item in the list to be spliced
- * before we execute the splice, resulting in the cursor pointing to
- * the correct item after the splice occurs.
+ * Finally perform the splice. Unless the AIL was empty,
+ * lip points to the item in the AIL _after_ which the new
+ * items should go. If lip is null the AIL was empty, so
+ * the new items go at the head of the AIL.
*/
- if (cur) {
- next_lip = list_entry(list->prev, struct xfs_log_item, li_ail);
- cur->item = next_lip;
- }
- list_splice(list, &lip->li_ail);
+ if (lip)
+ list_splice(list, &lip->li_ail);
+ else
+ list_splice(list, &ailp->xa_ail);
}
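The rewritten xfs_ail_splice() boils down to three steps: pick the insertion point from the cursor when it is still valid, record the last item of the incoming list in the cursor, then splice after the insertion point, or at the head when the AIL is empty. A minimal doubly-linked-list model of the splice-after-position primitive, hand-rolled here where the kernel uses list_splice():

#include <stdio.h>

struct node { struct node *prev, *next; int lsn; };

static void list_init(struct node *h) { h->prev = h->next = h; }

static void list_splice_after(struct node *pos, struct node *first,
			      struct node *last)
{
	last->next = pos->next;
	pos->next->prev = last;
	pos->next = first;
	first->prev = pos;
}

int main(void)
{
	struct node ail, a = {0, 0, 10}, b = {0, 0, 30};
	struct node x = {0, 0, 20}, y = {0, 0, 20};

	list_init(&ail);
	list_splice_after(&ail, &a, &a);   /* AIL: 10 */
	list_splice_after(&a, &b, &b);     /* AIL: 10 30 */

	/* splice the chain x..y (lsn 20) after the item with lsn 10 */
	x.next = &y; y.prev = &x;
	list_splice_after(&a, &x, &y);

	for (struct node *n = ail.next; n != &ail; n = n->next)
		printf("%d ", n->lsn);
	printf("\n");                      /* prints: 10 20 20 30 */
	return 0;
}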
/*
@@ -409,25 +360,36 @@ xfsaild_push(
struct xfs_ail *ailp)
{
xfs_mount_t *mp = ailp->xa_mount;
- struct xfs_ail_cursor *cur = &ailp->xa_cursors;
+ struct xfs_ail_cursor cur;
xfs_log_item_t *lip;
xfs_lsn_t lsn;
xfs_lsn_t target;
long tout = 10;
- int flush_log = 0;
int stuck = 0;
int count = 0;
int push_xfsbufd = 0;
+ /*
+ * If last time we ran we encountered pinned items, force the log first
+ * and wait for it before pushing again.
+ */
spin_lock(&ailp->xa_lock);
+ if (ailp->xa_last_pushed_lsn == 0 && ailp->xa_log_flush &&
+ !list_empty(&ailp->xa_ail)) {
+ ailp->xa_log_flush = 0;
+ spin_unlock(&ailp->xa_lock);
+ XFS_STATS_INC(xs_push_ail_flush);
+ xfs_log_force(mp, XFS_LOG_SYNC);
+ spin_lock(&ailp->xa_lock);
+ }
+
target = ailp->xa_target;
- xfs_trans_ail_cursor_init(ailp, cur);
- lip = xfs_trans_ail_cursor_first(ailp, cur, ailp->xa_last_pushed_lsn);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
if (!lip || XFS_FORCED_SHUTDOWN(mp)) {
/*
* AIL is empty or our push has reached the end.
*/
- xfs_trans_ail_cursor_done(ailp, cur);
+ xfs_trans_ail_cursor_done(ailp, &cur);
spin_unlock(&ailp->xa_lock);
goto out_done;
}
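Moving the log force to the front of the push loop means a pass that found pinned items schedules exactly one synchronous force before the next restart, rather than an asynchronous force at the end of every pass. A compact model of the reordered check, with illustrative names:

#include <stdio.h>

struct ail { long last_pushed_lsn; int log_flush; int empty; };

static void log_force_sync(void) { puts("log force (sync)"); }

static void push(struct ail *ailp)
{
	/* restarting from the start with pinned items seen last time? */
	if (ailp->last_pushed_lsn == 0 && ailp->log_flush && !ailp->empty) {
		ailp->log_flush = 0;
		log_force_sync();   /* unpin stuck items before walking again */
	}
	puts("walking AIL");
}

int main(void)
{
	struct ail a = { .last_pushed_lsn = 0, .log_flush = 1, .empty = 0 };

	push(&a);
	return 0;
}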
@@ -464,16 +426,20 @@ xfsaild_push(
switch (lock_result) {
case XFS_ITEM_SUCCESS:
XFS_STATS_INC(xs_push_ail_success);
+ trace_xfs_ail_push(lip);
+
IOP_PUSH(lip);
ailp->xa_last_pushed_lsn = lsn;
break;
case XFS_ITEM_PUSHBUF:
XFS_STATS_INC(xs_push_ail_pushbuf);
+ trace_xfs_ail_pushbuf(lip);
if (!IOP_PUSHBUF(lip)) {
+ trace_xfs_ail_pushbuf_pinned(lip);
stuck++;
- flush_log = 1;
+ ailp->xa_log_flush++;
} else {
ailp->xa_last_pushed_lsn = lsn;
}
@@ -482,12 +448,15 @@ xfsaild_push(
case XFS_ITEM_PINNED:
XFS_STATS_INC(xs_push_ail_pinned);
+ trace_xfs_ail_pinned(lip);
+
stuck++;
- flush_log = 1;
+ ailp->xa_log_flush++;
break;
case XFS_ITEM_LOCKED:
XFS_STATS_INC(xs_push_ail_locked);
+ trace_xfs_ail_locked(lip);
stuck++;
break;
@@ -519,24 +488,14 @@ xfsaild_push(
if (stuck > 100)
break;
- lip = xfs_trans_ail_cursor_next(ailp, cur);
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
if (lip == NULL)
break;
lsn = lip->li_lsn;
}
- xfs_trans_ail_cursor_done(ailp, cur);
+ xfs_trans_ail_cursor_done(ailp, &cur);
spin_unlock(&ailp->xa_lock);
- if (flush_log) {
- /*
- * If something we need to push out was pinned, then
- * push out the log so it will become unpinned and
- * move forward in the AIL.
- */
- XFS_STATS_INC(xs_push_ail_flush);
- xfs_log_force(mp, 0);
- }
-
if (push_xfsbufd) {
/* we've got delayed write buffers to flush */
wake_up_process(mp->m_ddev_targp->bt_task);
@@ -547,6 +506,7 @@ out_done:
if (!count) {
/* We're past our target or empty, so idle */
ailp->xa_last_pushed_lsn = 0;
+ ailp->xa_log_flush = 0;
tout = 50;
} else if (XFS_LSN_CMP(lsn, target) >= 0) {
@@ -565,9 +525,13 @@ out_done:
* were stuck.
*
* Backoff a bit more to allow some I/O to complete before
- * continuing from where we were.
+ * restarting from the start of the AIL. This prevents us
+ * from spinning on the same items, and if they are pinned will
+ * allow the restart to issue a log force to unpin the stuck
+ * items.
*/
tout = 20;
+ ailp->xa_last_pushed_lsn = 0;
}
return tout;
@@ -726,6 +690,7 @@ xfs_trans_ail_update_bulk(
int i;
LIST_HEAD(tmp);
+ ASSERT(nr_items > 0); /* Not required, but true. */
mlip = xfs_ail_min(ailp);
for (i = 0; i < nr_items; i++) {
@@ -745,7 +710,8 @@ xfs_trans_ail_update_bulk(
list_add(&lip->li_ail, &tmp);
}
- xfs_ail_splice(ailp, cur, &tmp, lsn);
+ if (!list_empty(&tmp))
+ xfs_ail_splice(ailp, cur, &tmp, lsn);
if (!mlip_changed) {
spin_unlock(&ailp->xa_lock);
@@ -864,6 +830,7 @@ xfs_trans_ail_init(
ailp->xa_mount = mp;
INIT_LIST_HEAD(&ailp->xa_ail);
+ INIT_LIST_HEAD(&ailp->xa_cursors);
spin_lock_init(&ailp->xa_lock);
ailp->xa_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 03b3b7f..475a4de 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -54,7 +54,7 @@ xfs_trans_buf_item_match(
list_for_each_entry(lidp, &tp->t_items, lid_trans) {
blip = (struct xfs_buf_log_item *)lidp->lid_item;
if (blip->bli_item.li_type == XFS_LI_BUF &&
- XFS_BUF_TARGET(blip->bli_buf) == target &&
+ blip->bli_buf->b_target == target &&
XFS_BUF_ADDR(blip->bli_buf) == blkno &&
XFS_BUF_COUNT(blip->bli_buf) == len)
return blip->bli_buf;
@@ -80,8 +80,7 @@ _xfs_trans_bjoin(
{
struct xfs_buf_log_item *bip;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+ ASSERT(bp->b_transp == NULL);
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
@@ -89,7 +88,7 @@ _xfs_trans_bjoin(
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ bip = bp->b_fspriv;
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
@@ -110,7 +109,7 @@ _xfs_trans_bjoin(
* Initialize b_fsprivate2 so we can find it with incore_match()
* in xfs_trans_get_buf() and friends above.
*/
- XFS_BUF_SET_FSPRIVATE2(bp, tp);
+ bp->b_transp = tp;
}
@@ -160,9 +159,11 @@ xfs_trans_get_buf(xfs_trans_t *tp,
*/
bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
if (bp != NULL) {
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
- if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
- XFS_BUF_SUPER_STALE(bp);
+ ASSERT(xfs_buf_islocked(bp));
+ if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
+ xfs_buf_stale(bp);
+ XFS_BUF_DONE(bp);
+ }
/*
* If the buffer is stale then it was binval'ed
@@ -172,8 +173,8 @@ xfs_trans_get_buf(xfs_trans_t *tp,
else if (XFS_BUF_ISSTALE(bp))
ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ bip = bp->b_fspriv;
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
@@ -194,7 +195,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
return NULL;
}
- ASSERT(!XFS_BUF_GETERROR(bp));
+ ASSERT(!bp->b_error);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_get_buf(bp->b_fspriv);
@@ -232,8 +233,8 @@ xfs_trans_getsb(xfs_trans_t *tp,
* recursion count and return the buffer to the caller.
*/
bp = mp->m_sb_bp;
- if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) {
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+ if (bp->b_transp == tp) {
+ bip = bp->b_fspriv;
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
@@ -293,10 +294,9 @@ xfs_trans_read_buf(
return (flags & XBF_TRYLOCK) ?
EAGAIN : XFS_ERROR(ENOMEM);
- if (XFS_BUF_GETERROR(bp) != 0) {
- xfs_ioerror_alert("xfs_trans_read_buf", mp,
- bp, blkno);
- error = XFS_BUF_GETERROR(bp);
+ if (bp->b_error) {
+ error = bp->b_error;
+ xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp);
return error;
}
@@ -327,10 +327,10 @@ xfs_trans_read_buf(
*/
bp = xfs_trans_buf_item_match(tp, target, blkno, len);
if (bp != NULL) {
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
- ASSERT((XFS_BUF_ISERROR(bp)) == 0);
+ ASSERT(xfs_buf_islocked(bp));
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bp->b_fspriv != NULL);
+ ASSERT(!bp->b_error);
if (!(XFS_BUF_ISDONE(bp))) {
trace_xfs_trans_read_buf_io(bp, _RET_IP_);
ASSERT(!XFS_BUF_ISASYNC(bp));
@@ -338,8 +338,7 @@ xfs_trans_read_buf(
xfsbdstrat(tp->t_mountp, bp);
error = xfs_buf_iowait(bp);
if (error) {
- xfs_ioerror_alert("xfs_trans_read_buf", mp,
- bp, blkno);
+ xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp);
/*
* We can gracefully recover from most read
@@ -363,7 +362,7 @@ xfs_trans_read_buf(
}
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+ bip = bp->b_fspriv;
bip->bli_recur++;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -386,12 +385,11 @@ xfs_trans_read_buf(
return (flags & XBF_TRYLOCK) ?
0 : XFS_ERROR(ENOMEM);
}
- if (XFS_BUF_GETERROR(bp) != 0) {
- XFS_BUF_SUPER_STALE(bp);
- error = XFS_BUF_GETERROR(bp);
-
- xfs_ioerror_alert("xfs_trans_read_buf", mp,
- bp, blkno);
+ if (bp->b_error) {
+ error = bp->b_error;
+ xfs_buf_stale(bp);
+ XFS_BUF_DONE(bp);
+ xfs_buf_ioerror_alert(bp, __func__);
if (tp->t_flags & XFS_TRANS_DIRTY)
xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
xfs_buf_relse(bp);
@@ -430,7 +428,7 @@ shutdown_abort:
if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
xfs_notice(mp, "about to pop assert, bp == 0x%p", bp);
#endif
- ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
+ ASSERT((bp->b_flags & (XBF_STALE|XBF_DELWRI)) !=
(XBF_STALE|XBF_DELWRI));
trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
@@ -460,32 +458,30 @@ xfs_trans_brelse(xfs_trans_t *tp,
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
- xfs_log_item_t *lip;
/*
* Default to a normal brelse() call if the tp is NULL.
*/
if (tp == NULL) {
- ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+ struct xfs_log_item *lip = bp->b_fspriv;
+
+ ASSERT(bp->b_transp == NULL);
+
/*
* If there's a buf log item attached to the buffer,
* then let the AIL know that the buffer is being
* unlocked.
*/
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- if (lip->li_type == XFS_LI_BUF) {
- bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
- xfs_trans_unlocked_item(bip->bli_item.li_ailp,
- lip);
- }
+ if (lip != NULL && lip->li_type == XFS_LI_BUF) {
+ bip = bp->b_fspriv;
+ xfs_trans_unlocked_item(bip->bli_item.li_ailp, lip);
}
xfs_buf_relse(bp);
return;
}
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ bip = bp->b_fspriv;
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
@@ -556,7 +552,7 @@ xfs_trans_brelse(xfs_trans_t *tp,
xfs_buf_item_relse(bp);
bip = NULL;
}
- XFS_BUF_SET_FSPRIVATE2(bp, NULL);
+ bp->b_transp = NULL;
/*
* If we've still got a buf log item on the buffer, then
@@ -581,16 +577,14 @@ void
xfs_trans_bhold(xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
+
bip->bli_flags |= XFS_BLI_HOLD;
trace_xfs_trans_bhold(bip);
}
@@ -603,19 +597,16 @@ void
xfs_trans_bhold_release(xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT(bip->bli_flags & XFS_BLI_HOLD);
- bip->bli_flags &= ~XFS_BLI_HOLD;
+ bip->bli_flags &= ~XFS_BLI_HOLD;
trace_xfs_trans_bhold_release(bip);
}
@@ -634,14 +625,13 @@ xfs_trans_log_buf(xfs_trans_t *tp,
uint first,
uint last)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
- ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) ||
- (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks));
+ ASSERT(bp->b_iodone == NULL ||
+ bp->b_iodone == xfs_buf_iodone_callbacks);
/*
* Mark the buffer as needing to be written out eventually,
@@ -653,14 +643,14 @@ xfs_trans_log_buf(xfs_trans_t *tp,
* inside the b_bdstrat callback so that this won't get written to
* disk.
*/
- XFS_BUF_DELAYWRITE(bp);
XFS_BUF_DONE(bp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
- XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
+ bp->b_iodone = xfs_buf_iodone_callbacks;
bip->bli_item.li_cb = xfs_buf_iodone;
+ xfs_buf_delwri_queue(bp);
+
trace_xfs_trans_log_buf(bip);
/*
@@ -706,13 +696,10 @@ xfs_trans_binval(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_trans_binval(bip);
@@ -752,8 +739,7 @@ xfs_trans_binval(
* We set the stale bit in the buffer as well since we're getting
* rid of it.
*/
- XFS_BUF_UNDELAYWRITE(bp);
- XFS_BUF_STALE(bp);
+ xfs_buf_stale(bp);
bip->bli_flags |= XFS_BLI_STALE;
bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
@@ -780,13 +766,10 @@ xfs_trans_inode_buf(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
-
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_INODE_BUF;
@@ -806,13 +789,10 @@ xfs_trans_stale_inode_buf(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
-
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_STALE_INODE;
@@ -833,13 +813,10 @@ xfs_trans_inode_alloc_buf(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
@@ -863,16 +840,13 @@ xfs_trans_dquot_buf(
xfs_buf_t *bp,
uint type)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(type == XFS_BLF_UDQUOT_BUF ||
type == XFS_BLF_PDQUOT_BUF ||
type == XFS_BLF_GDQUOT_BUF);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_format.blf_flags |= type;
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index 048b0c6..32f0288 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -47,20 +47,23 @@ xfs_trans_inode_broot_debug(
* Add a locked inode to the transaction.
*
* The inode must be locked, and it cannot be associated with any transaction.
+ * If lock_flags is non-zero the inode will be unlocked on transaction commit.
*/
void
xfs_trans_ijoin(
struct xfs_trans *tp,
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ uint lock_flags)
{
xfs_inode_log_item_t *iip;
- ASSERT(ip->i_transp == NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
if (ip->i_itemp == NULL)
xfs_inode_item_init(ip, ip->i_mount);
iip = ip->i_itemp;
+
ASSERT(iip->ili_lock_flags == 0);
+ iip->ili_lock_flags = lock_flags;
/*
* Get a log_item_desc to point at the new item.
@@ -68,31 +71,6 @@ xfs_trans_ijoin(
xfs_trans_add_item(tp, &iip->ili_item);
xfs_trans_inode_broot_debug(ip);
-
- /*
- * Initialize i_transp so we can find it with xfs_inode_incore()
- * in xfs_trans_iget() above.
- */
- ip->i_transp = tp;
-}
-
-/*
- * Add a locked inode to the transaction.
- *
- *
- * Grabs a reference to the inode which will be dropped when the transaction
- * is committed. The inode will also be unlocked at that point. The inode
- * must be locked, and it cannot be associated with any transaction.
- */
-void
-xfs_trans_ijoin_ref(
- struct xfs_trans *tp,
- struct xfs_inode *ip,
- uint lock_flags)
-{
- xfs_trans_ijoin(tp, ip);
- IHOLD(ip);
- ip->i_itemp->ili_lock_flags = lock_flags;
}
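After the merge there is a single join primitive whose lock_flags argument decides who unlocks the inode: zero keeps the lock with the caller across the commit, non-zero hands it to the transaction. A standalone sketch of the two calling conventions; the names are stand-ins, not the kernel's:

#include <stdio.h>

#define ILOCK_EXCL 0x1

struct inode_item { unsigned int ili_lock_flags; };

static void trans_ijoin(struct inode_item *iip, unsigned int lock_flags)
{
	iip->ili_lock_flags = lock_flags;
}

static void trans_commit(struct inode_item *iip)
{
	if (iip->ili_lock_flags)
		printf("commit unlocks inode (flags 0x%x)\n",
		       iip->ili_lock_flags);
	else
		puts("commit leaves inode locked; caller unlocks");
}

int main(void)
{
	struct inode_item a = {0}, b = {0};

	trans_ijoin(&a, 0);           /* old xfs_trans_ijoin() behaviour */
	trans_ijoin(&b, ILOCK_EXCL);  /* old xfs_trans_ijoin_ref() behaviour */
	trans_commit(&a);
	trans_commit(&b);
	return 0;
}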
/*
@@ -111,7 +89,6 @@ xfs_trans_ichgtime(
ASSERT(tp);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_transp == tp);
tv = current_fs_time(inode->i_sb);
@@ -140,7 +117,6 @@ xfs_trans_log_inode(
xfs_inode_t *ip,
uint flags)
{
- ASSERT(ip->i_transp == tp);
ASSERT(ip->i_itemp != NULL);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index fe2e3cb..44820b9 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -53,7 +53,7 @@ void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv,
* of the list to trigger traversal restarts.
*/
struct xfs_ail_cursor {
- struct xfs_ail_cursor *next;
+ struct list_head list;
struct xfs_log_item *item;
};
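Putting every active cursor on the AIL's xa_cursors list means item removal can invalidate all of them in one walk, with no special case for an embedded cursor. A tiny model of the register-then-invalidate idea, using a hand-rolled singly linked list where the kernel uses struct list_head:

#include <stdio.h>
#include <stdint.h>

struct cursor {
	struct cursor *next;   /* link on the AIL's cursor list */
	uintptr_t item;
};

struct ail { struct cursor *cursors; };

static void cursor_init(struct ail *a, struct cursor *c)
{
	c->item = 0;
	c->next = a->cursors;
	a->cursors = c;
}

static void cursor_clear(struct ail *a, uintptr_t removed)
{
	for (struct cursor *c = a->cursors; c; c = c->next)
		if (c->item == removed)
			c->item |= 1;   /* mark invalid; traversal restarts */
}

int main(void)
{
	struct ail a = { 0 };
	struct cursor c1, c2;

	cursor_init(&a, &c1);
	cursor_init(&a, &c2);
	c1.item = c2.item = 0x1000;
	cursor_clear(&a, 0x1000);
	printf("c1 invalid: %d, c2 invalid: %d\n",
	       (int)(c1.item & 1), (int)(c2.item & 1));
	return 0;
}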
@@ -67,9 +67,10 @@ struct xfs_ail {
struct task_struct *xa_task;
struct list_head xa_ail;
xfs_lsn_t xa_target;
- struct xfs_ail_cursor xa_cursors;
+ struct list_head xa_cursors;
spinlock_t xa_lock;
xfs_lsn_t xa_last_pushed_lsn;
+ int xa_log_flush;
};
/*
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 59509ae..ee98d0b 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -50,430 +50,6 @@
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
-int
-xfs_setattr(
- struct xfs_inode *ip,
- struct iattr *iattr,
- int flags)
-{
- xfs_mount_t *mp = ip->i_mount;
- struct inode *inode = VFS_I(ip);
- int mask = iattr->ia_valid;
- xfs_trans_t *tp;
- int code;
- uint lock_flags;
- uint commit_flags=0;
- uid_t uid=0, iuid=0;
- gid_t gid=0, igid=0;
- struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
- int need_iolock = 1;
-
- trace_xfs_setattr(ip);
-
- if (mp->m_flags & XFS_MOUNT_RDONLY)
- return XFS_ERROR(EROFS);
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return XFS_ERROR(EIO);
-
- code = -inode_change_ok(inode, iattr);
- if (code)
- return code;
-
- olddquot1 = olddquot2 = NULL;
- udqp = gdqp = NULL;
-
- /*
- * If disk quotas is on, we make sure that the dquots do exist on disk,
- * before we start any other transactions. Trying to do this later
- * is messy. We don't care to take a readlock to look at the ids
- * in inode here, because we can't hold it across the trans_reserve.
- * If the IDs do change before we take the ilock, we're covered
- * because the i_*dquot fields will get updated anyway.
- */
- if (XFS_IS_QUOTA_ON(mp) && (mask & (ATTR_UID|ATTR_GID))) {
- uint qflags = 0;
-
- if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) {
- uid = iattr->ia_uid;
- qflags |= XFS_QMOPT_UQUOTA;
- } else {
- uid = ip->i_d.di_uid;
- }
- if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) {
- gid = iattr->ia_gid;
- qflags |= XFS_QMOPT_GQUOTA;
- } else {
- gid = ip->i_d.di_gid;
- }
-
- /*
- * We take a reference when we initialize udqp and gdqp,
- * so it is important that we never blindly double trip on
- * the same variable. See xfs_create() for an example.
- */
- ASSERT(udqp == NULL);
- ASSERT(gdqp == NULL);
- code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip),
- qflags, &udqp, &gdqp);
- if (code)
- return code;
- }
-
- /*
- * For the other attributes, we acquire the inode lock and
- * first do an error checking pass.
- */
- tp = NULL;
- lock_flags = XFS_ILOCK_EXCL;
- if (flags & XFS_ATTR_NOLOCK)
- need_iolock = 0;
- if (!(mask & ATTR_SIZE)) {
- tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
- commit_flags = 0;
- code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
- 0, 0, 0);
- if (code) {
- lock_flags = 0;
- goto error_return;
- }
- } else {
- if (need_iolock)
- lock_flags |= XFS_IOLOCK_EXCL;
- }
-
- xfs_ilock(ip, lock_flags);
-
- /*
- * Change file ownership. Must be the owner or privileged.
- */
- if (mask & (ATTR_UID|ATTR_GID)) {
- /*
- * These IDs could have changed since we last looked at them.
- * But, we're assured that if the ownership did change
- * while we didn't have the inode locked, inode's dquot(s)
- * would have changed also.
- */
- iuid = ip->i_d.di_uid;
- igid = ip->i_d.di_gid;
- gid = (mask & ATTR_GID) ? iattr->ia_gid : igid;
- uid = (mask & ATTR_UID) ? iattr->ia_uid : iuid;
-
- /*
- * Do a quota reservation only if uid/gid is actually
- * going to change.
- */
- if (XFS_IS_QUOTA_RUNNING(mp) &&
- ((XFS_IS_UQUOTA_ON(mp) && iuid != uid) ||
- (XFS_IS_GQUOTA_ON(mp) && igid != gid))) {
- ASSERT(tp);
- code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
- capable(CAP_FOWNER) ?
- XFS_QMOPT_FORCE_RES : 0);
- if (code) /* out of quota */
- goto error_return;
- }
- }
-
- /*
- * Truncate file. Must have write permission and not be a directory.
- */
- if (mask & ATTR_SIZE) {
- /* Short circuit the truncate case for zero length files */
- if (iattr->ia_size == 0 &&
- ip->i_size == 0 && ip->i_d.di_nextents == 0) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- lock_flags &= ~XFS_ILOCK_EXCL;
- if (mask & ATTR_CTIME) {
- inode->i_mtime = inode->i_ctime =
- current_fs_time(inode->i_sb);
- xfs_mark_inode_dirty_sync(ip);
- }
- code = 0;
- goto error_return;
- }
-
- if (S_ISDIR(ip->i_d.di_mode)) {
- code = XFS_ERROR(EISDIR);
- goto error_return;
- } else if (!S_ISREG(ip->i_d.di_mode)) {
- code = XFS_ERROR(EINVAL);
- goto error_return;
- }
-
- /*
- * Make sure that the dquots are attached to the inode.
- */
- code = xfs_qm_dqattach_locked(ip, 0);
- if (code)
- goto error_return;
-
- /*
- * Now we can make the changes. Before we join the inode
- * to the transaction, if ATTR_SIZE is set then take care of
- * the part of the truncation that must be done without the
- * inode lock. This needs to be done before joining the inode
- * to the transaction, because the inode cannot be unlocked
- * once it is a part of the transaction.
- */
- if (iattr->ia_size > ip->i_size) {
- /*
- * Do the first part of growing a file: zero any data
- * in the last block that is beyond the old EOF. We
- * need to do this before the inode is joined to the
- * transaction to modify the i_size.
- */
- code = xfs_zero_eof(ip, iattr->ia_size, ip->i_size);
- if (code)
- goto error_return;
- }
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- lock_flags &= ~XFS_ILOCK_EXCL;
-
- /*
- * We are going to log the inode size change in this
- * transaction so any previous writes that are beyond the on
- * disk EOF and the new EOF that have not been written out need
- * to be written here. If we do not write the data out, we
- * expose ourselves to the null files problem.
- *
- * Only flush from the on disk size to the smaller of the in
- * memory file size or the new size as that's the range we
- * really care about here and prevents waiting for other data
- * not within the range we care about here.
- */
- if (ip->i_size != ip->i_d.di_size &&
- iattr->ia_size > ip->i_d.di_size) {
- code = xfs_flush_pages(ip,
- ip->i_d.di_size, iattr->ia_size,
- XBF_ASYNC, FI_NONE);
- if (code)
- goto error_return;
- }
-
- /* wait for all I/O to complete */
- xfs_ioend_wait(ip);
-
- code = -block_truncate_page(inode->i_mapping, iattr->ia_size,
- xfs_get_blocks);
- if (code)
- goto error_return;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);
- code = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES,
- XFS_ITRUNCATE_LOG_COUNT);
- if (code)
- goto error_return;
-
- truncate_setsize(inode, iattr->ia_size);
-
- commit_flags = XFS_TRANS_RELEASE_LOG_RES;
- lock_flags |= XFS_ILOCK_EXCL;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- xfs_trans_ijoin(tp, ip);
-
- /*
- * Only change the c/mtime if we are changing the size
- * or we are explicitly asked to change it. This handles
- * the semantic difference between truncate() and ftruncate()
- * as implemented in the VFS.
- *
- * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
- * is a special case where we need to update the times despite
- * not having these flags set. For all other operations the
- * VFS set these flags explicitly if it wants a timestamp
- * update.
- */
- if (iattr->ia_size != ip->i_size &&
- (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
- iattr->ia_ctime = iattr->ia_mtime =
- current_fs_time(inode->i_sb);
- mask |= ATTR_CTIME | ATTR_MTIME;
- }
-
- if (iattr->ia_size > ip->i_size) {
- ip->i_d.di_size = iattr->ia_size;
- ip->i_size = iattr->ia_size;
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- } else if (iattr->ia_size <= ip->i_size ||
- (iattr->ia_size == 0 && ip->i_d.di_nextents)) {
- /*
- * signal a sync transaction unless
- * we're truncating an already unlinked
- * file on a wsync filesystem
- */
- code = xfs_itruncate_finish(&tp, ip, iattr->ia_size,
- XFS_DATA_FORK,
- ((ip->i_d.di_nlink != 0 ||
- !(mp->m_flags & XFS_MOUNT_WSYNC))
- ? 1 : 0));
- if (code)
- goto abort_return;
- /*
- * Truncated "down", so we're removing references
- * to old data here - if we now delay flushing for
- * a long time, we expose ourselves unduly to the
- * notorious NULL files problem. So, we mark this
- * vnode and flush it when the file is closed, and
- * do not wait the usual (long) time for writeout.
- */
- xfs_iflags_set(ip, XFS_ITRUNCATED);
- }
- } else if (tp) {
- xfs_trans_ijoin(tp, ip);
- }
-
- /*
- * Change file ownership. Must be the owner or privileged.
- */
- if (mask & (ATTR_UID|ATTR_GID)) {
- /*
- * CAP_FSETID overrides the following restrictions:
- *
- * The set-user-ID and set-group-ID bits of a file will be
- * cleared upon successful return from chown()
- */
- if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
- !capable(CAP_FSETID)) {
- ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
- }
-
- /*
- * Change the ownerships and register quota modifications
- * in the transaction.
- */
- if (iuid != uid) {
- if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_UQUOTA_ON(mp)) {
- ASSERT(mask & ATTR_UID);
- ASSERT(udqp);
- olddquot1 = xfs_qm_vop_chown(tp, ip,
- &ip->i_udquot, udqp);
- }
- ip->i_d.di_uid = uid;
- inode->i_uid = uid;
- }
- if (igid != gid) {
- if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) {
- ASSERT(!XFS_IS_PQUOTA_ON(mp));
- ASSERT(mask & ATTR_GID);
- ASSERT(gdqp);
- olddquot2 = xfs_qm_vop_chown(tp, ip,
- &ip->i_gdquot, gdqp);
- }
- ip->i_d.di_gid = gid;
- inode->i_gid = gid;
- }
- }
-
- /*
- * Change file access modes.
- */
- if (mask & ATTR_MODE) {
- umode_t mode = iattr->ia_mode;
-
- if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
- mode &= ~S_ISGID;
-
- ip->i_d.di_mode &= S_IFMT;
- ip->i_d.di_mode |= mode & ~S_IFMT;
-
- inode->i_mode &= S_IFMT;
- inode->i_mode |= mode & ~S_IFMT;
- }
-
- /*
- * Change file access or modified times.
- */
- if (mask & ATTR_ATIME) {
- inode->i_atime = iattr->ia_atime;
- ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
- ip->i_update_core = 1;
- }
- if (mask & ATTR_CTIME) {
- inode->i_ctime = iattr->ia_ctime;
- ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
- ip->i_update_core = 1;
- }
- if (mask & ATTR_MTIME) {
- inode->i_mtime = iattr->ia_mtime;
- ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
- ip->i_update_core = 1;
- }
-
- /*
- * And finally, log the inode core if any attribute in it
- * has been changed.
- */
- if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
- ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-
- XFS_STATS_INC(xs_ig_attrchg);
-
- /*
- * If this is a synchronous mount, make sure that the
- * transaction goes to disk before returning to the user.
- * This is slightly sub-optimal in that truncates require
- * two sync transactions instead of one for wsync filesystems.
- * One for the truncate and one for the timestamps since we
- * don't want to change the timestamps unless we're sure the
- * truncate worked. Truncates are less than 1% of the laddis
- * mix so this probably isn't worth the trouble to optimize.
- */
- code = 0;
- if (mp->m_flags & XFS_MOUNT_WSYNC)
- xfs_trans_set_sync(tp);
-
- code = xfs_trans_commit(tp, commit_flags);
-
- xfs_iunlock(ip, lock_flags);
-
- /*
- * Release any dquot(s) the inode had kept before chown.
- */
- xfs_qm_dqrele(olddquot1);
- xfs_qm_dqrele(olddquot2);
- xfs_qm_dqrele(udqp);
- xfs_qm_dqrele(gdqp);
-
- if (code)
- return code;
-
- /*
- * XXX(hch): Updating the ACL entries is not atomic vs the i_mode
- * update. We could avoid this with linked transactions
- * and passing down the transaction pointer all the way
- * to attr_set. No previous user of the generic
- * Posix ACL code seems to care about this issue either.
- */
- if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
- code = -xfs_acl_chmod(inode);
- if (code)
- return XFS_ERROR(code);
- }
-
- return 0;
-
- abort_return:
- commit_flags |= XFS_TRANS_ABORT;
- error_return:
- xfs_qm_dqrele(udqp);
- xfs_qm_dqrele(gdqp);
- if (tp) {
- xfs_trans_cancel(tp, commit_flags);
- }
- if (lock_flags != 0) {
- xfs_iunlock(ip, lock_flags);
- }
- return code;
-}
-
/*
* The maximum pathlen is 1024 bytes. Since the minimum file system
* blocksize is 512 bytes, we can get a max of 2 extents back from
@@ -496,8 +72,8 @@ xfs_readlink_bmap(
xfs_buf_t *bp;
int error = 0;
- error = xfs_bmapi(NULL, ip, 0, XFS_B_TO_FSB(mp, pathlen), 0, NULL, 0,
- mval, &nmaps, NULL);
+ error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, pathlen), mval, &nmaps,
+ 0);
if (error)
goto out;
@@ -507,10 +83,11 @@ xfs_readlink_bmap(
bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt),
XBF_LOCK | XBF_MAPPED | XBF_DONT_BLOCK);
- error = XFS_BUF_GETERROR(bp);
+ if (!bp)
+ return XFS_ERROR(ENOMEM);
+ error = bp->b_error;
if (error) {
- xfs_ioerror_alert("xfs_readlink",
- ip->i_mount, bp, XFS_BUF_ADDR(bp));
+ xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp);
goto out;
}
@@ -518,7 +95,7 @@ xfs_readlink_bmap(
byte_cnt = pathlen;
pathlen -= byte_cnt;
- memcpy(link, XFS_BUF_PTR(bp), byte_cnt);
+ memcpy(link, bp->b_addr, byte_cnt);
xfs_buf_relse(bp);
}
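xfs_bmapi_read() is a dedicated read-only lookup, so callers no longer pass the transaction, firstblock and free-list arguments that the multiplexed xfs_bmapi() demanded even for queries. A toy model of the slimmer call shape, with stub mapping logic and illustrative names:

#include <stdio.h>

struct bmbt_irec { long br_startoff, br_startblock, br_blockcount; };

/* read-only lookup: small argument list, no allocation state */
static int bmapi_read(long bno, long len, struct bmbt_irec *mval,
		      int *nmaps, int flags)
{
	(void)flags;
	mval[0].br_startoff = bno;
	mval[0].br_startblock = 1000 + bno;   /* fake mapping */
	mval[0].br_blockcount = len;
	*nmaps = 1;
	return 0;
}

int main(void)
{
	struct bmbt_irec map;
	int nmaps = 1;

	if (bmapi_read(0, 8, &map, &nmaps, 0) == 0)
		printf("mapped %ld blocks at %ld\n",
		       map.br_blockcount, map.br_startblock);
	return 0;
}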
@@ -607,8 +184,7 @@ xfs_free_eofblocks(
nimaps = 1;
xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_bmapi(NULL, ip, end_fsb, map_len, 0,
- NULL, 0, &imap, &nimaps, NULL);
+ error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
if (!error && (nimaps != 0) &&
@@ -628,13 +204,6 @@ xfs_free_eofblocks(
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- /*
- * Do the xfs_itruncate_start() call before
- * reserving any log space because
- * itruncate_start will call into the buffer
- * cache and we can't
- * do that within a transaction.
- */
if (flags & XFS_FREE_EOF_TRYLOCK) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0);
@@ -643,13 +212,6 @@ xfs_free_eofblocks(
} else {
xfs_ilock(ip, XFS_IOLOCK_EXCL);
}
- error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE,
- ip->i_size);
- if (error) {
- xfs_trans_cancel(tp, 0);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return error;
- }
error = xfs_trans_reserve(tp, 0,
XFS_ITRUNCATE_LOG_RES(mp),
@@ -663,17 +225,14 @@ xfs_free_eofblocks(
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
- error = xfs_itruncate_finish(&tp, ip,
- ip->i_size,
- XFS_DATA_FORK,
- 0);
- /*
- * If we get an error at this point we
- * simply don't bother truncating the file.
- */
+ error = xfs_itruncate_data(&tp, ip, ip->i_size);
if (error) {
+ /*
+ * If we get an error at this point we simply don't
+ * bother truncating the file.
+ */
xfs_trans_cancel(tp,
(XFS_TRANS_RELEASE_LOG_RES |
XFS_TRANS_ABORT));
@@ -735,7 +294,7 @@ xfs_inactive_symlink_rmt(
xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
size = (int)ip->i_d.di_size;
ip->i_d.di_size = 0;
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
/*
* Find the block(s) so we can inval and unmap them.
@@ -743,9 +302,9 @@ xfs_inactive_symlink_rmt(
done = 0;
xfs_bmap_init(&free_list, &first_block);
nmaps = ARRAY_SIZE(mval);
- if ((error = xfs_bmapi(tp, ip, 0, XFS_B_TO_FSB(mp, size),
- XFS_BMAPI_METADATA, &first_block, 0, mval, &nmaps,
- &free_list)))
+ error = xfs_bmapi_read(ip, 0, XFS_B_TO_FSB(mp, size),
+ mval, &nmaps, 0);
+ if (error)
goto error0;
/*
* Invalidate the block(s).
@@ -754,6 +313,10 @@ xfs_inactive_symlink_rmt(
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
+ if (!bp) {
+ error = ENOMEM;
+ goto error1;
+ }
xfs_trans_binval(tp, bp);
}
/*
@@ -779,7 +342,7 @@ xfs_inactive_symlink_rmt(
* Mark it dirty so it will be logged and moved forward in the log as
* part of every commit.
*/
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
/*
* Get a new, empty transaction to return to our caller.
@@ -912,7 +475,7 @@ xfs_inactive_attrs(
goto error_cancel;
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
xfs_idestroy_fork(ip, XFS_ATTR_FORK);
ASSERT(ip->i_d.di_anextents == 0);
@@ -977,7 +540,7 @@ xfs_release(
if (ip->i_d.di_nlink == 0)
return 0;
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS)) &&
@@ -1058,7 +621,7 @@ xfs_inactive(
truncate = ((ip->i_d.di_nlink == 0) &&
((ip->i_d.di_size != 0) || (ip->i_size != 0) ||
(ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
- ((ip->i_d.di_mode & S_IFMT) == S_IFREG));
+ S_ISREG(ip->i_d.di_mode));
mp = ip->i_mount;
@@ -1069,7 +632,7 @@ xfs_inactive(
goto out;
if (ip->i_d.di_nlink != 0) {
- if ((((ip->i_d.di_mode & S_IFMT) == S_IFREG) &&
+ if ((S_ISREG(ip->i_d.di_mode) &&
((ip->i_size > 0) || (VN_CACHED(VFS_I(ip)) > 0 ||
ip->i_delayed_blks > 0)) &&
(ip->i_df.if_flags & XFS_IFEXTENTS) &&
@@ -1091,21 +654,8 @@ xfs_inactive(
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
if (truncate) {
- /*
- * Do the xfs_itruncate_start() call before
- * reserving any log space because itruncate_start
- * will call into the buffer cache and we can't
- * do that within a transaction.
- */
xfs_ilock(ip, XFS_IOLOCK_EXCL);
- error = xfs_itruncate_start(ip, XFS_ITRUNC_DEFINITE, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return VN_INACTIVE_CACHE;
- }
-
error = xfs_trans_reserve(tp, 0,
XFS_ITRUNCATE_LOG_RES(mp),
0, XFS_TRANS_PERM_LOG_RES,
@@ -1119,25 +669,16 @@ xfs_inactive(
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip);
-
- /*
- * normally, we have to run xfs_itruncate_finish sync.
- * But if filesystem is wsync and we're in the inactive
- * path, then we know that nlink == 0, and that the
- * xaction that made nlink == 0 is permanently committed
- * since xfs_remove runs as a synchronous transaction.
- */
- error = xfs_itruncate_finish(&tp, ip, 0, XFS_DATA_FORK,
- (!(mp->m_flags & XFS_MOUNT_WSYNC) ? 1 : 0));
+ xfs_trans_ijoin(tp, ip, 0);
+ error = xfs_itruncate_data(&tp, ip, 0);
if (error) {
xfs_trans_cancel(tp,
XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
return VN_INACTIVE_CACHE;
}
- } else if ((ip->i_d.di_mode & S_IFMT) == S_IFLNK) {
+ } else if (S_ISLNK(ip->i_d.di_mode)) {
/*
* If we get an error while cleaning up a
@@ -1152,7 +693,7 @@ xfs_inactive(
return VN_INACTIVE_CACHE;
}
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
} else {
error = xfs_trans_reserve(tp, 0,
XFS_IFREE_LOG_RES(mp),
@@ -1165,7 +706,7 @@ xfs_inactive(
}
xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
}
/*
@@ -1405,7 +946,7 @@ xfs_create(
* the transaction cancel unlocking dp so don't do it explicitly in the
* error path.
*/
- xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
unlock_dp_on_error = B_FALSE;
error = xfs_dir_createname(tp, dp, name, ip->i_ino,
@@ -1726,8 +1267,8 @@ xfs_remove(
xfs_lock_two_inodes(dp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
/*
* If we're removing a directory perform some additional validation.
@@ -1872,8 +1413,8 @@ xfs_link(
xfs_lock_two_inodes(sip, tdp, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, sip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, tdp, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, sip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, tdp, XFS_ILOCK_EXCL);
/*
* If the source has too many links, we can't make any more to it.
@@ -2067,7 +1608,7 @@ xfs_symlink(
* transaction cancel unlocking dp so don't do it explicitly in the
* error path.
*/
- xfs_trans_ijoin_ref(tp, dp, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, dp, XFS_ILOCK_EXCL);
unlock_dp_on_error = B_FALSE;
/*
@@ -2098,10 +1639,9 @@ xfs_symlink(
first_fsb = 0;
nmaps = SYMLINK_MAPS;
- error = xfs_bmapi(tp, ip, first_fsb, fs_blocks,
- XFS_BMAPI_WRITE | XFS_BMAPI_METADATA,
- &first_block, resblks, mval, &nmaps,
- &free_list);
+ error = xfs_bmapi_write(tp, ip, first_fsb, fs_blocks,
+ XFS_BMAPI_METADATA, &first_block, resblks,
+ mval, &nmaps, &free_list);
if (error)
goto error2;
@@ -2116,13 +1656,16 @@ xfs_symlink(
byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
BTOBB(byte_cnt), 0);
- ASSERT(bp && !XFS_BUF_GETERROR(bp));
+ if (!bp) {
+ error = ENOMEM;
+ goto error2;
+ }
if (pathlen < byte_cnt) {
byte_cnt = pathlen;
}
pathlen -= byte_cnt;
- memcpy(XFS_BUF_PTR(bp), cur_chunk, byte_cnt);
+ memcpy(bp->b_addr, cur_chunk, byte_cnt);
cur_chunk += byte_cnt;
xfs_trans_log_buf(tp, bp, 0, byte_cnt - 1);
@@ -2198,7 +1741,7 @@ xfs_set_dmattrs(
return error;
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin_ref(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
ip->i_d.di_dmevmask = evmask;
ip->i_d.di_dmstate = state;
@@ -2244,7 +1787,6 @@ xfs_alloc_file_space(
xfs_fileoff_t startoffset_fsb;
xfs_fsblock_t firstfsb;
int nimaps;
- int bmapi_flag;
int quota_flag;
int rt;
xfs_trans_t *tp;
@@ -2272,7 +1814,6 @@ xfs_alloc_file_space(
count = len;
imapp = &imaps[0];
nimaps = 1;
- bmapi_flag = XFS_BMAPI_WRITE | alloc_type;
startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
allocatesize_fsb = XFS_B_TO_FSB(mp, count);
@@ -2343,16 +1884,12 @@ xfs_alloc_file_space(
if (error)
goto error1;
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
- /*
- * Issue the xfs_bmapi() call to allocate the blocks
- */
xfs_bmap_init(&free_list, &firstfsb);
- error = xfs_bmapi(tp, ip, startoffset_fsb,
- allocatesize_fsb, bmapi_flag,
- &firstfsb, 0, imapp, &nimaps,
- &free_list);
+ error = xfs_bmapi_write(tp, ip, startoffset_fsb,
+ allocatesize_fsb, alloc_type, &firstfsb,
+ 0, imapp, &nimaps, &free_list);
if (error) {
goto error0;
}
@@ -2437,11 +1974,12 @@ xfs_zero_remaining_bytes(
if (!bp)
return XFS_ERROR(ENOMEM);
+ xfs_buf_unlock(bp);
+
for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
offset_fsb = XFS_B_TO_FSBT(mp, offset);
nimap = 1;
- error = xfs_bmapi(NULL, ip, offset_fsb, 1, 0,
- NULL, 0, &imap, &nimap, NULL);
+ error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
if (error || nimap < 1)
break;
ASSERT(imap.br_blockcount >= 1);
@@ -2461,11 +1999,11 @@ xfs_zero_remaining_bytes(
xfsbdstrat(mp, bp);
error = xfs_buf_iowait(bp);
if (error) {
- xfs_ioerror_alert("xfs_zero_remaining_bytes(read)",
- mp, bp, XFS_BUF_ADDR(bp));
+ xfs_buf_ioerror_alert(bp,
+ "xfs_zero_remaining_bytes(read)");
break;
}
- memset(XFS_BUF_PTR(bp) +
+ memset(bp->b_addr +
(offset - XFS_FSB_TO_B(mp, imap.br_startoff)),
0, lastoffset - offset + 1);
XFS_BUF_UNDONE(bp);
@@ -2474,8 +2012,8 @@ xfs_zero_remaining_bytes(
xfsbdstrat(mp, bp);
error = xfs_buf_iowait(bp);
if (error) {
- xfs_ioerror_alert("xfs_zero_remaining_bytes(write)",
- mp, bp, XFS_BUF_ADDR(bp));
+ xfs_buf_ioerror_alert(bp,
+ "xfs_zero_remaining_bytes(write)");
break;
}
}
@@ -2540,7 +2078,7 @@ xfs_free_file_space(
if (need_iolock) {
xfs_ilock(ip, XFS_IOLOCK_EXCL);
/* wait for the completion of any pending DIOs */
- xfs_ioend_wait(ip);
+ inode_dio_wait(VFS_I(ip));
}
rounding = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
@@ -2560,8 +2098,8 @@ xfs_free_file_space(
*/
if (rt && !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
nimap = 1;
- error = xfs_bmapi(NULL, ip, startoffset_fsb,
- 1, 0, NULL, 0, &imap, &nimap, NULL);
+ error = xfs_bmapi_read(ip, startoffset_fsb, 1,
+ &imap, &nimap, 0);
if (error)
goto out_unlock_iolock;
ASSERT(nimap == 0 || nimap == 1);
@@ -2575,8 +2113,8 @@ xfs_free_file_space(
startoffset_fsb += mp->m_sb.sb_rextsize - mod;
}
nimap = 1;
- error = xfs_bmapi(NULL, ip, endoffset_fsb - 1,
- 1, 0, NULL, 0, &imap, &nimap, NULL);
+ error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
+ &imap, &nimap, 0);
if (error)
goto out_unlock_iolock;
ASSERT(nimap == 0 || nimap == 1);
@@ -2644,7 +2182,7 @@ xfs_free_file_space(
if (error)
goto error1;
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, 0);
/*
* issue the bunmapi() call to free the blocks
@@ -2791,7 +2329,7 @@ xfs_change_file_space(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = startoffset;
- error = xfs_setattr(ip, &iattr, attr_flags);
+ error = xfs_setattr_size(ip, &iattr, attr_flags);
if (error)
return error;
@@ -2817,8 +2355,7 @@ xfs_change_file_space(
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- xfs_trans_ijoin(tp, ip);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
if ((attr_flags & XFS_ATTR_DMI) == 0) {
ip->i_d.di_mode &= ~S_ISUID;
@@ -2843,10 +2380,5 @@ xfs_change_file_space(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (attr_flags & XFS_ATTR_SYNC)
xfs_trans_set_sync(tp);
-
- error = xfs_trans_commit(tp, 0);
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- return error;
+ return xfs_trans_commit(tp, 0);
}
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h
index 3bcd233..c0f7714 100644
--- a/fs/xfs/xfs_vnodeops.h
+++ b/fs/xfs/xfs_vnodeops.h
@@ -13,7 +13,8 @@ struct xfs_inode;
struct xfs_iomap;
-int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags);
+int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, int flags);
+int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags);
#define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */
#define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */
#define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */
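Splitting xfs_setattr() means callers dispatch on ATTR_SIZE: size changes go through the truncate-aware xfs_setattr_size(), everything else through the lighter xfs_setattr_nonsize(). A hypothetical dispatch wrapper sketching the split:

#include <stdio.h>

#define ATTR_SIZE 0x1

struct iattr { unsigned int ia_valid; long ia_size; };

static int setattr_size(struct iattr *ia)
{
	printf("truncate path to size %ld\n", ia->ia_size);
	return 0;
}

static int setattr_nonsize(struct iattr *ia)
{
	(void)ia;
	puts("ownership/mode/time path");
	return 0;
}

static int setattr(struct iattr *ia)
{
	if (ia->ia_valid & ATTR_SIZE)
		return setattr_size(ia);
	return setattr_nonsize(ia);
}

int main(void)
{
	struct iattr a = { ATTR_SIZE, 4096 }, b = { 0, 0 };

	setattr(&a);
	setattr(&b);
	return 0;
}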
@@ -58,6 +59,7 @@ int xfs_flush_pages(struct xfs_inode *ip, xfs_off_t first,
xfs_off_t last, uint64_t flags, int fiopt);
int xfs_wait_on_pages(struct xfs_inode *ip, xfs_off_t first, xfs_off_t last);
-int xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t);
+int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
+ xfs_fsize_t isize, bool *did_zeroing);
#endif /* _XFS_VNODEOPS_H */
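
The header hunk above also widens xfs_zero_eof() with a did_zeroing out-parameter; only the prototype changes here. A hypothetical caller (the post-call check is an assumption, not code from this series) might consume it like so:

	bool	did_zeroing = false;
	int	error;

	error = xfs_zero_eof(ip, offset, isize, &did_zeroing);
	if (!error && did_zeroing) {
		/* blocks past the old EOF were actually zeroed, so only
		 * now is there dirty state worth flushing or logging */
	}
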
diff --git a/fs/yaffs2/Kconfig b/fs/yaffs2/Kconfig
deleted file mode 100644
index 6354140..0000000
--- a/fs/yaffs2/Kconfig
+++ /dev/null
@@ -1,161 +0,0 @@
-#
-# YAFFS file system configurations
-#
-
-config YAFFS_FS
- tristate "YAFFS2 file system support"
- default n
- depends on MTD_BLOCK
- select YAFFS_YAFFS1
- select YAFFS_YAFFS2
- help
- YAFFS2, or Yet Another Flash Filing System, is a filing system
- optimised for NAND Flash chips.
-
- To compile the YAFFS2 file system support as a module, choose M
- here: the module will be called yaffs2.
-
- If unsure, say N.
-
- Further information on YAFFS2 is available at
- <http://www.aleph1.co.uk/yaffs/>.
-
-config YAFFS_YAFFS1
- bool "512 byte / page devices"
- depends on YAFFS_FS
- default y
- help
- Enable YAFFS1 support -- yaffs for 512 byte / page devices
-
- Not needed for 2K-page devices.
-
- If unsure, say Y.
-
-config YAFFS_9BYTE_TAGS
- bool "Use older-style on-NAND data format with pageStatus byte"
- depends on YAFFS_YAFFS1
- default n
- help
-
- Older-style on-NAND data format has a "pageStatus" byte to record
- chunk/page state. This byte is zero when the page is discarded.
- Choose this option if you have existing on-NAND data using this
- format that you need to continue to support. New data written
- also uses the older-style format. Note: Use of this option
- generally requires that MTD's oob layout be adjusted to use the
- older-style format. See notes on tags formats and MTD versions
- in yaffs_mtdif1.c.
-
- If unsure, say N.
-
-config YAFFS_DOES_ECC
- bool "Lets Yaffs do its own ECC"
- depends on YAFFS_FS && YAFFS_YAFFS1 && !YAFFS_9BYTE_TAGS
- default n
- help
- This enables Yaffs to use its own ECC functions instead of using
- the ones from the generic MTD-NAND driver.
-
- If unsure, say N.
-
-config YAFFS_ECC_WRONG_ORDER
- bool "Use the same ecc byte order as Steven Hill's nand_ecc.c"
- depends on YAFFS_FS && YAFFS_DOES_ECC && !YAFFS_9BYTE_TAGS
- default n
- help
- This makes yaffs_ecc.c use the same ecc byte order as Steven
- Hill's nand_ecc.c. If not set, then you get the same ecc byte
- order as SmartMedia.
-
- If unsure, say N.
-
-config YAFFS_YAFFS2
- bool "2048 byte (or larger) / page devices"
- depends on YAFFS_FS
- default y
- help
- Enable YAFFS2 support -- yaffs for >= 2K bytes per page devices
-
- If unsure, say Y.
-
-config YAFFS_AUTO_YAFFS2
- bool "Autoselect yaffs2 format"
- depends on YAFFS_YAFFS2
- default y
- help
- Without this, you need to explicitly use yaffs2 as the file

- system type. With this, you can say "yaffs" and yaffs or yaffs2
- will be used depending on the device page size (yaffs on
- 512-byte page devices, yaffs2 on 2K page devices).
-
- If unsure, say Y.
-
-config YAFFS_DISABLE_TAGS_ECC
- bool "Disable YAFFS from doing ECC on tags by default"
- depends on YAFFS_FS && YAFFS_YAFFS2
- default n
- help
- This defaults Yaffs to not doing its own ECC calculations on tags,
- relying on the MTD instead.
- This behavior can also be overridden with the tags_ecc_on and
- tags_ecc_off mount options.
-
- If unsure, say N.
-
-config YAFFS_ALWAYS_CHECK_CHUNK_ERASED
- bool "Force chunk erase check"
- depends on YAFFS_FS
- default n
- help
- Normally YAFFS only checks chunks before writing until an erased
- chunk is found. This helps to detect any partially written
- chunks that might have happened due to power loss.
-
- Enabling this forces on the test that chunks are erased in flash
- before writing to them. This takes more time but is potentially
- a bit more secure.
-
- Suggest setting Y during development and ironing out driver
- issues etc. Suggest setting to N if you want faster writing.
-
- If unsure, say Y.
-
-config YAFFS_EMPTY_LOST_AND_FOUND
- bool "Empty lost and found on boot"
- depends on YAFFS_FS
- default n
- help
- If this is enabled then the contents of lost and found is
- automatically dumped at mount.
-
- If unsure, say N.
-
-config YAFFS_DISABLE_BLOCK_REFRESHING
- bool "Disable yaffs2 block refreshing"
- depends on YAFFS_FS
- default n
- help
- If this is set, then block refreshing is disabled.
- Block refreshing infrequently refreshes the oldest block in
- a yaffs2 file system. This mechanism helps to refresh flash to
- mitigate against data loss. This is particularly useful for MLC.
-
- If unsure, say N.
-
-config YAFFS_DISABLE_BACKGROUND
- bool "Disable yaffs2 background processing"
- depends on YAFFS_FS
- default n
- help
- If this is set, then background processing is disabled.
- Background processing makes many foreground activities faster.
-
- If unsure, say N.
-
-config YAFFS_XATTR
- bool "Enable yaffs2 xattr support"
- depends on YAFFS_FS
- default y
- help
- If this is set then yaffs2 will provide xattr support.
- If unsure, say Y.
diff --git a/fs/yaffs2/Makefile b/fs/yaffs2/Makefile
deleted file mode 100644
index e63a28a..0000000
--- a/fs/yaffs2/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-#
-# Makefile for the linux YAFFS filesystem routines.
-#
-
-obj-$(CONFIG_YAFFS_FS) += yaffs.o
-
-yaffs-y := yaffs_ecc.o yaffs_vfs.o yaffs_guts.o yaffs_checkptrw.o
-yaffs-y += yaffs_packedtags1.o yaffs_packedtags2.o yaffs_nand.o
-yaffs-y += yaffs_tagscompat.o yaffs_tagsvalidity.o
-yaffs-y += yaffs_mtdif.o yaffs_mtdif1.o yaffs_mtdif2.o
-yaffs-y += yaffs_nameval.o yaffs_attribs.o
-yaffs-y += yaffs_allocator.o
-yaffs-y += yaffs_yaffs1.o
-yaffs-y += yaffs_yaffs2.o
-yaffs-y += yaffs_bitmap.o
-yaffs-y += yaffs_verify.o
-
diff --git a/fs/yaffs2/yaffs_allocator.c b/fs/yaffs2/yaffs_allocator.c
deleted file mode 100644
index f9cd5be..0000000
--- a/fs/yaffs2/yaffs_allocator.c
+++ /dev/null
@@ -1,396 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_allocator.h"
-#include "yaffs_guts.h"
-#include "yaffs_trace.h"
-#include "yportenv.h"
-
-#ifdef CONFIG_YAFFS_KMALLOC_ALLOCATOR
-
-void yaffs_deinit_raw_tnodes_and_objs(struct yaffs_dev *dev)
-{
- dev = dev;
-}
-
-void yaffs_init_raw_tnodes_and_objs(struct yaffs_dev *dev)
-{
- dev = dev;
-}
-
-struct yaffs_tnode *yaffs_alloc_raw_tnode(struct yaffs_dev *dev)
-{
- return (struct yaffs_tnode *)kmalloc(dev->tnode_size, GFP_NOFS);
-}
-
-void yaffs_free_raw_tnode(struct yaffs_dev *dev, struct yaffs_tnode *tn)
-{
- dev = dev;
- kfree(tn);
-}
-
-void yaffs_init_raw_objs(struct yaffs_dev *dev)
-{
- dev = dev;
-}
-
-void yaffs_deinit_raw_objs(struct yaffs_dev *dev)
-{
- dev = dev;
-}
-
-struct yaffs_obj *yaffs_alloc_raw_obj(struct yaffs_dev *dev)
-{
- dev = dev;
- return (struct yaffs_obj *)kmalloc(sizeof(struct yaffs_obj), GFP_NOFS);
-}
-
-void yaffs_free_raw_obj(struct yaffs_dev *dev, struct yaffs_obj *obj)
-{
-
- dev = dev;
- kfree(obj);
-}
-
-#else
-
-struct yaffs_tnode_list {
- struct yaffs_tnode_list *next;
- struct yaffs_tnode *tnodes;
-};
-
-struct yaffs_obj_list {
- struct yaffs_obj_list *next;
- struct yaffs_obj *objects;
-};
-
-struct yaffs_allocator {
- int n_tnodes_created;
- struct yaffs_tnode *free_tnodes;
- int n_free_tnodes;
- struct yaffs_tnode_list *alloc_tnode_list;
-
- int n_obj_created;
- struct yaffs_obj *free_objs;
- int n_free_objects;
-
- struct yaffs_obj_list *allocated_obj_list;
-};
-
-static void yaffs_deinit_raw_tnodes(struct yaffs_dev *dev)
-{
-
- struct yaffs_allocator *allocator =
- (struct yaffs_allocator *)dev->allocator;
-
- struct yaffs_tnode_list *tmp;
-
- if (!allocator) {
- YBUG();
- return;
- }
-
- while (allocator->alloc_tnode_list) {
- tmp = allocator->alloc_tnode_list->next;
-
- kfree(allocator->alloc_tnode_list->tnodes);
- kfree(allocator->alloc_tnode_list);
- allocator->alloc_tnode_list = tmp;
-
- }
-
- allocator->free_tnodes = NULL;
- allocator->n_free_tnodes = 0;
- allocator->n_tnodes_created = 0;
-}
-
-static void yaffs_init_raw_tnodes(struct yaffs_dev *dev)
-{
- struct yaffs_allocator *allocator = dev->allocator;
-
- if (allocator) {
- allocator->alloc_tnode_list = NULL;
- allocator->free_tnodes = NULL;
- allocator->n_free_tnodes = 0;
- allocator->n_tnodes_created = 0;
- } else {
- YBUG();
- }
-}
-
-static int yaffs_create_tnodes(struct yaffs_dev *dev, int n_tnodes)
-{
- struct yaffs_allocator *allocator =
- (struct yaffs_allocator *)dev->allocator;
- int i;
- struct yaffs_tnode *new_tnodes;
- u8 *mem;
- struct yaffs_tnode *curr;
- struct yaffs_tnode *next;
- struct yaffs_tnode_list *tnl;
-
- if (!allocator) {
- YBUG();
- return YAFFS_FAIL;
- }
-
- if (n_tnodes < 1)
- return YAFFS_OK;
-
- /* make these things */
-
- new_tnodes = kmalloc(n_tnodes * dev->tnode_size, GFP_NOFS);
- mem = (u8 *) new_tnodes;
-
- if (!new_tnodes) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs: Could not allocate Tnodes");
- return YAFFS_FAIL;
- }
-
- /* New hookup for wide tnodes */
- for (i = 0; i < n_tnodes - 1; i++) {
- curr = (struct yaffs_tnode *)&mem[i * dev->tnode_size];
- next = (struct yaffs_tnode *)&mem[(i + 1) * dev->tnode_size];
- curr->internal[0] = next;
- }
-
- curr = (struct yaffs_tnode *)&mem[(n_tnodes - 1) * dev->tnode_size];
- curr->internal[0] = allocator->free_tnodes;
- allocator->free_tnodes = (struct yaffs_tnode *)mem;
-
- allocator->n_free_tnodes += n_tnodes;
- allocator->n_tnodes_created += n_tnodes;
-
- /* Now add this bunch of tnodes to a list for freeing up.
- * NB If we can't add this to the management list it isn't fatal
- * but it just means we can't free this bunch of tnodes later.
- */
-
- tnl = kmalloc(sizeof(struct yaffs_tnode_list), GFP_NOFS);
- if (!tnl) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "Could not add tnodes to management list");
- return YAFFS_FAIL;
- } else {
- tnl->tnodes = new_tnodes;
- tnl->next = allocator->alloc_tnode_list;
- allocator->alloc_tnode_list = tnl;
- }
-
- yaffs_trace(YAFFS_TRACE_ALLOCATE,"Tnodes added");
-
- return YAFFS_OK;
-}
-
-struct yaffs_tnode *yaffs_alloc_raw_tnode(struct yaffs_dev *dev)
-{
- struct yaffs_allocator *allocator =
- (struct yaffs_allocator *)dev->allocator;
- struct yaffs_tnode *tn = NULL;
-
- if (!allocator) {
- YBUG();
- return NULL;
- }
-
- /* If there are none left make more */
- if (!allocator->free_tnodes)
- yaffs_create_tnodes(dev, YAFFS_ALLOCATION_NTNODES);
-
- if (allocator->free_tnodes) {
- tn = allocator->free_tnodes;
- allocator->free_tnodes = allocator->free_tnodes->internal[0];
- allocator->n_free_tnodes--;
- }
-
- return tn;
-}
-
-/* FreeTnode frees up a tnode and puts it back on the free list */
-void yaffs_free_raw_tnode(struct yaffs_dev *dev, struct yaffs_tnode *tn)
-{
- struct yaffs_allocator *allocator = dev->allocator;
-
- if (!allocator) {
- YBUG();
- return;
- }
-
- if (tn) {
- tn->internal[0] = allocator->free_tnodes;
- allocator->free_tnodes = tn;
- allocator->n_free_tnodes++;
- }
- dev->checkpoint_blocks_required = 0; /* force recalculation */
-}
-
-static void yaffs_init_raw_objs(struct yaffs_dev *dev)
-{
- struct yaffs_allocator *allocator = dev->allocator;
-
- if (allocator) {
- allocator->allocated_obj_list = NULL;
- allocator->free_objs = NULL;
- allocator->n_free_objects = 0;
- } else {
- YBUG();
- }
-}
-
-static void yaffs_deinit_raw_objs(struct yaffs_dev *dev)
-{
- struct yaffs_allocator *allocator = dev->allocator;
- struct yaffs_obj_list *tmp;
-
- if (!allocator) {
- YBUG();
- return;
- }
-
- while (allocator->allocated_obj_list) {
- tmp = allocator->allocated_obj_list->next;
- kfree(allocator->allocated_obj_list->objects);
- kfree(allocator->allocated_obj_list);
-
- allocator->allocated_obj_list = tmp;
- }
-
- allocator->free_objs = NULL;
- allocator->n_free_objects = 0;
- allocator->n_obj_created = 0;
-}
-
-static int yaffs_create_free_objs(struct yaffs_dev *dev, int n_obj)
-{
- struct yaffs_allocator *allocator = dev->allocator;
-
- int i;
- struct yaffs_obj *new_objs;
- struct yaffs_obj_list *list;
-
- if (!allocator) {
- YBUG();
- return YAFFS_FAIL;
- }
-
- if (n_obj < 1)
- return YAFFS_OK;
-
- /* make these things */
- new_objs = kmalloc(n_obj * sizeof(struct yaffs_obj), GFP_NOFS);
- list = kmalloc(sizeof(struct yaffs_obj_list), GFP_NOFS);
-
- if (!new_objs || !list) {
- if (new_objs) {
- kfree(new_objs);
- new_objs = NULL;
- }
- if (list) {
- kfree(list);
- list = NULL;
- }
- yaffs_trace(YAFFS_TRACE_ALLOCATE,
- "Could not allocate more objects");
- return YAFFS_FAIL;
- }
-
- /* Hook them into the free list */
- for (i = 0; i < n_obj - 1; i++) {
- new_objs[i].siblings.next =
- (struct list_head *)(&new_objs[i + 1]);
- }
-
- new_objs[n_obj - 1].siblings.next = (void *)allocator->free_objs;
- allocator->free_objs = new_objs;
- allocator->n_free_objects += n_obj;
- allocator->n_obj_created += n_obj;
-
- /* Now add this bunch of Objects to a list for freeing up. */
-
- list->objects = new_objs;
- list->next = allocator->allocated_obj_list;
- allocator->allocated_obj_list = list;
-
- return YAFFS_OK;
-}
-
-struct yaffs_obj *yaffs_alloc_raw_obj(struct yaffs_dev *dev)
-{
- struct yaffs_obj *obj = NULL;
- struct yaffs_allocator *allocator = dev->allocator;
-
- if (!allocator) {
- YBUG();
- return obj;
- }
-
- /* If there are none left make more */
- if (!allocator->free_objs)
- yaffs_create_free_objs(dev, YAFFS_ALLOCATION_NOBJECTS);
-
- if (allocator->free_objs) {
- obj = allocator->free_objs;
- allocator->free_objs =
- (struct yaffs_obj *)(allocator->free_objs->siblings.next);
- allocator->n_free_objects--;
- }
-
- return obj;
-}
-
-void yaffs_free_raw_obj(struct yaffs_dev *dev, struct yaffs_obj *obj)
-{
-
- struct yaffs_allocator *allocator = dev->allocator;
-
- if (!allocator)
- YBUG();
- else {
- /* Link into the free list. */
- obj->siblings.next = (struct list_head *)(allocator->free_objs);
- allocator->free_objs = obj;
- allocator->n_free_objects++;
- }
-}
-
-void yaffs_deinit_raw_tnodes_and_objs(struct yaffs_dev *dev)
-{
- if (dev->allocator) {
- yaffs_deinit_raw_tnodes(dev);
- yaffs_deinit_raw_objs(dev);
-
- kfree(dev->allocator);
- dev->allocator = NULL;
- } else {
- YBUG();
- }
-}
-
-void yaffs_init_raw_tnodes_and_objs(struct yaffs_dev *dev)
-{
- struct yaffs_allocator *allocator;
-
- if (!dev->allocator) {
- allocator = kmalloc(sizeof(struct yaffs_allocator), GFP_NOFS);
- if (allocator) {
- dev->allocator = allocator;
- yaffs_init_raw_tnodes(dev);
- yaffs_init_raw_objs(dev);
- }
- } else {
- YBUG();
- }
-}
-
-#endif
diff --git a/fs/yaffs2/yaffs_allocator.h b/fs/yaffs2/yaffs_allocator.h
deleted file mode 100644
index 4d5f2ae..0000000
--- a/fs/yaffs2/yaffs_allocator.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_ALLOCATOR_H__
-#define __YAFFS_ALLOCATOR_H__
-
-#include "yaffs_guts.h"
-
-void yaffs_init_raw_tnodes_and_objs(struct yaffs_dev *dev);
-void yaffs_deinit_raw_tnodes_and_objs(struct yaffs_dev *dev);
-
-struct yaffs_tnode *yaffs_alloc_raw_tnode(struct yaffs_dev *dev);
-void yaffs_free_raw_tnode(struct yaffs_dev *dev, struct yaffs_tnode *tn);
-
-struct yaffs_obj *yaffs_alloc_raw_obj(struct yaffs_dev *dev);
-void yaffs_free_raw_obj(struct yaffs_dev *dev, struct yaffs_obj *obj);
-
-#endif
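
The allocator deleted above never keeps a separate free-list structure: a free tnode's own internal[0] slot (and a free object's siblings.next) doubles as the "next free" pointer, while side lists (alloc_tnode_list / allocated_obj_list) remember which slabs to kfree at teardown. A self-contained user-space sketch of that intrusive free-list technique (illustrative names, not the yaffs API; slab bookkeeping is omitted, so the slabs intentionally leak at exit):

	#include <stdio.h>
	#include <stdlib.h>

	/* Each free node's first word doubles as the "next free" pointer,
	 * so the free list costs no extra memory -- the trick used by
	 * yaffs_alloc_raw_tnode()/yaffs_free_raw_tnode() above. */
	struct node { struct node *next_free; char payload[60]; };

	static struct node *free_list;

	static void add_slab(size_t n)
	{
		/* yaffs would also record `slab` on a list for later kfree */
		struct node *slab = calloc(n, sizeof *slab);
		for (size_t i = 0; i + 1 < n; i++)
			slab[i].next_free = &slab[i + 1];
		slab[n - 1].next_free = free_list;  /* chain onto old list */
		free_list = slab;
	}

	static struct node *node_alloc(void)
	{
		if (!free_list)
			add_slab(16);
		struct node *n = free_list;
		free_list = n->next_free;
		return n;
	}

	static void node_free(struct node *n)
	{
		n->next_free = free_list;
		free_list = n;
	}

	int main(void)
	{
		struct node *a = node_alloc(), *b = node_alloc();
		node_free(a);
		printf("recycled: %s\n", node_alloc() == a ? "yes" : "no");
		node_free(b);
		return 0;
	}
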
diff --git a/fs/yaffs2/yaffs_attribs.c b/fs/yaffs2/yaffs_attribs.c
deleted file mode 100644
index 9b47d37..0000000
--- a/fs/yaffs2/yaffs_attribs.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_guts.h"
-#include "yaffs_attribs.h"
-
-void yaffs_load_attribs(struct yaffs_obj *obj, struct yaffs_obj_hdr *oh)
-{
- obj->yst_uid = oh->yst_uid;
- obj->yst_gid = oh->yst_gid;
- obj->yst_atime = oh->yst_atime;
- obj->yst_mtime = oh->yst_mtime;
- obj->yst_ctime = oh->yst_ctime;
- obj->yst_rdev = oh->yst_rdev;
-}
-
-void yaffs_load_attribs_oh(struct yaffs_obj_hdr *oh, struct yaffs_obj *obj)
-{
- oh->yst_uid = obj->yst_uid;
- oh->yst_gid = obj->yst_gid;
- oh->yst_atime = obj->yst_atime;
- oh->yst_mtime = obj->yst_mtime;
- oh->yst_ctime = obj->yst_ctime;
- oh->yst_rdev = obj->yst_rdev;
-
-}
-
-void yaffs_load_current_time(struct yaffs_obj *obj, int do_a, int do_c)
-{
- obj->yst_mtime = Y_CURRENT_TIME;
- if (do_a)
- obj->yst_atime = obj->yst_mtime;
- if (do_c)
- obj->yst_ctime = obj->yst_mtime;
-}
-
-void yaffs_attribs_init(struct yaffs_obj *obj, u32 gid, u32 uid, u32 rdev)
-{
- yaffs_load_current_time(obj, 1, 1);
- obj->yst_rdev = rdev;
- obj->yst_uid = uid;
- obj->yst_gid = gid;
-}
-
-loff_t yaffs_get_file_size(struct yaffs_obj *obj)
-{
- YCHAR *alias = NULL;
- obj = yaffs_get_equivalent_obj(obj);
-
- switch (obj->variant_type) {
- case YAFFS_OBJECT_TYPE_FILE:
- return obj->variant.file_variant.file_size;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- alias = obj->variant.symlink_variant.alias;
- if (!alias)
- return 0;
- return strnlen(alias, YAFFS_MAX_ALIAS_LENGTH);
- default:
- return 0;
- }
-}
-
-int yaffs_set_attribs(struct yaffs_obj *obj, struct iattr *attr)
-{
- unsigned int valid = attr->ia_valid;
-
- if (valid & ATTR_MODE)
- obj->yst_mode = attr->ia_mode;
- if (valid & ATTR_UID)
- obj->yst_uid = attr->ia_uid;
- if (valid & ATTR_GID)
- obj->yst_gid = attr->ia_gid;
-
- if (valid & ATTR_ATIME)
- obj->yst_atime = Y_TIME_CONVERT(attr->ia_atime);
- if (valid & ATTR_CTIME)
- obj->yst_ctime = Y_TIME_CONVERT(attr->ia_ctime);
- if (valid & ATTR_MTIME)
- obj->yst_mtime = Y_TIME_CONVERT(attr->ia_mtime);
-
- if (valid & ATTR_SIZE)
- yaffs_resize_file(obj, attr->ia_size);
-
- yaffs_update_oh(obj, NULL, 1, 0, 0, NULL);
-
- return YAFFS_OK;
-
-}
-
-int yaffs_get_attribs(struct yaffs_obj *obj, struct iattr *attr)
-{
- unsigned int valid = 0;
-
- attr->ia_mode = obj->yst_mode;
- valid |= ATTR_MODE;
- attr->ia_uid = obj->yst_uid;
- valid |= ATTR_UID;
- attr->ia_gid = obj->yst_gid;
- valid |= ATTR_GID;
-
- Y_TIME_CONVERT(attr->ia_atime) = obj->yst_atime;
- valid |= ATTR_ATIME;
- Y_TIME_CONVERT(attr->ia_ctime) = obj->yst_ctime;
- valid |= ATTR_CTIME;
- Y_TIME_CONVERT(attr->ia_mtime) = obj->yst_mtime;
- valid |= ATTR_MTIME;
-
- attr->ia_size = yaffs_get_file_size(obj);
- valid |= ATTR_SIZE;
-
- attr->ia_valid = valid;
-
- return YAFFS_OK;
-}
diff --git a/fs/yaffs2/yaffs_attribs.h b/fs/yaffs2/yaffs_attribs.h
deleted file mode 100644
index 33d541d..0000000
--- a/fs/yaffs2/yaffs_attribs.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_ATTRIBS_H__
-#define __YAFFS_ATTRIBS_H__
-
-#include "yaffs_guts.h"
-
-void yaffs_load_attribs(struct yaffs_obj *obj, struct yaffs_obj_hdr *oh);
-void yaffs_load_attribs_oh(struct yaffs_obj_hdr *oh, struct yaffs_obj *obj);
-void yaffs_attribs_init(struct yaffs_obj *obj, u32 gid, u32 uid, u32 rdev);
-void yaffs_load_current_time(struct yaffs_obj *obj, int do_a, int do_c);
-int yaffs_set_attribs(struct yaffs_obj *obj, struct iattr *attr);
-int yaffs_get_attribs(struct yaffs_obj *obj, struct iattr *attr);
-
-#endif
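
yaffs_set_attribs() above applies only the fields flagged in attr->ia_valid, which is the standard VFS contract for partial attribute updates. A self-contained sketch of that valid-bitmask pattern, with illustrative flag values rather than the kernel's:

	#include <stdio.h>

	/* Illustrative stand-ins for the kernel's ATTR_* bits. */
	#define ATTR_MODE (1u << 0)
	#define ATTR_UID  (1u << 1)
	#define ATTR_SIZE (1u << 2)

	struct attrs { unsigned valid; unsigned mode, uid; long size; };
	struct obj   { unsigned mode, uid; long size; };

	/* Apply only the fields the caller marked valid, as
	 * yaffs_set_attribs() does with attr->ia_valid. */
	static void apply(struct obj *o, const struct attrs *a)
	{
		if (a->valid & ATTR_MODE) o->mode = a->mode;
		if (a->valid & ATTR_UID)  o->uid  = a->uid;
		if (a->valid & ATTR_SIZE) o->size = a->size;
	}

	int main(void)
	{
		struct obj o = { 0644, 1000, 4096 };
		struct attrs chmod_only = { .valid = ATTR_MODE, .mode = 0600 };

		apply(&o, &chmod_only);  /* uid and size stay untouched */
		printf("mode %o uid %u size %ld\n", o.mode, o.uid, o.size);
		return 0;
	}
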
diff --git a/fs/yaffs2/yaffs_bitmap.c b/fs/yaffs2/yaffs_bitmap.c
deleted file mode 100644
index 7df42cd..0000000
--- a/fs/yaffs2/yaffs_bitmap.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_bitmap.h"
-#include "yaffs_trace.h"
-/*
- * Chunk bitmap manipulations
- */
-
-static inline u8 *yaffs_block_bits(struct yaffs_dev *dev, int blk)
-{
- if (blk < dev->internal_start_block || blk > dev->internal_end_block) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "BlockBits block %d is not valid",
- blk);
- YBUG();
- }
- return dev->chunk_bits +
- (dev->chunk_bit_stride * (blk - dev->internal_start_block));
-}
-
-void yaffs_verify_chunk_bit_id(struct yaffs_dev *dev, int blk, int chunk)
-{
- if (blk < dev->internal_start_block || blk > dev->internal_end_block ||
- chunk < 0 || chunk >= dev->param.chunks_per_block) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "Chunk Id (%d:%d) invalid",
- blk, chunk);
- YBUG();
- }
-}
-
-void yaffs_clear_chunk_bits(struct yaffs_dev *dev, int blk)
-{
- u8 *blk_bits = yaffs_block_bits(dev, blk);
-
- memset(blk_bits, 0, dev->chunk_bit_stride);
-}
-
-void yaffs_clear_chunk_bit(struct yaffs_dev *dev, int blk, int chunk)
-{
- u8 *blk_bits = yaffs_block_bits(dev, blk);
-
- yaffs_verify_chunk_bit_id(dev, blk, chunk);
-
- blk_bits[chunk / 8] &= ~(1 << (chunk & 7));
-}
-
-void yaffs_set_chunk_bit(struct yaffs_dev *dev, int blk, int chunk)
-{
- u8 *blk_bits = yaffs_block_bits(dev, blk);
-
- yaffs_verify_chunk_bit_id(dev, blk, chunk);
-
- blk_bits[chunk / 8] |= (1 << (chunk & 7));
-}
-
-int yaffs_check_chunk_bit(struct yaffs_dev *dev, int blk, int chunk)
-{
- u8 *blk_bits = yaffs_block_bits(dev, blk);
- yaffs_verify_chunk_bit_id(dev, blk, chunk);
-
- return (blk_bits[chunk / 8] & (1 << (chunk & 7))) ? 1 : 0;
-}
-
-int yaffs_still_some_chunks(struct yaffs_dev *dev, int blk)
-{
- u8 *blk_bits = yaffs_block_bits(dev, blk);
- int i;
- for (i = 0; i < dev->chunk_bit_stride; i++) {
- if (*blk_bits)
- return 1;
- blk_bits++;
- }
- return 0;
-}
-
-int yaffs_count_chunk_bits(struct yaffs_dev *dev, int blk)
-{
- u8 *blk_bits = yaffs_block_bits(dev, blk);
- int i;
- int n = 0;
-
- for (i = 0; i < dev->chunk_bit_stride; i++, blk_bits++)
- n += hweight8(*blk_bits);
-
- return n;
-}
diff --git a/fs/yaffs2/yaffs_bitmap.h b/fs/yaffs2/yaffs_bitmap.h
deleted file mode 100644
index cf9ea58..0000000
--- a/fs/yaffs2/yaffs_bitmap.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-/*
- * Chunk bitmap manipulations
- */
-
-#ifndef __YAFFS_BITMAP_H__
-#define __YAFFS_BITMAP_H__
-
-#include "yaffs_guts.h"
-
-void yaffs_verify_chunk_bit_id(struct yaffs_dev *dev, int blk, int chunk);
-void yaffs_clear_chunk_bits(struct yaffs_dev *dev, int blk);
-void yaffs_clear_chunk_bit(struct yaffs_dev *dev, int blk, int chunk);
-void yaffs_set_chunk_bit(struct yaffs_dev *dev, int blk, int chunk);
-int yaffs_check_chunk_bit(struct yaffs_dev *dev, int blk, int chunk);
-int yaffs_still_some_chunks(struct yaffs_dev *dev, int blk);
-int yaffs_count_chunk_bits(struct yaffs_dev *dev, int blk);
-
-#endif
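
The chunk bitmap deleted above is one flat byte array holding a fixed-size stride of bits per block, indexed by (blk - internal_start_block). A self-contained sketch of the same indexing scheme (illustrative constants, not the yaffs API):

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>

	#define CHUNKS_PER_BLOCK 64
	#define STRIDE ((CHUNKS_PER_BLOCK + 7) / 8) /* bitmap bytes per block */

	/* Mirrors yaffs_block_bits(): one STRIDE-sized slice per block. */
	static uint8_t *block_bits(uint8_t *bits, int start_block, int blk)
	{
		return bits + STRIDE * (blk - start_block);
	}

	static void set_chunk(uint8_t *bits, int start, int blk, int chunk)
	{
		block_bits(bits, start, blk)[chunk / 8] |= 1u << (chunk & 7);
	}

	static int test_chunk(uint8_t *bits, int start, int blk, int chunk)
	{
		return (block_bits(bits, start, blk)[chunk / 8]
			>> (chunk & 7)) & 1;
	}

	int main(void)
	{
		int start = 10, end = 20;
		uint8_t *bits = calloc(end - start + 1, STRIDE);

		set_chunk(bits, start, 13, 42);
		printf("blk 13 chunk 42: %d, chunk 43: %d\n",
		       test_chunk(bits, start, 13, 42),
		       test_chunk(bits, start, 13, 43));
		free(bits);
		return 0;
	}
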
diff --git a/fs/yaffs2/yaffs_checkptrw.c b/fs/yaffs2/yaffs_checkptrw.c
deleted file mode 100644
index 4e40f43..0000000
--- a/fs/yaffs2/yaffs_checkptrw.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_checkptrw.h"
-#include "yaffs_getblockinfo.h"
-
-static int yaffs2_checkpt_space_ok(struct yaffs_dev *dev)
-{
- int blocks_avail = dev->n_erased_blocks - dev->param.n_reserved_blocks;
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "checkpt blocks_avail = %d", blocks_avail);
-
- return (blocks_avail <= 0) ? 0 : 1;
-}
-
-static int yaffs_checkpt_erase(struct yaffs_dev *dev)
-{
- int i;
-
- if (!dev->param.erase_fn)
- return 0;
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "checking blocks %d to %d",
- dev->internal_start_block, dev->internal_end_block);
-
- for (i = dev->internal_start_block; i <= dev->internal_end_block; i++) {
- struct yaffs_block_info *bi = yaffs_get_block_info(dev, i);
- if (bi->block_state == YAFFS_BLOCK_STATE_CHECKPOINT) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "erasing checkpt block %d", i);
-
- dev->n_erasures++;
-
- if (dev->param.
- erase_fn(dev,
- i - dev->block_offset /* realign */ )) {
- bi->block_state = YAFFS_BLOCK_STATE_EMPTY;
- dev->n_erased_blocks++;
- dev->n_free_chunks +=
- dev->param.chunks_per_block;
- } else {
- dev->param.bad_block_fn(dev, i);
- bi->block_state = YAFFS_BLOCK_STATE_DEAD;
- }
- }
- }
-
- dev->blocks_in_checkpt = 0;
-
- return 1;
-}
-
-static void yaffs2_checkpt_find_erased_block(struct yaffs_dev *dev)
-{
- int i;
- int blocks_avail = dev->n_erased_blocks - dev->param.n_reserved_blocks;
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "allocating checkpt block: erased %d reserved %d avail %d next %d ",
- dev->n_erased_blocks, dev->param.n_reserved_blocks,
- blocks_avail, dev->checkpt_next_block);
-
- if (dev->checkpt_next_block >= 0 &&
- dev->checkpt_next_block <= dev->internal_end_block &&
- blocks_avail > 0) {
-
- for (i = dev->checkpt_next_block; i <= dev->internal_end_block;
- i++) {
- struct yaffs_block_info *bi =
- yaffs_get_block_info(dev, i);
- if (bi->block_state == YAFFS_BLOCK_STATE_EMPTY) {
- dev->checkpt_next_block = i + 1;
- dev->checkpt_cur_block = i;
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "allocating checkpt block %d", i);
- return;
- }
- }
- }
- yaffs_trace(YAFFS_TRACE_CHECKPOINT, "out of checkpt blocks");
-
- dev->checkpt_next_block = -1;
- dev->checkpt_cur_block = -1;
-}
-
-static void yaffs2_checkpt_find_block(struct yaffs_dev *dev)
-{
- int i;
- struct yaffs_ext_tags tags;
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "find next checkpt block: start: blocks %d next %d",
- dev->blocks_in_checkpt, dev->checkpt_next_block);
-
- if (dev->blocks_in_checkpt < dev->checkpt_max_blocks)
- for (i = dev->checkpt_next_block; i <= dev->internal_end_block;
- i++) {
- int chunk = i * dev->param.chunks_per_block;
- int realigned_chunk = chunk - dev->chunk_offset;
-
- dev->param.read_chunk_tags_fn(dev, realigned_chunk,
- NULL, &tags);
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "find next checkpt block: search: block %d oid %d seq %d eccr %d",
- i, tags.obj_id, tags.seq_number,
- tags.ecc_result);
-
- if (tags.seq_number == YAFFS_SEQUENCE_CHECKPOINT_DATA) {
- /* Right kind of block */
- dev->checkpt_next_block = tags.obj_id;
- dev->checkpt_cur_block = i;
- dev->checkpt_block_list[dev->
- blocks_in_checkpt] = i;
- dev->blocks_in_checkpt++;
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "found checkpt block %d", i);
- return;
- }
- }
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT, "found no more checkpt blocks");
-
- dev->checkpt_next_block = -1;
- dev->checkpt_cur_block = -1;
-}
-
-int yaffs2_checkpt_open(struct yaffs_dev *dev, int writing)
-{
-
- dev->checkpt_open_write = writing;
-
- /* Got the functions we need? */
- if (!dev->param.write_chunk_tags_fn ||
- !dev->param.read_chunk_tags_fn ||
- !dev->param.erase_fn || !dev->param.bad_block_fn)
- return 0;
-
- if (writing && !yaffs2_checkpt_space_ok(dev))
- return 0;
-
- if (!dev->checkpt_buffer)
- dev->checkpt_buffer =
- kmalloc(dev->param.total_bytes_per_chunk, GFP_NOFS);
- if (!dev->checkpt_buffer)
- return 0;
-
- dev->checkpt_page_seq = 0;
- dev->checkpt_byte_count = 0;
- dev->checkpt_sum = 0;
- dev->checkpt_xor = 0;
- dev->checkpt_cur_block = -1;
- dev->checkpt_cur_chunk = -1;
- dev->checkpt_next_block = dev->internal_start_block;
-
- /* Erase all the blocks in the checkpoint area */
- if (writing) {
- memset(dev->checkpt_buffer, 0, dev->data_bytes_per_chunk);
- dev->checkpt_byte_offs = 0;
- return yaffs_checkpt_erase(dev);
- } else {
- int i;
- /* Set to a value that will kick off a read */
- dev->checkpt_byte_offs = dev->data_bytes_per_chunk;
- /* A checkpoint block list of 1 checkpoint block per 16 blocks is (hopefully)
- * going to be way more than we need */
- dev->blocks_in_checkpt = 0;
- dev->checkpt_max_blocks =
- (dev->internal_end_block - dev->internal_start_block) / 16 +
- 2;
- dev->checkpt_block_list =
- kmalloc(sizeof(int) * dev->checkpt_max_blocks, GFP_NOFS);
- if (!dev->checkpt_block_list)
- return 0;
-
- for (i = 0; i < dev->checkpt_max_blocks; i++)
- dev->checkpt_block_list[i] = -1;
- }
-
- return 1;
-}
-
-int yaffs2_get_checkpt_sum(struct yaffs_dev *dev, u32 * sum)
-{
- u32 composite_sum;
- composite_sum = (dev->checkpt_sum << 8) | (dev->checkpt_xor & 0xFF);
- *sum = composite_sum;
- return 1;
-}
-
-static int yaffs2_checkpt_flush_buffer(struct yaffs_dev *dev)
-{
- int chunk;
- int realigned_chunk;
-
- struct yaffs_ext_tags tags;
-
- if (dev->checkpt_cur_block < 0) {
- yaffs2_checkpt_find_erased_block(dev);
- dev->checkpt_cur_chunk = 0;
- }
-
- if (dev->checkpt_cur_block < 0)
- return 0;
-
- tags.is_deleted = 0;
- tags.obj_id = dev->checkpt_next_block; /* Hint to next place to look */
- tags.chunk_id = dev->checkpt_page_seq + 1;
- tags.seq_number = YAFFS_SEQUENCE_CHECKPOINT_DATA;
- tags.n_bytes = dev->data_bytes_per_chunk;
- if (dev->checkpt_cur_chunk == 0) {
- /* First chunk we write for the block? Set block state to
- checkpoint */
- struct yaffs_block_info *bi =
- yaffs_get_block_info(dev, dev->checkpt_cur_block);
- bi->block_state = YAFFS_BLOCK_STATE_CHECKPOINT;
- dev->blocks_in_checkpt++;
- }
-
- chunk =
- dev->checkpt_cur_block * dev->param.chunks_per_block +
- dev->checkpt_cur_chunk;
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "checkpoint wite buffer nand %d(%d:%d) objid %d chId %d",
- chunk, dev->checkpt_cur_block, dev->checkpt_cur_chunk,
- tags.obj_id, tags.chunk_id);
-
- realigned_chunk = chunk - dev->chunk_offset;
-
- dev->n_page_writes++;
-
- dev->param.write_chunk_tags_fn(dev, realigned_chunk,
- dev->checkpt_buffer, &tags);
- dev->checkpt_byte_offs = 0;
- dev->checkpt_page_seq++;
- dev->checkpt_cur_chunk++;
- if (dev->checkpt_cur_chunk >= dev->param.chunks_per_block) {
- dev->checkpt_cur_chunk = 0;
- dev->checkpt_cur_block = -1;
- }
- memset(dev->checkpt_buffer, 0, dev->data_bytes_per_chunk);
-
- return 1;
-}
-
-int yaffs2_checkpt_wr(struct yaffs_dev *dev, const void *data, int n_bytes)
-{
- int i = 0;
- int ok = 1;
-
- u8 *data_bytes = (u8 *) data;
-
- if (!dev->checkpt_buffer)
- return 0;
-
- if (!dev->checkpt_open_write)
- return -1;
-
- while (i < n_bytes && ok) {
- dev->checkpt_buffer[dev->checkpt_byte_offs] = *data_bytes;
- dev->checkpt_sum += *data_bytes;
- dev->checkpt_xor ^= *data_bytes;
-
- dev->checkpt_byte_offs++;
- i++;
- data_bytes++;
- dev->checkpt_byte_count++;
-
- if (dev->checkpt_byte_offs < 0 ||
- dev->checkpt_byte_offs >= dev->data_bytes_per_chunk)
- ok = yaffs2_checkpt_flush_buffer(dev);
- }
-
- return i;
-}
-
-int yaffs2_checkpt_rd(struct yaffs_dev *dev, void *data, int n_bytes)
-{
- int i = 0;
- int ok = 1;
- struct yaffs_ext_tags tags;
-
- int chunk;
- int realigned_chunk;
-
- u8 *data_bytes = (u8 *) data;
-
- if (!dev->checkpt_buffer)
- return 0;
-
- if (dev->checkpt_open_write)
- return -1;
-
- while (i < n_bytes && ok) {
-
- if (dev->checkpt_byte_offs < 0 ||
- dev->checkpt_byte_offs >= dev->data_bytes_per_chunk) {
-
- if (dev->checkpt_cur_block < 0) {
- yaffs2_checkpt_find_block(dev);
- dev->checkpt_cur_chunk = 0;
- }
-
- if (dev->checkpt_cur_block < 0)
- ok = 0;
- else {
- chunk = dev->checkpt_cur_block *
- dev->param.chunks_per_block +
- dev->checkpt_cur_chunk;
-
- realigned_chunk = chunk - dev->chunk_offset;
-
- dev->n_page_reads++;
-
- /* read in the next chunk */
- dev->param.read_chunk_tags_fn(dev,
- realigned_chunk,
- dev->
- checkpt_buffer,
- &tags);
-
- if (tags.chunk_id != (dev->checkpt_page_seq + 1)
- || tags.ecc_result > YAFFS_ECC_RESULT_FIXED
- || tags.seq_number !=
- YAFFS_SEQUENCE_CHECKPOINT_DATA)
- ok = 0;
-
- dev->checkpt_byte_offs = 0;
- dev->checkpt_page_seq++;
- dev->checkpt_cur_chunk++;
-
- if (dev->checkpt_cur_chunk >=
- dev->param.chunks_per_block)
- dev->checkpt_cur_block = -1;
- }
- }
-
- if (ok) {
- *data_bytes =
- dev->checkpt_buffer[dev->checkpt_byte_offs];
- dev->checkpt_sum += *data_bytes;
- dev->checkpt_xor ^= *data_bytes;
- dev->checkpt_byte_offs++;
- i++;
- data_bytes++;
- dev->checkpt_byte_count++;
- }
- }
-
- return i;
-}
-
-int yaffs_checkpt_close(struct yaffs_dev *dev)
-{
-
- if (dev->checkpt_open_write) {
- if (dev->checkpt_byte_offs != 0)
- yaffs2_checkpt_flush_buffer(dev);
- } else if (dev->checkpt_block_list) {
- int i;
- for (i = 0;
- i < dev->blocks_in_checkpt
- && dev->checkpt_block_list[i] >= 0; i++) {
- int blk = dev->checkpt_block_list[i];
- struct yaffs_block_info *bi = NULL;
- if (dev->internal_start_block <= blk
- && blk <= dev->internal_end_block)
- bi = yaffs_get_block_info(dev, blk);
- if (bi && bi->block_state == YAFFS_BLOCK_STATE_EMPTY)
- bi->block_state = YAFFS_BLOCK_STATE_CHECKPOINT;
- else {
- /* Todo this looks odd... */
- }
- }
- kfree(dev->checkpt_block_list);
- dev->checkpt_block_list = NULL;
- }
-
- dev->n_free_chunks -=
- dev->blocks_in_checkpt * dev->param.chunks_per_block;
- dev->n_erased_blocks -= dev->blocks_in_checkpt;
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,"checkpoint byte count %d",
- dev->checkpt_byte_count);
-
- if (dev->checkpt_buffer) {
- /* free the buffer */
- kfree(dev->checkpt_buffer);
- dev->checkpt_buffer = NULL;
- return 1;
- } else {
- return 0;
- }
-}
-
-int yaffs2_checkpt_invalidate_stream(struct yaffs_dev *dev)
-{
- /* Erase the checkpoint data */
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "checkpoint invalidate of %d blocks",
- dev->blocks_in_checkpt);
-
- return yaffs_checkpt_erase(dev);
-}
diff --git a/fs/yaffs2/yaffs_checkptrw.h b/fs/yaffs2/yaffs_checkptrw.h
deleted file mode 100644
index 361c606..0000000
--- a/fs/yaffs2/yaffs_checkptrw.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_CHECKPTRW_H__
-#define __YAFFS_CHECKPTRW_H__
-
-#include "yaffs_guts.h"
-
-int yaffs2_checkpt_open(struct yaffs_dev *dev, int writing);
-
-int yaffs2_checkpt_wr(struct yaffs_dev *dev, const void *data, int n_bytes);
-
-int yaffs2_checkpt_rd(struct yaffs_dev *dev, void *data, int n_bytes);
-
-int yaffs2_get_checkpt_sum(struct yaffs_dev *dev, u32 * sum);
-
-int yaffs_checkpt_close(struct yaffs_dev *dev);
-
-int yaffs2_checkpt_invalidate_stream(struct yaffs_dev *dev);
-
-#endif
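
The checkpoint stream above protects itself with two running values, a byte sum and a byte XOR, which yaffs2_get_checkpt_sum() folds into a single word as (sum << 8) | (xor & 0xFF). A self-contained sketch of that composite check:

	#include <stdio.h>
	#include <stdint.h>

	/* Running sum + running XOR, composed as yaffs2_get_checkpt_sum()
	 * composes dev->checkpt_sum and dev->checkpt_xor. */
	static uint32_t stream_sum(const uint8_t *data, size_t n)
	{
		uint32_t sum = 0, acc_xor = 0;
		for (size_t i = 0; i < n; i++) {
			sum += data[i];
			acc_xor ^= data[i];
		}
		return (sum << 8) | (acc_xor & 0xFF);
	}

	int main(void)
	{
		uint8_t chk[] = "checkpoint payload";
		uint32_t expect = stream_sum(chk, sizeof chk);

		chk[3] ^= 0x20; /* corrupt a single byte */
		printf("match after corruption: %s\n",
		       stream_sum(chk, sizeof chk) == expect ? "yes" : "no");
		return 0;
	}
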
diff --git a/fs/yaffs2/yaffs_ecc.c b/fs/yaffs2/yaffs_ecc.c
deleted file mode 100644
index e95a806..0000000
--- a/fs/yaffs2/yaffs_ecc.c
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * This code implements the ECC algorithm used in SmartMedia.
- *
- * The ECC comprises 22 bits of parity information and is stuffed into 3 bytes.
- * The two unused bits are set to 1.
- * The ECC can correct single bit errors in a 256-byte page of data. Thus, two such ECC
- * blocks are used on a 512-byte NAND page.
- *
- */
-
-/* Table generated by gen-ecc.c
- * Using a table means we do not have to calculate p1..p4 and p1'..p4'
- * for each byte of data. These are instead provided in a table in bits7..2.
- * Bit 0 of each entry indicates whether the entry has an odd or even parity, and therefore
- * this byte's influence on the line parity.
- */
-
-#include "yportenv.h"
-
-#include "yaffs_ecc.h"
-
-static const unsigned char column_parity_table[] = {
- 0x00, 0x55, 0x59, 0x0c, 0x65, 0x30, 0x3c, 0x69,
- 0x69, 0x3c, 0x30, 0x65, 0x0c, 0x59, 0x55, 0x00,
- 0x95, 0xc0, 0xcc, 0x99, 0xf0, 0xa5, 0xa9, 0xfc,
- 0xfc, 0xa9, 0xa5, 0xf0, 0x99, 0xcc, 0xc0, 0x95,
- 0x99, 0xcc, 0xc0, 0x95, 0xfc, 0xa9, 0xa5, 0xf0,
- 0xf0, 0xa5, 0xa9, 0xfc, 0x95, 0xc0, 0xcc, 0x99,
- 0x0c, 0x59, 0x55, 0x00, 0x69, 0x3c, 0x30, 0x65,
- 0x65, 0x30, 0x3c, 0x69, 0x00, 0x55, 0x59, 0x0c,
- 0xa5, 0xf0, 0xfc, 0xa9, 0xc0, 0x95, 0x99, 0xcc,
- 0xcc, 0x99, 0x95, 0xc0, 0xa9, 0xfc, 0xf0, 0xa5,
- 0x30, 0x65, 0x69, 0x3c, 0x55, 0x00, 0x0c, 0x59,
- 0x59, 0x0c, 0x00, 0x55, 0x3c, 0x69, 0x65, 0x30,
- 0x3c, 0x69, 0x65, 0x30, 0x59, 0x0c, 0x00, 0x55,
- 0x55, 0x00, 0x0c, 0x59, 0x30, 0x65, 0x69, 0x3c,
- 0xa9, 0xfc, 0xf0, 0xa5, 0xcc, 0x99, 0x95, 0xc0,
- 0xc0, 0x95, 0x99, 0xcc, 0xa5, 0xf0, 0xfc, 0xa9,
- 0xa9, 0xfc, 0xf0, 0xa5, 0xcc, 0x99, 0x95, 0xc0,
- 0xc0, 0x95, 0x99, 0xcc, 0xa5, 0xf0, 0xfc, 0xa9,
- 0x3c, 0x69, 0x65, 0x30, 0x59, 0x0c, 0x00, 0x55,
- 0x55, 0x00, 0x0c, 0x59, 0x30, 0x65, 0x69, 0x3c,
- 0x30, 0x65, 0x69, 0x3c, 0x55, 0x00, 0x0c, 0x59,
- 0x59, 0x0c, 0x00, 0x55, 0x3c, 0x69, 0x65, 0x30,
- 0xa5, 0xf0, 0xfc, 0xa9, 0xc0, 0x95, 0x99, 0xcc,
- 0xcc, 0x99, 0x95, 0xc0, 0xa9, 0xfc, 0xf0, 0xa5,
- 0x0c, 0x59, 0x55, 0x00, 0x69, 0x3c, 0x30, 0x65,
- 0x65, 0x30, 0x3c, 0x69, 0x00, 0x55, 0x59, 0x0c,
- 0x99, 0xcc, 0xc0, 0x95, 0xfc, 0xa9, 0xa5, 0xf0,
- 0xf0, 0xa5, 0xa9, 0xfc, 0x95, 0xc0, 0xcc, 0x99,
- 0x95, 0xc0, 0xcc, 0x99, 0xf0, 0xa5, 0xa9, 0xfc,
- 0xfc, 0xa9, 0xa5, 0xf0, 0x99, 0xcc, 0xc0, 0x95,
- 0x00, 0x55, 0x59, 0x0c, 0x65, 0x30, 0x3c, 0x69,
- 0x69, 0x3c, 0x30, 0x65, 0x0c, 0x59, 0x55, 0x00,
-};
-
-
-/* Calculate the ECC for a 256-byte block of data */
-void yaffs_ecc_cacl(const unsigned char *data, unsigned char *ecc)
-{
- unsigned int i;
-
- unsigned char col_parity = 0;
- unsigned char line_parity = 0;
- unsigned char line_parity_prime = 0;
- unsigned char t;
- unsigned char b;
-
- for (i = 0; i < 256; i++) {
- b = column_parity_table[*data++];
- col_parity ^= b;
-
- if (b & 0x01) { /* odd number of bits in the byte */
- line_parity ^= i;
- line_parity_prime ^= ~i;
- }
- }
-
- ecc[2] = (~col_parity) | 0x03;
-
- t = 0;
- if (line_parity & 0x80)
- t |= 0x80;
- if (line_parity_prime & 0x80)
- t |= 0x40;
- if (line_parity & 0x40)
- t |= 0x20;
- if (line_parity_prime & 0x40)
- t |= 0x10;
- if (line_parity & 0x20)
- t |= 0x08;
- if (line_parity_prime & 0x20)
- t |= 0x04;
- if (line_parity & 0x10)
- t |= 0x02;
- if (line_parity_prime & 0x10)
- t |= 0x01;
- ecc[1] = ~t;
-
- t = 0;
- if (line_parity & 0x08)
- t |= 0x80;
- if (line_parity_prime & 0x08)
- t |= 0x40;
- if (line_parity & 0x04)
- t |= 0x20;
- if (line_parity_prime & 0x04)
- t |= 0x10;
- if (line_parity & 0x02)
- t |= 0x08;
- if (line_parity_prime & 0x02)
- t |= 0x04;
- if (line_parity & 0x01)
- t |= 0x02;
- if (line_parity_prime & 0x01)
- t |= 0x01;
- ecc[0] = ~t;
-
-#ifdef CONFIG_YAFFS_ECC_WRONG_ORDER
- /* Swap the bytes into the wrong order */
- t = ecc[0];
- ecc[0] = ecc[1];
- ecc[1] = t;
-#endif
-}
-
-/* Correct the ECC on a 256 byte block of data */
-
-int yaffs_ecc_correct(unsigned char *data, unsigned char *read_ecc,
- const unsigned char *test_ecc)
-{
- unsigned char d0, d1, d2; /* deltas */
-
- d0 = read_ecc[0] ^ test_ecc[0];
- d1 = read_ecc[1] ^ test_ecc[1];
- d2 = read_ecc[2] ^ test_ecc[2];
-
- if ((d0 | d1 | d2) == 0)
- return 0; /* no error */
-
- if (((d0 ^ (d0 >> 1)) & 0x55) == 0x55 &&
- ((d1 ^ (d1 >> 1)) & 0x55) == 0x55 &&
- ((d2 ^ (d2 >> 1)) & 0x54) == 0x54) {
- /* Single bit (recoverable) error in data */
-
- unsigned byte;
- unsigned bit;
-
-#ifdef CONFIG_YAFFS_ECC_WRONG_ORDER
- /* swap the bytes to correct for the wrong order */
- unsigned char t;
-
- t = d0;
- d0 = d1;
- d1 = t;
-#endif
-
- bit = byte = 0;
-
- if (d1 & 0x80)
- byte |= 0x80;
- if (d1 & 0x20)
- byte |= 0x40;
- if (d1 & 0x08)
- byte |= 0x20;
- if (d1 & 0x02)
- byte |= 0x10;
- if (d0 & 0x80)
- byte |= 0x08;
- if (d0 & 0x20)
- byte |= 0x04;
- if (d0 & 0x08)
- byte |= 0x02;
- if (d0 & 0x02)
- byte |= 0x01;
-
- if (d2 & 0x80)
- bit |= 0x04;
- if (d2 & 0x20)
- bit |= 0x02;
- if (d2 & 0x08)
- bit |= 0x01;
-
- data[byte] ^= (1 << bit);
-
- return 1; /* Corrected the error */
- }
-
- if ((hweight8(d0) + hweight8(d1) + hweight8(d2)) == 1) {
- /* Recoverable error in ecc */
-
- read_ecc[0] = test_ecc[0];
- read_ecc[1] = test_ecc[1];
- read_ecc[2] = test_ecc[2];
-
- return 1; /* Corrected the error */
- }
-
- /* Unrecoverable error */
-
- return -1;
-
-}
-
-/*
- * ECCxxxOther does ECC calcs on arbitrary n bytes of data
- */
-void yaffs_ecc_calc_other(const unsigned char *data, unsigned n_bytes,
- struct yaffs_ecc_other *ecc_other)
-{
- unsigned int i;
-
- unsigned char col_parity = 0;
- unsigned line_parity = 0;
- unsigned line_parity_prime = 0;
- unsigned char b;
-
- for (i = 0; i < n_bytes; i++) {
- b = column_parity_table[*data++];
- col_parity ^= b;
-
- if (b & 0x01) {
- /* odd number of bits in the byte */
- line_parity ^= i;
- line_parity_prime ^= ~i;
- }
-
- }
-
- ecc_other->col_parity = (col_parity >> 2) & 0x3f;
- ecc_other->line_parity = line_parity;
- ecc_other->line_parity_prime = line_parity_prime;
-}
-
-int yaffs_ecc_correct_other(unsigned char *data, unsigned n_bytes,
- struct yaffs_ecc_other *read_ecc,
- const struct yaffs_ecc_other *test_ecc)
-{
- unsigned char delta_col; /* column parity delta */
- unsigned delta_line; /* line parity delta */
- unsigned delta_line_prime; /* line parity delta */
- unsigned bit;
-
- delta_col = read_ecc->col_parity ^ test_ecc->col_parity;
- delta_line = read_ecc->line_parity ^ test_ecc->line_parity;
- delta_line_prime =
- read_ecc->line_parity_prime ^ test_ecc->line_parity_prime;
-
- if ((delta_col | delta_line | delta_line_prime) == 0)
- return 0; /* no error */
-
- if (delta_line == ~delta_line_prime &&
- (((delta_col ^ (delta_col >> 1)) & 0x15) == 0x15)) {
- /* Single bit (recoverable) error in data */
-
- bit = 0;
-
- if (delta_col & 0x20)
- bit |= 0x04;
- if (delta_col & 0x08)
- bit |= 0x02;
- if (delta_col & 0x02)
- bit |= 0x01;
-
- if (delta_line >= n_bytes)
- return -1;
-
- data[delta_line] ^= (1 << bit);
-
- return 1; /* corrected */
- }
-
- if ((hweight32(delta_line) +
- hweight32(delta_line_prime) +
- hweight8(delta_col)) == 1) {
- /* Recoverable error in ecc */
-
- *read_ecc = *test_ecc;
- return 1; /* corrected */
- }
-
- /* Unrecoverable error */
-
- return -1;
-}
diff --git a/fs/yaffs2/yaffs_ecc.h b/fs/yaffs2/yaffs_ecc.h
deleted file mode 100644
index b0c461d..0000000
--- a/fs/yaffs2/yaffs_ecc.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-/*
- * This code implements the ECC algorithm used in SmartMedia.
- *
- * The ECC comprises 22 bits of parity information and is stuffed into 3 bytes.
- * The two unused bits are set to 1.
- * The ECC can correct single bit errors in a 256-byte page of data. Thus, two such ECC
- * blocks are used on a 512-byte NAND page.
- *
- */
-
-#ifndef __YAFFS_ECC_H__
-#define __YAFFS_ECC_H__
-
-struct yaffs_ecc_other {
- unsigned char col_parity;
- unsigned line_parity;
- unsigned line_parity_prime;
-};
-
-void yaffs_ecc_cacl(const unsigned char *data, unsigned char *ecc);
-int yaffs_ecc_correct(unsigned char *data, unsigned char *read_ecc,
- const unsigned char *test_ecc);
-
-void yaffs_ecc_calc_other(const unsigned char *data, unsigned n_bytes,
- struct yaffs_ecc_other *ecc);
-int yaffs_ecc_correct_other(unsigned char *data, unsigned n_bytes,
- struct yaffs_ecc_other *read_ecc,
- const struct yaffs_ecc_other *test_ecc);
-#endif
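
The deleted ECC above recovers a single flipped bit by comparing stored and recomputed parities; the XOR of the two parity words identifies the bad byte and bit, with the complemented line_parity_prime disambiguating the all-zero case. A self-contained sketch of the underlying idea, using a plain positional XOR rather than the SmartMedia byte layout:

	#include <stdio.h>
	#include <stdint.h>

	/* XOR together the positions of all set bits. Flipping exactly one
	 * bit changes this value by exactly that bit's position -- the same
	 * principle the line/column parities in yaffs_ecc_correct() rely
	 * on. (Position 0 would be ambiguous with "no error" here; yaffs
	 * carries a complemented parity to resolve that.) */
	static unsigned bit_position_parity(const uint8_t *data, size_t n)
	{
		unsigned acc = 0;
		for (size_t i = 0; i < n; i++)
			for (unsigned b = 0; b < 8; b++)
				if (data[i] & (1u << b))
					acc ^= (unsigned)(i * 8 + b);
		return acc;
	}

	int main(void)
	{
		uint8_t page[16] = { 0xde, 0xad, 0xbe, 0xef, 1, 2, 3, 4 };
		unsigned stored = bit_position_parity(page, sizeof page);

		page[5] ^= 0x10; /* single-bit error: byte 5, bit 4 */

		unsigned delta = stored ^ bit_position_parity(page, sizeof page);
		page[delta / 8] ^= 1u << (delta % 8); /* correct it */

		printf("corrected bit %u: %s\n", delta,
		       bit_position_parity(page, sizeof page) == stored
		       ? "ok" : "bad");
		return 0;
	}
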
diff --git a/fs/yaffs2/yaffs_getblockinfo.h b/fs/yaffs2/yaffs_getblockinfo.h
deleted file mode 100644
index d87acbd..0000000
--- a/fs/yaffs2/yaffs_getblockinfo.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_GETBLOCKINFO_H__
-#define __YAFFS_GETBLOCKINFO_H__
-
-#include "yaffs_guts.h"
-#include "yaffs_trace.h"
-
-/* Function to manipulate block info */
-static inline struct yaffs_block_info *yaffs_get_block_info(struct yaffs_dev
- *dev, int blk)
-{
- if (blk < dev->internal_start_block || blk > dev->internal_end_block) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>> yaffs: get_block_info block %d is not valid",
- blk);
- YBUG();
- }
- return &dev->block_info[blk - dev->internal_start_block];
-}
-
-#endif
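
yaffs_get_block_info() above range-checks the block number before indexing the block_info array, trapping bad callers via YBUG() rather than corrupting memory. A self-contained sketch of that guarded-accessor pattern:

	#include <assert.h>
	#include <stdio.h>

	struct block_info { int state; };

	static struct block_info blocks[32];
	static const int start_block = 4, end_block = 35;

	/* Guarded accessor: trap bad block numbers before they index the
	 * array, as yaffs_get_block_info() does with YBUG(). */
	static struct block_info *get_block_info(int blk)
	{
		assert(blk >= start_block && blk <= end_block);
		return &blocks[blk - start_block];
	}

	int main(void)
	{
		get_block_info(10)->state = 1;
		printf("block 10 state: %d\n", get_block_info(10)->state);
		return 0;
	}
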
diff --git a/fs/yaffs2/yaffs_guts.c b/fs/yaffs2/yaffs_guts.c
deleted file mode 100644
index f4ae9de..0000000
--- a/fs/yaffs2/yaffs_guts.c
+++ /dev/null
@@ -1,5164 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yportenv.h"
-#include "yaffs_trace.h"
-
-#include "yaffs_guts.h"
-#include "yaffs_tagsvalidity.h"
-#include "yaffs_getblockinfo.h"
-
-#include "yaffs_tagscompat.h"
-
-#include "yaffs_nand.h"
-
-#include "yaffs_yaffs1.h"
-#include "yaffs_yaffs2.h"
-#include "yaffs_bitmap.h"
-#include "yaffs_verify.h"
-
-#include "yaffs_nand.h"
-#include "yaffs_packedtags2.h"
-
-#include "yaffs_nameval.h"
-#include "yaffs_allocator.h"
-
-#include "yaffs_attribs.h"
-
-/* Note YAFFS_GC_GOOD_ENOUGH must be <= YAFFS_GC_PASSIVE_THRESHOLD */
-#define YAFFS_GC_GOOD_ENOUGH 2
-#define YAFFS_GC_PASSIVE_THRESHOLD 4
-
-#include "yaffs_ecc.h"
-
-/* Forward declarations */
-
-static int yaffs_wr_data_obj(struct yaffs_obj *in, int inode_chunk,
- const u8 * buffer, int n_bytes, int use_reserve);
-
-
-
-/* Function to calculate chunk and offset */
-
-static void yaffs_addr_to_chunk(struct yaffs_dev *dev, loff_t addr,
- int *chunk_out, u32 * offset_out)
-{
- int chunk;
- u32 offset;
-
- chunk = (u32) (addr >> dev->chunk_shift);
-
- if (dev->chunk_div == 1) {
- /* easy power of 2 case */
- offset = (u32) (addr & dev->chunk_mask);
- } else {
- /* Non power-of-2 case */
-
- loff_t chunk_base;
-
- chunk /= dev->chunk_div;
-
- chunk_base = ((loff_t) chunk) * dev->data_bytes_per_chunk;
- offset = (u32) (addr - chunk_base);
- }
-
- *chunk_out = chunk;
- *offset_out = offset;
-}
-
-/* Function to return the number of shifts for a power of 2 greater than or
- * equal to the given number
- * Note we don't try to cater for all possible numbers and this does not have to
- * be hellishly efficient.
- */
-
-static u32 calc_shifts_ceiling(u32 x)
-{
- int extra_bits;
- int shifts;
-
- shifts = extra_bits = 0;
-
- while (x > 1) {
- if (x & 1)
- extra_bits++;
- x >>= 1;
- shifts++;
- }
-
- if (extra_bits)
- shifts++;
-
- return shifts;
-}
-
-/* Function to return the number of shifts to get a 1 in bit 0
- */
-
-static u32 calc_shifts(u32 x)
-{
- u32 shifts;
-
- shifts = 0;
-
- if (!x)
- return 0;
-
- while (!(x & 1)) {
- x >>= 1;
- shifts++;
- }
-
- return shifts;
-}
-
-/*
- * Temporary buffer manipulations.
- */
-
-static int yaffs_init_tmp_buffers(struct yaffs_dev *dev)
-{
- int i;
- u8 *buf = (u8 *) 1;
-
- memset(dev->temp_buffer, 0, sizeof(dev->temp_buffer));
-
- for (i = 0; buf && i < YAFFS_N_TEMP_BUFFERS; i++) {
- dev->temp_buffer[i].line = 0; /* not in use */
- dev->temp_buffer[i].buffer = buf =
- kmalloc(dev->param.total_bytes_per_chunk, GFP_NOFS);
- }
-
- return buf ? YAFFS_OK : YAFFS_FAIL;
-}
-
-u8 *yaffs_get_temp_buffer(struct yaffs_dev * dev, int line_no)
-{
- int i, j;
-
- dev->temp_in_use++;
- if (dev->temp_in_use > dev->max_temp)
- dev->max_temp = dev->temp_in_use;
-
- for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) {
- if (dev->temp_buffer[i].line == 0) {
- dev->temp_buffer[i].line = line_no;
- if ((i + 1) > dev->max_temp) {
- dev->max_temp = i + 1;
- for (j = 0; j <= i; j++)
- dev->temp_buffer[j].max_line =
- dev->temp_buffer[j].line;
- }
-
- return dev->temp_buffer[i].buffer;
- }
- }
-
- yaffs_trace(YAFFS_TRACE_BUFFERS,
- "Out of temp buffers at line %d, other held by lines:",
- line_no);
- for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++)
- yaffs_trace(YAFFS_TRACE_BUFFERS," %d", dev->temp_buffer[i].line);
-
- /*
- * If we got here then we have to allocate an unmanaged one
- * This is not good.
- */
-
- dev->unmanaged_buffer_allocs++;
- return kmalloc(dev->data_bytes_per_chunk, GFP_NOFS);
-
-}
-
-void yaffs_release_temp_buffer(struct yaffs_dev *dev, u8 * buffer, int line_no)
-{
- int i;
-
- dev->temp_in_use--;
-
- for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) {
- if (dev->temp_buffer[i].buffer == buffer) {
- dev->temp_buffer[i].line = 0;
- return;
- }
- }
-
- if (buffer) {
- /* assume it is an unmanaged one. */
- yaffs_trace(YAFFS_TRACE_BUFFERS,
- "Releasing unmanaged temp buffer in line %d",
- line_no);
- kfree(buffer);
- dev->unmanaged_buffer_deallocs++;
- }
-
-}
-
-/*
- * Determine if we have a managed buffer.
- */
-int yaffs_is_managed_tmp_buffer(struct yaffs_dev *dev, const u8 * buffer)
-{
- int i;
-
- for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++) {
- if (dev->temp_buffer[i].buffer == buffer)
- return 1;
- }
-
- for (i = 0; i < dev->param.n_caches; i++) {
- if (dev->cache[i].data == buffer)
- return 1;
- }
-
- if (buffer == dev->checkpt_buffer)
- return 1;
-
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs: unmaged buffer detected.");
- return 0;
-}
-
-/*
- * Functions for robustising (TODO)
- *
- */
-
-static void yaffs_handle_chunk_wr_ok(struct yaffs_dev *dev, int nand_chunk,
- const u8 * data,
- const struct yaffs_ext_tags *tags)
-{
- dev = dev;
- nand_chunk = nand_chunk;
- data = data;
- tags = tags;
-}
-
-static void yaffs_handle_chunk_update(struct yaffs_dev *dev, int nand_chunk,
- const struct yaffs_ext_tags *tags)
-{
- dev = dev;
- nand_chunk = nand_chunk;
- tags = tags;
-}
-
-void yaffs_handle_chunk_error(struct yaffs_dev *dev,
- struct yaffs_block_info *bi)
-{
- if (!bi->gc_prioritise) {
- bi->gc_prioritise = 1;
- dev->has_pending_prioritised_gc = 1;
- bi->chunk_error_strikes++;
-
- if (bi->chunk_error_strikes > 3) {
- bi->needs_retiring = 1; /* Too many strikes, so retire this */
- yaffs_trace(YAFFS_TRACE_ALWAYS, "yaffs: Block struck out");
-
- }
- }
-}
-
-static void yaffs_handle_chunk_wr_error(struct yaffs_dev *dev, int nand_chunk,
- int erased_ok)
-{
- int flash_block = nand_chunk / dev->param.chunks_per_block;
- struct yaffs_block_info *bi = yaffs_get_block_info(dev, flash_block);
-
- yaffs_handle_chunk_error(dev, bi);
-
- if (erased_ok) {
- /* Was an actual write failure, so mark the block for retirement */
- bi->needs_retiring = 1;
- yaffs_trace(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS,
- "**>> Block %d needs retiring", flash_block);
- }
-
- /* Delete the chunk */
- yaffs_chunk_del(dev, nand_chunk, 1, __LINE__);
- yaffs_skip_rest_of_block(dev);
-}
-
-/*
- * Verification code
- */
-
-/*
- * Simple hash function. Needs to have a reasonable spread
- */
-
-static inline int yaffs_hash_fn(int n)
-{
- n = abs(n);
- return n % YAFFS_NOBJECT_BUCKETS;
-}
-
-/*
- * Access functions to useful fake objects.
- * Note that root might have a presence in NAND if permissions are set.
- */
-
-struct yaffs_obj *yaffs_root(struct yaffs_dev *dev)
-{
- return dev->root_dir;
-}
-
-struct yaffs_obj *yaffs_lost_n_found(struct yaffs_dev *dev)
-{
- return dev->lost_n_found;
-}
-
-/*
- * Erased NAND checking functions
- */
-
-int yaffs_check_ff(u8 * buffer, int n_bytes)
-{
- /* Horrible, slow implementation */
- while (n_bytes--) {
- if (*buffer != 0xFF)
- return 0;
- buffer++;
- }
- return 1;
-}
-
-static int yaffs_check_chunk_erased(struct yaffs_dev *dev, int nand_chunk)
-{
- int retval = YAFFS_OK;
- u8 *data = yaffs_get_temp_buffer(dev, __LINE__);
- struct yaffs_ext_tags tags;
- int result;
-
- result = yaffs_rd_chunk_tags_nand(dev, nand_chunk, data, &tags);
-
- if (tags.ecc_result > YAFFS_ECC_RESULT_NO_ERROR)
- retval = YAFFS_FAIL;
-
- if (!yaffs_check_ff(data, dev->data_bytes_per_chunk) ||
- tags.chunk_used) {
- yaffs_trace(YAFFS_TRACE_NANDACCESS, "Chunk %d not erased", nand_chunk);
- retval = YAFFS_FAIL;
- }
-
- yaffs_release_temp_buffer(dev, data, __LINE__);
-
- return retval;
-
-}
-
-static int yaffs_verify_chunk_written(struct yaffs_dev *dev,
- int nand_chunk,
- const u8 * data,
- struct yaffs_ext_tags *tags)
-{
- int retval = YAFFS_OK;
- struct yaffs_ext_tags temp_tags;
- u8 *buffer = yaffs_get_temp_buffer(dev, __LINE__);
- int result;
-
- result = yaffs_rd_chunk_tags_nand(dev, nand_chunk, buffer, &temp_tags);
- if (memcmp(buffer, data, dev->data_bytes_per_chunk) ||
- temp_tags.obj_id != tags->obj_id ||
- temp_tags.chunk_id != tags->chunk_id ||
- temp_tags.n_bytes != tags->n_bytes)
- retval = YAFFS_FAIL;
-
- yaffs_release_temp_buffer(dev, buffer, __LINE__);
-
- return retval;
-}
-
-
-int yaffs_check_alloc_available(struct yaffs_dev *dev, int n_chunks)
-{
- int reserved_chunks;
- int reserved_blocks = dev->param.n_reserved_blocks;
- int checkpt_blocks;
-
- checkpt_blocks = yaffs_calc_checkpt_blocks_required(dev);
-
- reserved_chunks =
- ((reserved_blocks + checkpt_blocks) * dev->param.chunks_per_block);
-
- return (dev->n_free_chunks > (reserved_chunks + n_chunks));
-}
-
-static int yaffs_find_alloc_block(struct yaffs_dev *dev)
-{
- int i;
-
- struct yaffs_block_info *bi;
-
- if (dev->n_erased_blocks < 1) {
- /* Hoosterman we've got a problem.
- * Can't get space to gc
- */
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs tragedy: no more erased blocks" );
-
- return -1;
- }
-
- /* Find an empty block. */
-
- for (i = dev->internal_start_block; i <= dev->internal_end_block; i++) {
- dev->alloc_block_finder++;
- if (dev->alloc_block_finder < dev->internal_start_block
- || dev->alloc_block_finder > dev->internal_end_block) {
- dev->alloc_block_finder = dev->internal_start_block;
- }
-
- bi = yaffs_get_block_info(dev, dev->alloc_block_finder);
-
- if (bi->block_state == YAFFS_BLOCK_STATE_EMPTY) {
- bi->block_state = YAFFS_BLOCK_STATE_ALLOCATING;
- dev->seq_number++;
- bi->seq_number = dev->seq_number;
- dev->n_erased_blocks--;
- yaffs_trace(YAFFS_TRACE_ALLOCATE,
- "Allocated block %d, seq %d, %d left" ,
- dev->alloc_block_finder, dev->seq_number,
- dev->n_erased_blocks);
- return dev->alloc_block_finder;
- }
- }
-
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs tragedy: no more erased blocks, but there should have been %d",
- dev->n_erased_blocks);
-
- return -1;
-}
-
-static int yaffs_alloc_chunk(struct yaffs_dev *dev, int use_reserver,
- struct yaffs_block_info **block_ptr)
-{
- int ret_val;
- struct yaffs_block_info *bi;
-
- if (dev->alloc_block < 0) {
-		/* Get next block to allocate from */
- dev->alloc_block = yaffs_find_alloc_block(dev);
- dev->alloc_page = 0;
- }
-
- if (!use_reserver && !yaffs_check_alloc_available(dev, 1)) {
- /* Not enough space to allocate unless we're allowed to use the reserve. */
- return -1;
- }
-
- if (dev->n_erased_blocks < dev->param.n_reserved_blocks
- && dev->alloc_page == 0)
- yaffs_trace(YAFFS_TRACE_ALLOCATE, "Allocating reserve");
-
- /* Next page please.... */
- if (dev->alloc_block >= 0) {
- bi = yaffs_get_block_info(dev, dev->alloc_block);
-
- ret_val = (dev->alloc_block * dev->param.chunks_per_block) +
- dev->alloc_page;
- bi->pages_in_use++;
- yaffs_set_chunk_bit(dev, dev->alloc_block, dev->alloc_page);
-
- dev->alloc_page++;
-
- dev->n_free_chunks--;
-
- /* If the block is full set the state to full */
- if (dev->alloc_page >= dev->param.chunks_per_block) {
- bi->block_state = YAFFS_BLOCK_STATE_FULL;
- dev->alloc_block = -1;
- }
-
- if (block_ptr)
- *block_ptr = bi;
-
- return ret_val;
- }
-
- yaffs_trace(YAFFS_TRACE_ERROR, "!!!!!!!!! Allocator out !!!!!!!!!!!!!!!!!" );
-
- return -1;
-}
-
-static int yaffs_get_erased_chunks(struct yaffs_dev *dev)
-{
- int n;
-
- n = dev->n_erased_blocks * dev->param.chunks_per_block;
-
- if (dev->alloc_block > 0)
- n += (dev->param.chunks_per_block - dev->alloc_page);
-
- return n;
-
-}
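-
-/*
- * Worked example for the above (illustrative numbers only): with 3 erased
- * blocks, 64 chunks per block and a current allocation block that is 10
- * pages in, this reports 3 * 64 + (64 - 10) = 246 erased chunks still
- * available for writing.
- */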
-
-/*
- * yaffs_skip_rest_of_block() skips over the rest of the allocation block
- * if we don't want to write to it.
- */
-void yaffs_skip_rest_of_block(struct yaffs_dev *dev)
-{
- if (dev->alloc_block > 0) {
- struct yaffs_block_info *bi =
- yaffs_get_block_info(dev, dev->alloc_block);
- if (bi->block_state == YAFFS_BLOCK_STATE_ALLOCATING) {
- bi->block_state = YAFFS_BLOCK_STATE_FULL;
- dev->alloc_block = -1;
- }
- }
-}
-
-static int yaffs_write_new_chunk(struct yaffs_dev *dev,
- const u8 * data,
- struct yaffs_ext_tags *tags, int use_reserver)
-{
- int attempts = 0;
- int write_ok = 0;
- int chunk;
-
- yaffs2_checkpt_invalidate(dev);
-
- do {
- struct yaffs_block_info *bi = 0;
- int erased_ok = 0;
-
- chunk = yaffs_alloc_chunk(dev, use_reserver, &bi);
- if (chunk < 0) {
- /* no space */
- break;
- }
-
- /* First check this chunk is erased, if it needs
- * checking. The checking policy (unless forced
- * always on) is as follows:
- *
- * Check the first page we try to write in a block.
- * If the check passes then we don't need to check any
- * more. If the check fails, we check again...
- * If the block has been erased, we don't need to check.
- *
- * However, if the block has been prioritised for gc,
- * then we think there might be something odd about
- * this block and stop using it.
- *
- * Rationale: We should only ever see chunks that have
- * not been erased if there was a partially written
- * chunk due to power loss. This checking policy should
- * catch that case with very few checks and thus save a
- * lot of checks that are most likely not needed.
- *
		 * Mods to the above:
		 * if an erase check fails or the write fails, we skip the
		 * rest of the block.
- */
-
- /* let's give it a try */
- attempts++;
-
- if (dev->param.always_check_erased)
- bi->skip_erased_check = 0;
-
- if (!bi->skip_erased_check) {
- erased_ok = yaffs_check_chunk_erased(dev, chunk);
- if (erased_ok != YAFFS_OK) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>> yaffs chunk %d was not erased",
- chunk);
-
- /* If not erased, delete this one,
- * skip rest of block and
- * try another chunk */
- yaffs_chunk_del(dev, chunk, 1, __LINE__);
- yaffs_skip_rest_of_block(dev);
- continue;
- }
- }
-
- write_ok = yaffs_wr_chunk_tags_nand(dev, chunk, data, tags);
-
- if (!bi->skip_erased_check)
- write_ok =
- yaffs_verify_chunk_written(dev, chunk, data, tags);
-
- if (write_ok != YAFFS_OK) {
- /* Clean up aborted write, skip to next block and
- * try another chunk */
- yaffs_handle_chunk_wr_error(dev, chunk, erased_ok);
- continue;
- }
-
- bi->skip_erased_check = 1;
-
- /* Copy the data into the robustification buffer */
- yaffs_handle_chunk_wr_ok(dev, chunk, data, tags);
-
- } while (write_ok != YAFFS_OK &&
- (yaffs_wr_attempts <= 0 || attempts <= yaffs_wr_attempts));
-
- if (!write_ok)
- chunk = -1;
-
- if (attempts > 1) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>> yaffs write required %d attempts",
- attempts);
- dev->n_retired_writes += (attempts - 1);
- }
-
- return chunk;
-}
-
-/*
- * Block retiring for handling a broken block.
- */
-
-static void yaffs_retire_block(struct yaffs_dev *dev, int flash_block)
-{
- struct yaffs_block_info *bi = yaffs_get_block_info(dev, flash_block);
-
- yaffs2_checkpt_invalidate(dev);
-
- yaffs2_clear_oldest_dirty_seq(dev, bi);
-
- if (yaffs_mark_bad(dev, flash_block) != YAFFS_OK) {
- if (yaffs_erase_block(dev, flash_block) != YAFFS_OK) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs: Failed to mark bad and erase block %d",
- flash_block);
- } else {
- struct yaffs_ext_tags tags;
- int chunk_id =
- flash_block * dev->param.chunks_per_block;
-
- u8 *buffer = yaffs_get_temp_buffer(dev, __LINE__);
-
- memset(buffer, 0xff, dev->data_bytes_per_chunk);
- yaffs_init_tags(&tags);
- tags.seq_number = YAFFS_SEQUENCE_BAD_BLOCK;
- if (dev->param.write_chunk_tags_fn(dev, chunk_id -
- dev->chunk_offset,
- buffer,
- &tags) != YAFFS_OK)
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs: Failed to write bad block marker to block %d",
- flash_block);
-
- yaffs_release_temp_buffer(dev, buffer, __LINE__);
- }
- }
-
- bi->block_state = YAFFS_BLOCK_STATE_DEAD;
- bi->gc_prioritise = 0;
- bi->needs_retiring = 0;
-
- dev->n_retired_blocks++;
-}
-
-/*---------------- Name handling functions ------------*/
-
-static u16 yaffs_calc_name_sum(const YCHAR * name)
-{
- u16 sum = 0;
- u16 i = 1;
-
- const YUCHAR *bname = (const YUCHAR *)name;
- if (bname) {
- while ((*bname) && (i < (YAFFS_MAX_NAME_LENGTH / 2))) {
-
- /* 0x1f mask is case insensitive */
- sum += ((*bname) & 0x1f) * i;
- i++;
- bname++;
- }
- }
- return sum;
-}
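-
-/*
- * Example of the masking above (illustrative): because of the 0x1f mask,
- * ASCII upper and lower case letters contribute the same value, e.g.
- * 'a' (0x61) & 0x1f == 1 and 'A' (0x41) & 0x1f == 1, so "Foo" and "foo"
- * produce the same sum; the weighting by i still makes the sum sensitive
- * to character order.
- */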
-
-void yaffs_set_obj_name(struct yaffs_obj *obj, const YCHAR * name)
-{
-#ifndef CONFIG_YAFFS_NO_SHORT_NAMES
- memset(obj->short_name, 0, sizeof(obj->short_name));
- if (name &&
- strnlen(name, YAFFS_SHORT_NAME_LENGTH + 1) <=
- YAFFS_SHORT_NAME_LENGTH)
- strcpy(obj->short_name, name);
- else
- obj->short_name[0] = _Y('\0');
-#endif
- obj->sum = yaffs_calc_name_sum(name);
-}
-
-void yaffs_set_obj_name_from_oh(struct yaffs_obj *obj,
- const struct yaffs_obj_hdr *oh)
-{
-#ifdef CONFIG_YAFFS_AUTO_UNICODE
- YCHAR tmp_name[YAFFS_MAX_NAME_LENGTH + 1];
- memset(tmp_name, 0, sizeof(tmp_name));
- yaffs_load_name_from_oh(obj->my_dev, tmp_name, oh->name,
- YAFFS_MAX_NAME_LENGTH + 1);
- yaffs_set_obj_name(obj, tmp_name);
-#else
- yaffs_set_obj_name(obj, oh->name);
-#endif
-}
-
-/*-------------------- TNODES -------------------
- *
- * List of spare tnodes
- * The list is hooked together using the first pointer
- * in the tnode.
- */
-
-struct yaffs_tnode *yaffs_get_tnode(struct yaffs_dev *dev)
-{
- struct yaffs_tnode *tn = yaffs_alloc_raw_tnode(dev);
- if (tn) {
- memset(tn, 0, dev->tnode_size);
- dev->n_tnodes++;
- }
-
- dev->checkpoint_blocks_required = 0; /* force recalculation */
-
- return tn;
-}
-
-/* FreeTnode frees up a tnode and puts it back on the free list */
-static void yaffs_free_tnode(struct yaffs_dev *dev, struct yaffs_tnode *tn)
-{
- yaffs_free_raw_tnode(dev, tn);
- dev->n_tnodes--;
- dev->checkpoint_blocks_required = 0; /* force recalculation */
-}
-
-static void yaffs_deinit_tnodes_and_objs(struct yaffs_dev *dev)
-{
- yaffs_deinit_raw_tnodes_and_objs(dev);
- dev->n_obj = 0;
- dev->n_tnodes = 0;
-}
-
-void yaffs_load_tnode_0(struct yaffs_dev *dev, struct yaffs_tnode *tn,
- unsigned pos, unsigned val)
-{
- u32 *map = (u32 *) tn;
- u32 bit_in_map;
- u32 bit_in_word;
- u32 word_in_map;
- u32 mask;
-
- pos &= YAFFS_TNODES_LEVEL0_MASK;
- val >>= dev->chunk_grp_bits;
-
- bit_in_map = pos * dev->tnode_width;
- word_in_map = bit_in_map / 32;
- bit_in_word = bit_in_map & (32 - 1);
-
- mask = dev->tnode_mask << bit_in_word;
-
- map[word_in_map] &= ~mask;
- map[word_in_map] |= (mask & (val << bit_in_word));
-
- if (dev->tnode_width > (32 - bit_in_word)) {
- bit_in_word = (32 - bit_in_word);
-		word_in_map++;
-		mask = dev->tnode_mask >> bit_in_word;
- map[word_in_map] &= ~mask;
- map[word_in_map] |= (mask & (val >> bit_in_word));
- }
-}
-
-u32 yaffs_get_group_base(struct yaffs_dev *dev, struct yaffs_tnode *tn,
- unsigned pos)
-{
- u32 *map = (u32 *) tn;
- u32 bit_in_map;
- u32 bit_in_word;
- u32 word_in_map;
- u32 val;
-
- pos &= YAFFS_TNODES_LEVEL0_MASK;
-
- bit_in_map = pos * dev->tnode_width;
- word_in_map = bit_in_map / 32;
- bit_in_word = bit_in_map & (32 - 1);
-
- val = map[word_in_map] >> bit_in_word;
-
- if (dev->tnode_width > (32 - bit_in_word)) {
- bit_in_word = (32 - bit_in_word);
-		word_in_map++;
- val |= (map[word_in_map] << bit_in_word);
- }
-
- val &= dev->tnode_mask;
- val <<= dev->chunk_grp_bits;
-
- return val;
-}
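-
-/*
- * Worked example of the packing above (illustrative numbers only): with
- * tnode_width = 18 and pos = 3, bit_in_map = 54, so word_in_map = 1 and
- * bit_in_word = 22. Since 18 > (32 - 22), the value straddles two words:
- * the low 10 bits live in map[1] and the high 8 bits in map[2].
- */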
-
-/* ------------------- End of individual tnode manipulation -----------------*/
-
-/* ---------Functions to manipulate the look-up tree (made up of tnodes) ------
- * The look up tree is represented by the top tnode and the number of top_level
- * in the tree. 0 means only the level 0 tnode is in the tree.
- */
-
-/* FindLevel0Tnode finds the level 0 tnode, if one exists. */
-struct yaffs_tnode *yaffs_find_tnode_0(struct yaffs_dev *dev,
- struct yaffs_file_var *file_struct,
- u32 chunk_id)
-{
- struct yaffs_tnode *tn = file_struct->top;
- u32 i;
- int required_depth;
- int level = file_struct->top_level;
-
-	dev = dev;	/* suppress unused-parameter warning */
-
- /* Check sane level and chunk Id */
- if (level < 0 || level > YAFFS_TNODES_MAX_LEVEL)
- return NULL;
-
- if (chunk_id > YAFFS_MAX_CHUNK_ID)
- return NULL;
-
- /* First check we're tall enough (ie enough top_level) */
-
- i = chunk_id >> YAFFS_TNODES_LEVEL0_BITS;
- required_depth = 0;
- while (i) {
- i >>= YAFFS_TNODES_INTERNAL_BITS;
- required_depth++;
- }
-
- if (required_depth > file_struct->top_level)
- return NULL; /* Not tall enough, so we can't find it */
-
- /* Traverse down to level 0 */
- while (level > 0 && tn) {
- tn = tn->internal[(chunk_id >>
- (YAFFS_TNODES_LEVEL0_BITS +
- (level - 1) *
- YAFFS_TNODES_INTERNAL_BITS)) &
- YAFFS_TNODES_INTERNAL_MASK];
- level--;
- }
-
- return tn;
-}
-
-/* AddOrFindLevel0Tnode finds the level 0 tnode if it exists, otherwise first expands the tree.
- * This happens in two steps:
- * 1. If the tree isn't tall enough, then make it taller.
- * 2. Scan down the tree towards the level 0 tnode adding tnodes if required.
- *
- * Used when modifying the tree.
- *
- * If the passed_tn argument is NULL then a fresh tnode will be added;
- * otherwise the specified tnode will be plugged into the tree.
- */
-
-struct yaffs_tnode *yaffs_add_find_tnode_0(struct yaffs_dev *dev,
- struct yaffs_file_var *file_struct,
- u32 chunk_id,
- struct yaffs_tnode *passed_tn)
-{
- int required_depth;
- int i;
- int l;
- struct yaffs_tnode *tn;
-
- u32 x;
-
- /* Check sane level and page Id */
- if (file_struct->top_level < 0
- || file_struct->top_level > YAFFS_TNODES_MAX_LEVEL)
- return NULL;
-
- if (chunk_id > YAFFS_MAX_CHUNK_ID)
- return NULL;
-
- /* First check we're tall enough (ie enough top_level) */
-
- x = chunk_id >> YAFFS_TNODES_LEVEL0_BITS;
- required_depth = 0;
- while (x) {
- x >>= YAFFS_TNODES_INTERNAL_BITS;
- required_depth++;
- }
-
- if (required_depth > file_struct->top_level) {
- /* Not tall enough, gotta make the tree taller */
- for (i = file_struct->top_level; i < required_depth; i++) {
-
- tn = yaffs_get_tnode(dev);
-
- if (tn) {
- tn->internal[0] = file_struct->top;
- file_struct->top = tn;
- file_struct->top_level++;
- } else {
- yaffs_trace(YAFFS_TRACE_ERROR, "yaffs: no more tnodes");
- return NULL;
- }
- }
- }
-
- /* Traverse down to level 0, adding anything we need */
-
- l = file_struct->top_level;
- tn = file_struct->top;
-
- if (l > 0) {
- while (l > 0 && tn) {
- x = (chunk_id >>
- (YAFFS_TNODES_LEVEL0_BITS +
- (l - 1) * YAFFS_TNODES_INTERNAL_BITS)) &
- YAFFS_TNODES_INTERNAL_MASK;
-
- if ((l > 1) && !tn->internal[x]) {
- /* Add missing non-level-zero tnode */
- tn->internal[x] = yaffs_get_tnode(dev);
- if (!tn->internal[x])
- return NULL;
- } else if (l == 1) {
- /* Looking from level 1 at level 0 */
- if (passed_tn) {
- /* If we already have one, then release it. */
- if (tn->internal[x])
- yaffs_free_tnode(dev,
- tn->
- internal[x]);
- tn->internal[x] = passed_tn;
-
- } else if (!tn->internal[x]) {
- /* Don't have one, none passed in */
- tn->internal[x] = yaffs_get_tnode(dev);
- if (!tn->internal[x])
- return NULL;
- }
- }
-
- tn = tn->internal[x];
- l--;
- }
- } else {
- /* top is level 0 */
- if (passed_tn) {
- memcpy(tn, passed_tn,
- (dev->tnode_width * YAFFS_NTNODES_LEVEL0) / 8);
- yaffs_free_tnode(dev, passed_tn);
- }
- }
-
- return tn;
-}
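-
-/*
- * Worked example (assuming the usual YAFFS_TNODES_LEVEL0_BITS == 4 and
- * YAFFS_TNODES_INTERNAL_BITS == 3): for chunk_id = 0x123,
- * x = 0x123 >> 4 = 0x12; then 0x12 >> 3 = 2 and 2 >> 3 = 0, so
- * required_depth = 2 and the tree must have top_level >= 2 before the
- * level 0 tnode can be reached.
- */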
-
-static int yaffs_tags_match(const struct yaffs_ext_tags *tags, int obj_id,
- int chunk_obj)
-{
- return (tags->chunk_id == chunk_obj &&
- tags->obj_id == obj_id && !tags->is_deleted) ? 1 : 0;
-
-}
-
-static int yaffs_find_chunk_in_group(struct yaffs_dev *dev, int the_chunk,
- struct yaffs_ext_tags *tags, int obj_id,
- int inode_chunk)
-{
- int j;
-
- for (j = 0; the_chunk && j < dev->chunk_grp_size; j++) {
- if (yaffs_check_chunk_bit
- (dev, the_chunk / dev->param.chunks_per_block,
- the_chunk % dev->param.chunks_per_block)) {
-
- if (dev->chunk_grp_size == 1)
- return the_chunk;
- else {
- yaffs_rd_chunk_tags_nand(dev, the_chunk, NULL,
- tags);
- if (yaffs_tags_match(tags, obj_id, inode_chunk)) {
-					/* Found it */
- return the_chunk;
- }
- }
- }
- the_chunk++;
- }
- return -1;
-}
-
-static int yaffs_find_chunk_in_file(struct yaffs_obj *in, int inode_chunk,
- struct yaffs_ext_tags *tags)
-{
-	/* Get the tnode, then get the level 0 chunk offset */
- struct yaffs_tnode *tn;
- int the_chunk = -1;
- struct yaffs_ext_tags local_tags;
- int ret_val = -1;
-
- struct yaffs_dev *dev = in->my_dev;
-
- if (!tags) {
- /* Passed a NULL, so use our own tags space */
- tags = &local_tags;
- }
-
- tn = yaffs_find_tnode_0(dev, &in->variant.file_variant, inode_chunk);
-
- if (tn) {
- the_chunk = yaffs_get_group_base(dev, tn, inode_chunk);
-
- ret_val =
- yaffs_find_chunk_in_group(dev, the_chunk, tags, in->obj_id,
- inode_chunk);
- }
- return ret_val;
-}
-
-static int yaffs_find_del_file_chunk(struct yaffs_obj *in, int inode_chunk,
- struct yaffs_ext_tags *tags)
-{
-	/* Get the tnode, then get the level 0 chunk offset */
- struct yaffs_tnode *tn;
- int the_chunk = -1;
- struct yaffs_ext_tags local_tags;
-
- struct yaffs_dev *dev = in->my_dev;
- int ret_val = -1;
-
- if (!tags) {
- /* Passed a NULL, so use our own tags space */
- tags = &local_tags;
- }
-
- tn = yaffs_find_tnode_0(dev, &in->variant.file_variant, inode_chunk);
-
- if (tn) {
-
- the_chunk = yaffs_get_group_base(dev, tn, inode_chunk);
-
- ret_val =
- yaffs_find_chunk_in_group(dev, the_chunk, tags, in->obj_id,
- inode_chunk);
-
- /* Delete the entry in the filestructure (if found) */
- if (ret_val != -1)
- yaffs_load_tnode_0(dev, tn, inode_chunk, 0);
- }
-
- return ret_val;
-}
-
-int yaffs_put_chunk_in_file(struct yaffs_obj *in, int inode_chunk,
- int nand_chunk, int in_scan)
-{
- /* NB in_scan is zero unless scanning.
- * For forward scanning, in_scan is > 0;
- * for backward scanning in_scan is < 0
- *
- * nand_chunk = 0 is a dummy insert to make sure the tnodes are there.
- */
-
- struct yaffs_tnode *tn;
- struct yaffs_dev *dev = in->my_dev;
-	int existing_chunk;
- struct yaffs_ext_tags existing_tags;
- struct yaffs_ext_tags new_tags;
- unsigned existing_serial, new_serial;
-
- if (in->variant_type != YAFFS_OBJECT_TYPE_FILE) {
-		/* Just ignore an attempt to put a chunk into a non-file while
-		 * scanning. If it happens outside scanning then something has
-		 * gone wrong!
-		 */
- if (!in_scan) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs tragedy:attempt to put data chunk into a non-file"
- );
- YBUG();
- }
-
- yaffs_chunk_del(dev, nand_chunk, 1, __LINE__);
- return YAFFS_OK;
- }
-
- tn = yaffs_add_find_tnode_0(dev,
- &in->variant.file_variant,
- inode_chunk, NULL);
- if (!tn)
- return YAFFS_FAIL;
-
- if (!nand_chunk)
- /* Dummy insert, bail now */
- return YAFFS_OK;
-
-	existing_chunk = yaffs_get_group_base(dev, tn, inode_chunk);
-
- if (in_scan != 0) {
-		/* If we're scanning then we need to test for duplicates.
-		 * NB This does not need to be efficient since it should only
-		 * ever happen when power fails during a write; in that case
-		 * only one chunk should ever be affected.
-		 *
-		 * Correction for YAFFS2: this could happen quite a lot and we
-		 * need to think about efficiency! TODO
-		 * Update: for backward scanning we don't need to re-read tags,
-		 * so this is quite cheap.
-		 */
-
-		if (existing_chunk > 0) {
-			/* NB Right now the existing chunk will not be the real
-			 * chunk_id if the chunk group size > 1, so we have to
-			 * do a FindChunkInFile to get the real chunk id.
-			 *
-			 * We have a duplicate, so we need to decide which one
-			 * to use:
-			 *
-			 * Backwards scanning YAFFS2: use the old one, dump the new one.
-			 * Forward scanning YAFFS2: use the new one, dump the old one.
-			 * YAFFS1: get both sets of tags and compare serial numbers.
-			 */
-
- if (in_scan > 0) {
- /* Only do this for forward scanning */
- yaffs_rd_chunk_tags_nand(dev,
- nand_chunk,
- NULL, &new_tags);
-
- /* Do a proper find */
-				existing_chunk =
- yaffs_find_chunk_in_file(in, inode_chunk,
- &existing_tags);
- }
-
-			if (existing_chunk <= 0) {
-				/* Hoosterman - how did this happen? */
-
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs tragedy: existing chunk < 0 in scan"
- );
-
- }
-
- /* NB The deleted flags should be false, otherwise the chunks will
- * not be loaded during a scan
- */
-
- if (in_scan > 0) {
- new_serial = new_tags.serial_number;
- existing_serial = existing_tags.serial_number;
- }
-
- if ((in_scan > 0) &&
-			    (existing_chunk <= 0 ||
- ((existing_serial + 1) & 3) == new_serial)) {
- /* Forward scanning.
- * Use new
- * Delete the old one and drop through to update the tnode
- */
-				yaffs_chunk_del(dev, existing_chunk, 1,
- __LINE__);
- } else {
-				/* Backward scanning, or we want to use the
-				 * existing one: delete the new one and return
-				 * early so that the tnode isn't changed.
-				 */
- yaffs_chunk_del(dev, nand_chunk, 1, __LINE__);
- return YAFFS_OK;
- }
- }
-
- }
-
-	if (existing_chunk == 0)
- in->n_data_chunks++;
-
- yaffs_load_tnode_0(dev, tn, inode_chunk, nand_chunk);
-
- return YAFFS_OK;
-}
-
-static void yaffs_soft_del_chunk(struct yaffs_dev *dev, int chunk)
-{
- struct yaffs_block_info *the_block;
- unsigned block_no;
-
- yaffs_trace(YAFFS_TRACE_DELETION, "soft delete chunk %d", chunk);
-
- block_no = chunk / dev->param.chunks_per_block;
- the_block = yaffs_get_block_info(dev, block_no);
- if (the_block) {
- the_block->soft_del_pages++;
- dev->n_free_chunks++;
- yaffs2_update_oldest_dirty_seq(dev, block_no, the_block);
- }
-}
-
-/* SoftDeleteWorker scans backwards through the tnode tree and soft deletes
- * all the chunks in the file.
- * All soft deleting does is increment the block's softdelete count and pull
- * the chunk out of the tnode.
- * Thus, essentially, this is the same as DeleteWorker except that the chunks
- * are soft deleted.
- */
-
-static int yaffs_soft_del_worker(struct yaffs_obj *in, struct yaffs_tnode *tn,
- u32 level, int chunk_offset)
-{
- int i;
- int the_chunk;
- int all_done = 1;
- struct yaffs_dev *dev = in->my_dev;
-
- if (tn) {
- if (level > 0) {
-
- for (i = YAFFS_NTNODES_INTERNAL - 1; all_done && i >= 0;
- i--) {
- if (tn->internal[i]) {
- all_done =
- yaffs_soft_del_worker(in,
- tn->internal
- [i],
- level - 1,
- (chunk_offset
- <<
- YAFFS_TNODES_INTERNAL_BITS)
- + i);
- if (all_done) {
- yaffs_free_tnode(dev,
- tn->internal
- [i]);
- tn->internal[i] = NULL;
- } else {
- /* Hoosterman... how could this happen? */
- }
- }
- }
- return (all_done) ? 1 : 0;
- } else if (level == 0) {
-
- for (i = YAFFS_NTNODES_LEVEL0 - 1; i >= 0; i--) {
- the_chunk = yaffs_get_group_base(dev, tn, i);
- if (the_chunk) {
- /* Note this does not find the real chunk, only the chunk group.
- * We make an assumption that a chunk group is not larger than
- * a block.
- */
- yaffs_soft_del_chunk(dev, the_chunk);
- yaffs_load_tnode_0(dev, tn, i, 0);
- }
-
- }
- return 1;
-
- }
-
- }
-
- return 1;
-
-}
-
-static void yaffs_remove_obj_from_dir(struct yaffs_obj *obj)
-{
- struct yaffs_dev *dev = obj->my_dev;
- struct yaffs_obj *parent;
-
- yaffs_verify_obj_in_dir(obj);
- parent = obj->parent;
-
- yaffs_verify_dir(parent);
-
- if (dev && dev->param.remove_obj_fn)
- dev->param.remove_obj_fn(obj);
-
- list_del_init(&obj->siblings);
- obj->parent = NULL;
-
- yaffs_verify_dir(parent);
-}
-
-void yaffs_add_obj_to_dir(struct yaffs_obj *directory, struct yaffs_obj *obj)
-{
- if (!directory) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "tragedy: Trying to add an object to a null pointer directory"
- );
- YBUG();
- return;
- }
- if (directory->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "tragedy: Trying to add an object to a non-directory"
- );
- YBUG();
- }
-
- if (obj->siblings.prev == NULL) {
- /* Not initialised */
- YBUG();
- }
-
- yaffs_verify_dir(directory);
-
- yaffs_remove_obj_from_dir(obj);
-
- /* Now add it */
- list_add(&obj->siblings, &directory->variant.dir_variant.children);
- obj->parent = directory;
-
- if (directory == obj->my_dev->unlinked_dir
- || directory == obj->my_dev->del_dir) {
- obj->unlinked = 1;
- obj->my_dev->n_unlinked_files++;
- obj->rename_allowed = 0;
- }
-
- yaffs_verify_dir(directory);
- yaffs_verify_obj_in_dir(obj);
-}
-
-static int yaffs_change_obj_name(struct yaffs_obj *obj,
- struct yaffs_obj *new_dir,
- const YCHAR * new_name, int force, int shadows)
-{
- int unlink_op;
- int del_op;
-
- struct yaffs_obj *existing_target;
-
- if (new_dir == NULL)
- new_dir = obj->parent; /* use the old directory */
-
- if (new_dir->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "tragedy: yaffs_change_obj_name: new_dir is not a directory"
- );
- YBUG();
- }
-
- /* TODO: Do we need this different handling for YAFFS2 and YAFFS1?? */
- if (obj->my_dev->param.is_yaffs2)
- unlink_op = (new_dir == obj->my_dev->unlinked_dir);
- else
- unlink_op = (new_dir == obj->my_dev->unlinked_dir
- && obj->variant_type == YAFFS_OBJECT_TYPE_FILE);
-
- del_op = (new_dir == obj->my_dev->del_dir);
-
- existing_target = yaffs_find_by_name(new_dir, new_name);
-
-	/* If the object is a file going into the unlinked directory,
-	 * then it is OK to just stuff it in, since duplicate names are
-	 * allowed. Otherwise, only proceed if the new name does not exist
-	 * and we're putting it into a directory.
-	 */
- if ((unlink_op ||
- del_op ||
- force ||
- (shadows > 0) ||
- !existing_target) &&
- new_dir->variant_type == YAFFS_OBJECT_TYPE_DIRECTORY) {
- yaffs_set_obj_name(obj, new_name);
- obj->dirty = 1;
-
- yaffs_add_obj_to_dir(new_dir, obj);
-
- if (unlink_op)
- obj->unlinked = 1;
-
- /* If it is a deletion then we mark it as a shrink for gc purposes. */
- if (yaffs_update_oh(obj, new_name, 0, del_op, shadows, NULL) >=
- 0)
- return YAFFS_OK;
- }
-
- return YAFFS_FAIL;
-}
-
-/*------------------------ Short Operations Cache ----------------------------------------
- * In many situations where there is no high level buffering a lot of
- * reads might be short sequential reads, and a lot of writes may be short
- * sequential writes. eg. scanning/writing a jpeg file.
- * In these cases, a short read/write cache can provide a huge performance
- * benefit with dumb-as-a-rock code.
- * In Linux, the page cache provides read buffering and the short op cache
- * provides write buffering.
- *
- * There are a limited number (~10) of cache chunks per device so that we don't
- * need a very intelligent search.
- */
-
-static int yaffs_obj_cache_dirty(struct yaffs_obj *obj)
-{
- struct yaffs_dev *dev = obj->my_dev;
- int i;
- struct yaffs_cache *cache;
- int n_caches = obj->my_dev->param.n_caches;
-
- for (i = 0; i < n_caches; i++) {
- cache = &dev->cache[i];
- if (cache->object == obj && cache->dirty)
- return 1;
- }
-
- return 0;
-}
-
-static void yaffs_flush_file_cache(struct yaffs_obj *obj)
-{
- struct yaffs_dev *dev = obj->my_dev;
- int lowest = -99; /* Stop compiler whining. */
- int i;
- struct yaffs_cache *cache;
- int chunk_written = 0;
- int n_caches = obj->my_dev->param.n_caches;
-
- if (n_caches > 0) {
- do {
- cache = NULL;
-
- /* Find the dirty cache for this object with the lowest chunk id. */
- for (i = 0; i < n_caches; i++) {
- if (dev->cache[i].object == obj &&
- dev->cache[i].dirty) {
- if (!cache
- || dev->cache[i].chunk_id <
- lowest) {
- cache = &dev->cache[i];
- lowest = cache->chunk_id;
- }
- }
- }
-
- if (cache && !cache->locked) {
- /* Write it out and free it up */
-
- chunk_written =
- yaffs_wr_data_obj(cache->object,
- cache->chunk_id,
- cache->data,
- cache->n_bytes, 1);
- cache->dirty = 0;
- cache->object = NULL;
- }
-
- } while (cache && chunk_written > 0);
-
- if (cache)
- /* Hoosterman, disk full while writing cache out. */
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs tragedy: no space during cache write");
-
- }
-
-}
-
-/* yaffs_flush_whole_cache(dev)
- *
- * Flush the cache for all dirty objects on the device.
- */
-
-void yaffs_flush_whole_cache(struct yaffs_dev *dev)
-{
- struct yaffs_obj *obj;
- int n_caches = dev->param.n_caches;
- int i;
-
- /* Find a dirty object in the cache and flush it...
- * until there are no further dirty objects.
- */
- do {
- obj = NULL;
- for (i = 0; i < n_caches && !obj; i++) {
- if (dev->cache[i].object && dev->cache[i].dirty)
- obj = dev->cache[i].object;
-
- }
- if (obj)
- yaffs_flush_file_cache(obj);
-
- } while (obj);
-
-}
-
-/* Grab us a cache chunk for use.
- * First look for an empty one.
- * Then look for the least recently used non-dirty one.
- * Then look for the least recently used dirty one...., flush and look again.
- */
-static struct yaffs_cache *yaffs_grab_chunk_worker(struct yaffs_dev *dev)
-{
- int i;
-
- if (dev->param.n_caches > 0) {
- for (i = 0; i < dev->param.n_caches; i++) {
- if (!dev->cache[i].object)
- return &dev->cache[i];
- }
- }
-
- return NULL;
-}
-
-static struct yaffs_cache *yaffs_grab_chunk_cache(struct yaffs_dev *dev)
-{
- struct yaffs_cache *cache;
- struct yaffs_obj *the_obj;
- int usage;
- int i;
- int pushout;
-
- if (dev->param.n_caches > 0) {
-		/* Try to find a non-dirty one... */
-
- cache = yaffs_grab_chunk_worker(dev);
-
- if (!cache) {
-			/* They were all dirty: find the least recently used
-			 * object, flush its cache, then look again.
-			 * NB What's here is not very accurate: we actually
-			 * flush the object that holds the least recently used
-			 * page.
-			 */
-
- /* With locking we can't assume we can use entry zero */
-
- the_obj = NULL;
- usage = -1;
- cache = NULL;
- pushout = -1;
-
- for (i = 0; i < dev->param.n_caches; i++) {
- if (dev->cache[i].object &&
- !dev->cache[i].locked &&
- (dev->cache[i].last_use < usage
- || !cache)) {
- usage = dev->cache[i].last_use;
- the_obj = dev->cache[i].object;
- cache = &dev->cache[i];
- pushout = i;
- }
- }
-
- if (!cache || cache->dirty) {
- /* Flush and try again */
- yaffs_flush_file_cache(the_obj);
- cache = yaffs_grab_chunk_worker(dev);
- }
-
- }
- return cache;
- } else {
- return NULL;
- }
-}
-
-/* Find a cached chunk */
-static struct yaffs_cache *yaffs_find_chunk_cache(const struct yaffs_obj *obj,
- int chunk_id)
-{
- struct yaffs_dev *dev = obj->my_dev;
- int i;
- if (dev->param.n_caches > 0) {
- for (i = 0; i < dev->param.n_caches; i++) {
- if (dev->cache[i].object == obj &&
- dev->cache[i].chunk_id == chunk_id) {
- dev->cache_hits++;
-
- return &dev->cache[i];
- }
- }
- }
- return NULL;
-}
-
-/* Mark the chunk for the least recently used algorithm */
-static void yaffs_use_cache(struct yaffs_dev *dev, struct yaffs_cache *cache,
- int is_write)
-{
-
- if (dev->param.n_caches > 0) {
- if (dev->cache_last_use < 0 || dev->cache_last_use > 100000000) {
- /* Reset the cache usages */
- int i;
- for (i = 1; i < dev->param.n_caches; i++)
- dev->cache[i].last_use = 0;
-
- dev->cache_last_use = 0;
- }
-
- dev->cache_last_use++;
-
- cache->last_use = dev->cache_last_use;
-
- if (is_write)
- cache->dirty = 1;
- }
-}
-
-/* Invalidate a single cache page.
- * Do this when a whole page gets written,
- * ie the short cache for this page is no longer valid.
- */
-static void yaffs_invalidate_chunk_cache(struct yaffs_obj *object, int chunk_id)
-{
- if (object->my_dev->param.n_caches > 0) {
- struct yaffs_cache *cache =
- yaffs_find_chunk_cache(object, chunk_id);
-
- if (cache)
- cache->object = NULL;
- }
-}
-
-/* Invalidate all the cache pages associated with this object
- * Do this whenever the file is deleted or resized.
- */
-static void yaffs_invalidate_whole_cache(struct yaffs_obj *in)
-{
- int i;
- struct yaffs_dev *dev = in->my_dev;
-
- if (dev->param.n_caches > 0) {
- /* Invalidate it. */
- for (i = 0; i < dev->param.n_caches; i++) {
- if (dev->cache[i].object == in)
- dev->cache[i].object = NULL;
- }
- }
-}
-
-static void yaffs_unhash_obj(struct yaffs_obj *obj)
-{
- int bucket;
- struct yaffs_dev *dev = obj->my_dev;
-
- /* If it is still linked into the bucket list, free from the list */
- if (!list_empty(&obj->hash_link)) {
- list_del_init(&obj->hash_link);
- bucket = yaffs_hash_fn(obj->obj_id);
- dev->obj_bucket[bucket].count--;
- }
-}
-
-/* FreeObject frees up a Object and puts it back on the free list */
-static void yaffs_free_obj(struct yaffs_obj *obj)
-{
-	struct yaffs_dev *dev;
-
-	/* Check obj before dereferencing it for the trace below. */
-	if (!obj)
-		YBUG();
-
-	dev = obj->my_dev;
-
-	yaffs_trace(YAFFS_TRACE_OS, "FreeObject %p inode %p",
-		obj, obj->my_inode);
-
-	if (obj->parent)
-		YBUG();
-	if (!list_empty(&obj->siblings))
-		YBUG();
-
- if (obj->my_inode) {
- /* We're still hooked up to a cached inode.
- * Don't delete now, but mark for later deletion
- */
- obj->defered_free = 1;
- return;
- }
-
- yaffs_unhash_obj(obj);
-
- yaffs_free_raw_obj(dev, obj);
- dev->n_obj--;
- dev->checkpoint_blocks_required = 0; /* force recalculation */
-}
-
-void yaffs_handle_defered_free(struct yaffs_obj *obj)
-{
- if (obj->defered_free)
- yaffs_free_obj(obj);
-}
-
-static int yaffs_generic_obj_del(struct yaffs_obj *in)
-{
-
- /* First off, invalidate the file's data in the cache, without flushing. */
- yaffs_invalidate_whole_cache(in);
-
- if (in->my_dev->param.is_yaffs2 && (in->parent != in->my_dev->del_dir)) {
- /* Move to the unlinked directory so we have a record that it was deleted. */
- yaffs_change_obj_name(in, in->my_dev->del_dir, _Y("deleted"), 0,
- 0);
-
- }
-
- yaffs_remove_obj_from_dir(in);
- yaffs_chunk_del(in->my_dev, in->hdr_chunk, 1, __LINE__);
- in->hdr_chunk = 0;
-
- yaffs_free_obj(in);
- return YAFFS_OK;
-
-}
-
-static void yaffs_soft_del_file(struct yaffs_obj *obj)
-{
- if (obj->deleted &&
- obj->variant_type == YAFFS_OBJECT_TYPE_FILE && !obj->soft_del) {
- if (obj->n_data_chunks <= 0) {
- /* Empty file with no duplicate object headers,
- * just delete it immediately */
- yaffs_free_tnode(obj->my_dev,
- obj->variant.file_variant.top);
- obj->variant.file_variant.top = NULL;
- yaffs_trace(YAFFS_TRACE_TRACING,
- "yaffs: Deleting empty file %d",
- obj->obj_id);
- yaffs_generic_obj_del(obj);
- } else {
- yaffs_soft_del_worker(obj,
- obj->variant.file_variant.top,
- obj->variant.
- file_variant.top_level, 0);
- obj->soft_del = 1;
- }
- }
-}
-
-/* Pruning removes any part of the file structure tree that is beyond the
- * bounds of the file (ie that does not point to chunks).
- *
- * A file should only get pruned when its size is reduced.
- *
- * Before pruning, the chunks must be pulled from the tree and the
- * level 0 tnode entries must be zeroed out.
- * Could also use this for file deletion, but that's probably better handled
- * by a special case.
- *
- * This function is recursive. For levels > 0 the function is called again on
- * any sub-tree. For level == 0 we just check if the sub-tree has data.
- * If there is no data in a subtree then it is pruned.
- */
-
-static struct yaffs_tnode *yaffs_prune_worker(struct yaffs_dev *dev,
- struct yaffs_tnode *tn, u32 level,
- int del0)
-{
- int i;
- int has_data;
-
- if (tn) {
- has_data = 0;
-
- if (level > 0) {
- for (i = 0; i < YAFFS_NTNODES_INTERNAL; i++) {
- if (tn->internal[i]) {
- tn->internal[i] =
- yaffs_prune_worker(dev,
- tn->internal[i],
- level - 1,
- (i ==
- 0) ? del0 : 1);
- }
-
- if (tn->internal[i])
- has_data++;
- }
- } else {
- int tnode_size_u32 = dev->tnode_size / sizeof(u32);
- u32 *map = (u32 *) tn;
-
- for (i = 0; !has_data && i < tnode_size_u32; i++) {
- if (map[i])
- has_data++;
- }
- }
-
- if (has_data == 0 && del0) {
- /* Free and return NULL */
-
- yaffs_free_tnode(dev, tn);
- tn = NULL;
- }
-
- }
-
- return tn;
-
-}
-
-static int yaffs_prune_tree(struct yaffs_dev *dev,
- struct yaffs_file_var *file_struct)
-{
- int i;
- int has_data;
- int done = 0;
- struct yaffs_tnode *tn;
-
- if (file_struct->top_level > 0) {
- file_struct->top =
- yaffs_prune_worker(dev, file_struct->top,
- file_struct->top_level, 0);
-
-		/* Now we have a tree with all the empty branches pruned to
-		 * NULL, but the height is the same as it was.
-		 * Let's see if we can trim internal tnodes to shorten the
-		 * tree. We can do this if only the 0th element in the tnode
-		 * is in use (ie all the others are NULL).
-		 */
-
- while (file_struct->top_level && !done) {
- tn = file_struct->top;
-
- has_data = 0;
- for (i = 1; i < YAFFS_NTNODES_INTERNAL; i++) {
- if (tn->internal[i])
- has_data++;
- }
-
- if (!has_data) {
- file_struct->top = tn->internal[0];
- file_struct->top_level--;
- yaffs_free_tnode(dev, tn);
- } else {
- done = 1;
- }
- }
- }
-
- return YAFFS_OK;
-}
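-
-/*
- * Example of the trimming loop above (illustrative): if top_level == 2 and
- * only internal[0] of the top tnode is in use, the top is replaced by
- * internal[0] and top_level drops to 1; the loop repeats until some
- * internal[i] (i > 0) holds data or level 0 is reached.
- */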
-
-/*-------------------- End of File Structure functions.-------------------*/
-
-/* AllocateEmptyObject gets us a clean object. Tries to allocate more if we run out. */
-static struct yaffs_obj *yaffs_alloc_empty_obj(struct yaffs_dev *dev)
-{
- struct yaffs_obj *obj = yaffs_alloc_raw_obj(dev);
-
- if (obj) {
- dev->n_obj++;
-
- /* Now sweeten it up... */
-
- memset(obj, 0, sizeof(struct yaffs_obj));
- obj->being_created = 1;
-
- obj->my_dev = dev;
- obj->hdr_chunk = 0;
- obj->variant_type = YAFFS_OBJECT_TYPE_UNKNOWN;
- INIT_LIST_HEAD(&(obj->hard_links));
- INIT_LIST_HEAD(&(obj->hash_link));
- INIT_LIST_HEAD(&obj->siblings);
-
- /* Now make the directory sane */
- if (dev->root_dir) {
- obj->parent = dev->root_dir;
- list_add(&(obj->siblings),
- &dev->root_dir->variant.dir_variant.children);
- }
-
- /* Add it to the lost and found directory.
- * NB Can't put root or lost-n-found in lost-n-found so
- * check if lost-n-found exists first
- */
- if (dev->lost_n_found)
- yaffs_add_obj_to_dir(dev->lost_n_found, obj);
-
- obj->being_created = 0;
- }
-
- dev->checkpoint_blocks_required = 0; /* force recalculation */
-
- return obj;
-}
-
-static int yaffs_find_nice_bucket(struct yaffs_dev *dev)
-{
- int i;
- int l = 999;
- int lowest = 999999;
-
- /* Search for the shortest list or one that
- * isn't too long.
- */
-
- for (i = 0; i < 10 && lowest > 4; i++) {
- dev->bucket_finder++;
- dev->bucket_finder %= YAFFS_NOBJECT_BUCKETS;
- if (dev->obj_bucket[dev->bucket_finder].count < lowest) {
- lowest = dev->obj_bucket[dev->bucket_finder].count;
- l = dev->bucket_finder;
- }
-
- }
-
- return l;
-}
-
-static int yaffs_new_obj_id(struct yaffs_dev *dev)
-{
- int bucket = yaffs_find_nice_bucket(dev);
-
- /* Now find an object value that has not already been taken
- * by scanning the list.
- */
-
- int found = 0;
- struct list_head *i;
-
- u32 n = (u32) bucket;
-
- /* yaffs_check_obj_hash_sane(); */
-
- while (!found) {
- found = 1;
- n += YAFFS_NOBJECT_BUCKETS;
-		list_for_each(i, &dev->obj_bucket[bucket].list) {
-			/* If there is already one in the list */
-			if (i && list_entry(i, struct yaffs_obj,
-					    hash_link)->obj_id == n)
-				found = 0;
-		}
- }
-
- return n;
-}
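-
-/*
- * Note on the search above: n is initialised to the bucket number and is
- * only ever advanced in steps of YAFFS_NOBJECT_BUCKETS, so
- * yaffs_hash_fn(n) == bucket for every candidate id and only that one
- * bucket list ever needs scanning for collisions.
- */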
-
-static void yaffs_hash_obj(struct yaffs_obj *in)
-{
- int bucket = yaffs_hash_fn(in->obj_id);
- struct yaffs_dev *dev = in->my_dev;
-
- list_add(&in->hash_link, &dev->obj_bucket[bucket].list);
- dev->obj_bucket[bucket].count++;
-}
-
-struct yaffs_obj *yaffs_find_by_number(struct yaffs_dev *dev, u32 number)
-{
- int bucket = yaffs_hash_fn(number);
- struct list_head *i;
- struct yaffs_obj *in;
-
- list_for_each(i, &dev->obj_bucket[bucket].list) {
- /* Look if it is in the list */
- if (i) {
- in = list_entry(i, struct yaffs_obj, hash_link);
- if (in->obj_id == number) {
-
-				/* Don't tell the VFS about this one if its free has been deferred */
- if (in->defered_free)
- return NULL;
-
- return in;
- }
- }
- }
-
- return NULL;
-}
-
-struct yaffs_obj *yaffs_new_obj(struct yaffs_dev *dev, int number,
- enum yaffs_obj_type type)
-{
- struct yaffs_obj *the_obj = NULL;
- struct yaffs_tnode *tn = NULL;
-
- if (number < 0)
- number = yaffs_new_obj_id(dev);
-
- if (type == YAFFS_OBJECT_TYPE_FILE) {
- tn = yaffs_get_tnode(dev);
- if (!tn)
- return NULL;
- }
-
- the_obj = yaffs_alloc_empty_obj(dev);
- if (!the_obj) {
- if (tn)
- yaffs_free_tnode(dev, tn);
- return NULL;
- }
-
- if (the_obj) {
- the_obj->fake = 0;
- the_obj->rename_allowed = 1;
- the_obj->unlink_allowed = 1;
- the_obj->obj_id = number;
- yaffs_hash_obj(the_obj);
- the_obj->variant_type = type;
- yaffs_load_current_time(the_obj, 1, 1);
-
- switch (type) {
- case YAFFS_OBJECT_TYPE_FILE:
- the_obj->variant.file_variant.file_size = 0;
- the_obj->variant.file_variant.scanned_size = 0;
- the_obj->variant.file_variant.shrink_size = ~0; /* max */
- the_obj->variant.file_variant.top_level = 0;
- the_obj->variant.file_variant.top = tn;
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- INIT_LIST_HEAD(&the_obj->variant.dir_variant.children);
- INIT_LIST_HEAD(&the_obj->variant.dir_variant.dirty);
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- case YAFFS_OBJECT_TYPE_HARDLINK:
- case YAFFS_OBJECT_TYPE_SPECIAL:
- /* No action required */
- break;
- case YAFFS_OBJECT_TYPE_UNKNOWN:
-			/* TODO: this should not happen */
- break;
- }
- }
-
- return the_obj;
-}
-
-static struct yaffs_obj *yaffs_create_fake_dir(struct yaffs_dev *dev,
- int number, u32 mode)
-{
-
- struct yaffs_obj *obj =
- yaffs_new_obj(dev, number, YAFFS_OBJECT_TYPE_DIRECTORY);
- if (obj) {
- obj->fake = 1; /* it is fake so it might have no NAND presence... */
- obj->rename_allowed = 0; /* ... and we're not allowed to rename it... */
- obj->unlink_allowed = 0; /* ... or unlink it */
- obj->deleted = 0;
- obj->unlinked = 0;
- obj->yst_mode = mode;
- obj->my_dev = dev;
- obj->hdr_chunk = 0; /* Not a valid chunk. */
- }
-
- return obj;
-
-}
-
-
-static void yaffs_init_tnodes_and_objs(struct yaffs_dev *dev)
-{
- int i;
-
- dev->n_obj = 0;
- dev->n_tnodes = 0;
-
- yaffs_init_raw_tnodes_and_objs(dev);
-
- for (i = 0; i < YAFFS_NOBJECT_BUCKETS; i++) {
- INIT_LIST_HEAD(&dev->obj_bucket[i].list);
- dev->obj_bucket[i].count = 0;
- }
-}
-
-struct yaffs_obj *yaffs_find_or_create_by_number(struct yaffs_dev *dev,
- int number,
- enum yaffs_obj_type type)
-{
- struct yaffs_obj *the_obj = NULL;
-
- if (number > 0)
- the_obj = yaffs_find_by_number(dev, number);
-
- if (!the_obj)
- the_obj = yaffs_new_obj(dev, number, type);
-
- return the_obj;
-
-}
-
-YCHAR *yaffs_clone_str(const YCHAR * str)
-{
- YCHAR *new_str = NULL;
- int len;
-
- if (!str)
- str = _Y("");
-
- len = strnlen(str, YAFFS_MAX_ALIAS_LENGTH);
- new_str = kmalloc((len + 1) * sizeof(YCHAR), GFP_NOFS);
- if (new_str) {
- strncpy(new_str, str, len);
- new_str[len] = 0;
- }
- return new_str;
-
-}
-/*
- * yaffs_update_parent() handles fixing a directory's mtime and ctime when a
- * new link (ie. name) is created or deleted in the directory.
- *
- * ie.
- *   create dir/a : update dir's mtime/ctime
- *   rm dir/a:   update dir's mtime/ctime
- *   modify dir/a: don't update dir's mtime/ctime
- *
- * This can be handled immediately or deferred. Deferring helps reduce the
- * number of updates when many files in a directory are changed within a
- * brief period.
- *
- * If the directory updating is deferred then yaffs_update_dirty_dirs must be
- * called periodically.
- */
-
-static void yaffs_update_parent(struct yaffs_obj *obj)
-{
- struct yaffs_dev *dev;
- if (!obj)
- return;
- dev = obj->my_dev;
- obj->dirty = 1;
- yaffs_load_current_time(obj, 0, 1);
- if (dev->param.defered_dir_update) {
- struct list_head *link = &obj->variant.dir_variant.dirty;
-
- if (list_empty(link)) {
- list_add(link, &dev->dirty_dirs);
- yaffs_trace(YAFFS_TRACE_BACKGROUND,
- "Added object %d to dirty directories",
- obj->obj_id);
- }
-
- } else {
- yaffs_update_oh(obj, NULL, 0, 0, 0, NULL);
- }
-}
-
-void yaffs_update_dirty_dirs(struct yaffs_dev *dev)
-{
- struct list_head *link;
- struct yaffs_obj *obj;
- struct yaffs_dir_var *d_s;
- union yaffs_obj_var *o_v;
-
- yaffs_trace(YAFFS_TRACE_BACKGROUND, "Update dirty directories");
-
- while (!list_empty(&dev->dirty_dirs)) {
- link = dev->dirty_dirs.next;
- list_del_init(link);
-
- d_s = list_entry(link, struct yaffs_dir_var, dirty);
- o_v = list_entry(d_s, union yaffs_obj_var, dir_variant);
- obj = list_entry(o_v, struct yaffs_obj, variant);
-
- yaffs_trace(YAFFS_TRACE_BACKGROUND, "Update directory %d",
- obj->obj_id);
-
- if (obj->dirty)
- yaffs_update_oh(obj, NULL, 0, 0, 0, NULL);
- }
-}
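-
-/*
- * Usage sketch (illustrative only, not part of this driver): when
- * param.defered_dir_update is set, something like a background thread is
- * expected to call yaffs_update_dirty_dirs() periodically, e.g.:
- */
-#if 0	/* illustrative only */
-static void example_background_tick(struct yaffs_dev *dev)
-{
-	/* Flush any deferred directory header updates to flash. */
-	yaffs_update_dirty_dirs(dev);
-}
-#endif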
-
-/*
- * Mknod (create) a new object.
- * equiv_obj only has meaning for a hard link;
- * alias_str only has meaning for a symlink.
- * rdev only has meaning for devices (a subset of special objects)
- */
-
-static struct yaffs_obj *yaffs_create_obj(enum yaffs_obj_type type,
- struct yaffs_obj *parent,
- const YCHAR * name,
- u32 mode,
- u32 uid,
- u32 gid,
- struct yaffs_obj *equiv_obj,
- const YCHAR * alias_str, u32 rdev)
-{
- struct yaffs_obj *in;
- YCHAR *str = NULL;
-
- struct yaffs_dev *dev = parent->my_dev;
-
- /* Check if the entry exists. If it does then fail the call since we don't want a dup. */
- if (yaffs_find_by_name(parent, name))
- return NULL;
-
- if (type == YAFFS_OBJECT_TYPE_SYMLINK) {
- str = yaffs_clone_str(alias_str);
- if (!str)
- return NULL;
- }
-
- in = yaffs_new_obj(dev, -1, type);
-
- if (!in) {
- if (str)
- kfree(str);
- return NULL;
- }
-
- if (in) {
- in->hdr_chunk = 0;
- in->valid = 1;
- in->variant_type = type;
-
- in->yst_mode = mode;
-
- yaffs_attribs_init(in, gid, uid, rdev);
-
- in->n_data_chunks = 0;
-
- yaffs_set_obj_name(in, name);
- in->dirty = 1;
-
- yaffs_add_obj_to_dir(parent, in);
-
- in->my_dev = parent->my_dev;
-
- switch (type) {
- case YAFFS_OBJECT_TYPE_SYMLINK:
- in->variant.symlink_variant.alias = str;
- break;
- case YAFFS_OBJECT_TYPE_HARDLINK:
- in->variant.hardlink_variant.equiv_obj = equiv_obj;
- in->variant.hardlink_variant.equiv_id =
- equiv_obj->obj_id;
- list_add(&in->hard_links, &equiv_obj->hard_links);
- break;
- case YAFFS_OBJECT_TYPE_FILE:
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- case YAFFS_OBJECT_TYPE_SPECIAL:
- case YAFFS_OBJECT_TYPE_UNKNOWN:
- /* do nothing */
- break;
- }
-
- if (yaffs_update_oh(in, name, 0, 0, 0, NULL) < 0) {
- /* Could not create the object header, fail the creation */
- yaffs_del_obj(in);
- in = NULL;
- }
-
- yaffs_update_parent(parent);
- }
-
- return in;
-}
-
-struct yaffs_obj *yaffs_create_file(struct yaffs_obj *parent,
- const YCHAR * name, u32 mode, u32 uid,
- u32 gid)
-{
- return yaffs_create_obj(YAFFS_OBJECT_TYPE_FILE, parent, name, mode,
- uid, gid, NULL, NULL, 0);
-}
-
-struct yaffs_obj *yaffs_create_dir(struct yaffs_obj *parent, const YCHAR * name,
- u32 mode, u32 uid, u32 gid)
-{
- return yaffs_create_obj(YAFFS_OBJECT_TYPE_DIRECTORY, parent, name,
- mode, uid, gid, NULL, NULL, 0);
-}
-
-struct yaffs_obj *yaffs_create_special(struct yaffs_obj *parent,
- const YCHAR * name, u32 mode, u32 uid,
- u32 gid, u32 rdev)
-{
- return yaffs_create_obj(YAFFS_OBJECT_TYPE_SPECIAL, parent, name, mode,
- uid, gid, NULL, NULL, rdev);
-}
-
-struct yaffs_obj *yaffs_create_symlink(struct yaffs_obj *parent,
- const YCHAR * name, u32 mode, u32 uid,
- u32 gid, const YCHAR * alias)
-{
- return yaffs_create_obj(YAFFS_OBJECT_TYPE_SYMLINK, parent, name, mode,
- uid, gid, NULL, alias, 0);
-}
-
-/* yaffs_link_obj returns the object id of the equivalent object.*/
-struct yaffs_obj *yaffs_link_obj(struct yaffs_obj *parent, const YCHAR * name,
- struct yaffs_obj *equiv_obj)
-{
- /* Get the real object in case we were fed a hard link as an equivalent object */
- equiv_obj = yaffs_get_equivalent_obj(equiv_obj);
-
- if (yaffs_create_obj
- (YAFFS_OBJECT_TYPE_HARDLINK, parent, name, 0, 0, 0,
- equiv_obj, NULL, 0)) {
- return equiv_obj;
- } else {
- return NULL;
- }
-
-}
-
-
-
-/*------------------------- Block Management and Page Allocation ----------------*/
-
-static int yaffs_init_blocks(struct yaffs_dev *dev)
-{
- int n_blocks = dev->internal_end_block - dev->internal_start_block + 1;
-
- dev->block_info = NULL;
- dev->chunk_bits = NULL;
-
- dev->alloc_block = -1; /* force it to get a new one */
-
-	/* If the first allocation strategy fails, try the alternate one */
- dev->block_info =
- kmalloc(n_blocks * sizeof(struct yaffs_block_info), GFP_NOFS);
- if (!dev->block_info) {
- dev->block_info =
- vmalloc(n_blocks * sizeof(struct yaffs_block_info));
- dev->block_info_alt = 1;
- } else {
- dev->block_info_alt = 0;
- }
-
- if (dev->block_info) {
- /* Set up dynamic blockinfo stuff. Round up bytes. */
- dev->chunk_bit_stride = (dev->param.chunks_per_block + 7) / 8;
- dev->chunk_bits =
- kmalloc(dev->chunk_bit_stride * n_blocks, GFP_NOFS);
- if (!dev->chunk_bits) {
- dev->chunk_bits =
- vmalloc(dev->chunk_bit_stride * n_blocks);
- dev->chunk_bits_alt = 1;
- } else {
- dev->chunk_bits_alt = 0;
- }
- }
-
- if (dev->block_info && dev->chunk_bits) {
- memset(dev->block_info, 0,
- n_blocks * sizeof(struct yaffs_block_info));
- memset(dev->chunk_bits, 0, dev->chunk_bit_stride * n_blocks);
- return YAFFS_OK;
- }
-
- return YAFFS_FAIL;
-}
-
-static void yaffs_deinit_blocks(struct yaffs_dev *dev)
-{
- if (dev->block_info_alt && dev->block_info)
- vfree(dev->block_info);
- else if (dev->block_info)
- kfree(dev->block_info);
-
- dev->block_info_alt = 0;
-
- dev->block_info = NULL;
-
- if (dev->chunk_bits_alt && dev->chunk_bits)
- vfree(dev->chunk_bits);
- else if (dev->chunk_bits)
- kfree(dev->chunk_bits);
- dev->chunk_bits_alt = 0;
- dev->chunk_bits = NULL;
-}
-
-void yaffs_block_became_dirty(struct yaffs_dev *dev, int block_no)
-{
- struct yaffs_block_info *bi = yaffs_get_block_info(dev, block_no);
-
- int erased_ok = 0;
-
- /* If the block is still healthy erase it and mark as clean.
- * If the block has had a data failure, then retire it.
- */
-
- yaffs_trace(YAFFS_TRACE_GC | YAFFS_TRACE_ERASE,
- "yaffs_block_became_dirty block %d state %d %s",
- block_no, bi->block_state,
- (bi->needs_retiring) ? "needs retiring" : "");
-
- yaffs2_clear_oldest_dirty_seq(dev, bi);
-
- bi->block_state = YAFFS_BLOCK_STATE_DIRTY;
-
- /* If this is the block being garbage collected then stop gc'ing this block */
- if (block_no == dev->gc_block)
- dev->gc_block = 0;
-
- /* If this block is currently the best candidate for gc then drop as a candidate */
- if (block_no == dev->gc_dirtiest) {
- dev->gc_dirtiest = 0;
- dev->gc_pages_in_use = 0;
- }
-
- if (!bi->needs_retiring) {
- yaffs2_checkpt_invalidate(dev);
- erased_ok = yaffs_erase_block(dev, block_no);
- if (!erased_ok) {
- dev->n_erase_failures++;
- yaffs_trace(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS,
- "**>> Erasure failed %d", block_no);
- }
- }
-
- if (erased_ok &&
- ((yaffs_trace_mask & YAFFS_TRACE_ERASE)
- || !yaffs_skip_verification(dev))) {
- int i;
- for (i = 0; i < dev->param.chunks_per_block; i++) {
- if (!yaffs_check_chunk_erased
- (dev, block_no * dev->param.chunks_per_block + i)) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- ">>Block %d erasure supposedly OK, but chunk %d not erased",
- block_no, i);
- }
- }
- }
-
- if (erased_ok) {
- /* Clean it up... */
- bi->block_state = YAFFS_BLOCK_STATE_EMPTY;
- bi->seq_number = 0;
- dev->n_erased_blocks++;
- bi->pages_in_use = 0;
- bi->soft_del_pages = 0;
- bi->has_shrink_hdr = 0;
- bi->skip_erased_check = 1; /* Clean, so no need to check */
- bi->gc_prioritise = 0;
- yaffs_clear_chunk_bits(dev, block_no);
-
- yaffs_trace(YAFFS_TRACE_ERASE,
- "Erased block %d", block_no);
- } else {
- /* We lost a block of free space */
- dev->n_free_chunks -= dev->param.chunks_per_block;
- yaffs_retire_block(dev, block_no);
- yaffs_trace(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS,
- "**>> Block %d retired", block_no);
- }
-}
-
-
-
-static int yaffs_gc_block(struct yaffs_dev *dev, int block, int whole_block)
-{
- int old_chunk;
- int new_chunk;
- int mark_flash;
- int ret_val = YAFFS_OK;
- int i;
- int is_checkpt_block;
- int matching_chunk;
- int max_copies;
-
- int chunks_before = yaffs_get_erased_chunks(dev);
- int chunks_after;
-
- struct yaffs_ext_tags tags;
-
- struct yaffs_block_info *bi = yaffs_get_block_info(dev, block);
-
- struct yaffs_obj *object;
-
- is_checkpt_block = (bi->block_state == YAFFS_BLOCK_STATE_CHECKPOINT);
-
- yaffs_trace(YAFFS_TRACE_TRACING,
- "Collecting block %d, in use %d, shrink %d, whole_block %d",
- block, bi->pages_in_use, bi->has_shrink_hdr,
- whole_block);
-
- /*yaffs_verify_free_chunks(dev); */
-
- if (bi->block_state == YAFFS_BLOCK_STATE_FULL)
- bi->block_state = YAFFS_BLOCK_STATE_COLLECTING;
-
-	bi->has_shrink_hdr = 0;	/* clear the flag so that the block can be erased */
-
- dev->gc_disable = 1;
-
- if (is_checkpt_block || !yaffs_still_some_chunks(dev, block)) {
- yaffs_trace(YAFFS_TRACE_TRACING,
- "Collecting block %d that has no chunks in use",
- block);
- yaffs_block_became_dirty(dev, block);
- } else {
-
- u8 *buffer = yaffs_get_temp_buffer(dev, __LINE__);
-
- yaffs_verify_blk(dev, bi, block);
-
- max_copies = (whole_block) ? dev->param.chunks_per_block : 5;
- old_chunk = block * dev->param.chunks_per_block + dev->gc_chunk;
-
- for ( /* init already done */ ;
- ret_val == YAFFS_OK &&
- dev->gc_chunk < dev->param.chunks_per_block &&
- (bi->block_state == YAFFS_BLOCK_STATE_COLLECTING) &&
- max_copies > 0; dev->gc_chunk++, old_chunk++) {
- if (yaffs_check_chunk_bit(dev, block, dev->gc_chunk)) {
-
- /* This page is in use and might need to be copied off */
-
- max_copies--;
-
- mark_flash = 1;
-
- yaffs_init_tags(&tags);
-
- yaffs_rd_chunk_tags_nand(dev, old_chunk,
- buffer, &tags);
-
- object = yaffs_find_by_number(dev, tags.obj_id);
-
- yaffs_trace(YAFFS_TRACE_GC_DETAIL,
- "Collecting chunk in block %d, %d %d %d ",
- dev->gc_chunk, tags.obj_id,
- tags.chunk_id, tags.n_bytes);
-
- if (object && !yaffs_skip_verification(dev)) {
- if (tags.chunk_id == 0)
- matching_chunk =
- object->hdr_chunk;
- else if (object->soft_del)
- matching_chunk = old_chunk; /* Defeat the test */
- else
- matching_chunk =
- yaffs_find_chunk_in_file
- (object, tags.chunk_id,
- NULL);
-
- if (old_chunk != matching_chunk)
- yaffs_trace(YAFFS_TRACE_ERROR,
- "gc: page in gc mismatch: %d %d %d %d",
- old_chunk,
- matching_chunk,
- tags.obj_id,
- tags.chunk_id);
-
- }
-
- if (!object) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "page %d in gc has no object: %d %d %d ",
- old_chunk,
- tags.obj_id, tags.chunk_id,
- tags.n_bytes);
- }
-
- if (object &&
- object->deleted &&
- object->soft_del && tags.chunk_id != 0) {
-					/* Data chunk in a soft-deleted file:
-					 * no need to copy this, just forget
-					 * about it and fix up the object.
-					 */
-
-					/* Free chunks already includes soft-deleted chunks.
-					 * However, this chunk is soon going to be really
-					 * deleted, which will increment free chunks.
-					 * We have to decrement free chunks so this works
-					 * out properly.
-					 */
- dev->n_free_chunks--;
- bi->soft_del_pages--;
-
- object->n_data_chunks--;
-
- if (object->n_data_chunks <= 0) {
-						/* remember to clean up the object */
- dev->gc_cleanup_list[dev->
- n_clean_ups]
- = tags.obj_id;
- dev->n_clean_ups++;
- }
- mark_flash = 0;
- } else if (0) {
-					/* TODO: object && object->deleted && object->n_data_chunks == 0 */
- /* Deleted object header with no data chunks.
- * Can be discarded and the file deleted.
- */
- object->hdr_chunk = 0;
- yaffs_free_tnode(object->my_dev,
- object->
- variant.file_variant.
- top);
- object->variant.file_variant.top = NULL;
- yaffs_generic_obj_del(object);
-
- } else if (object) {
- /* It's either a data chunk in a live file or
- * an ObjectHeader, so we're interested in it.
- * NB Need to keep the ObjectHeaders of deleted files
-					 * until the whole file has been deleted off the flash.
- */
- tags.serial_number++;
-
- dev->n_gc_copies++;
-
- if (tags.chunk_id == 0) {
-						/* It is an object header.
-						 * We need to nuke the shrink-header flags first.
-						 * We also need to clean up shadowing.
-						 * We no longer want the shrink_header flag since its
-						 * work is done, and if it is left in place it will
-						 * mess up scanning.
-						 */
-
- struct yaffs_obj_hdr *oh;
- oh = (struct yaffs_obj_hdr *)
- buffer;
-
- oh->is_shrink = 0;
- tags.extra_is_shrink = 0;
-
- oh->shadows_obj = 0;
- oh->inband_shadowed_obj_id = 0;
- tags.extra_shadows = 0;
-
- /* Update file size */
- if (object->variant_type ==
- YAFFS_OBJECT_TYPE_FILE) {
- oh->file_size =
- object->variant.
- file_variant.
- file_size;
- tags.extra_length =
- oh->file_size;
- }
-
- yaffs_verify_oh(object, oh,
- &tags, 1);
- new_chunk =
- yaffs_write_new_chunk(dev,
- (u8 *)
- oh,
- &tags,
- 1);
- } else {
- new_chunk =
- yaffs_write_new_chunk(dev,
- buffer,
- &tags,
- 1);
- }
-
- if (new_chunk < 0) {
- ret_val = YAFFS_FAIL;
- } else {
-
- /* Ok, now fix up the Tnodes etc. */
-
- if (tags.chunk_id == 0) {
- /* It's a header */
- object->hdr_chunk =
- new_chunk;
- object->serial =
- tags.serial_number;
- } else {
- /* It's a data chunk */
- int ok;
- ok = yaffs_put_chunk_in_file(object, tags.chunk_id, new_chunk, 0);
- }
- }
- }
-
- if (ret_val == YAFFS_OK)
- yaffs_chunk_del(dev, old_chunk,
- mark_flash, __LINE__);
-
- }
- }
-
- yaffs_release_temp_buffer(dev, buffer, __LINE__);
-
- }
-
- yaffs_verify_collected_blk(dev, bi, block);
-
- if (bi->block_state == YAFFS_BLOCK_STATE_COLLECTING) {
- /*
- * The gc did not complete. Set block state back to FULL
- * because checkpointing does not restore gc.
- */
- bi->block_state = YAFFS_BLOCK_STATE_FULL;
- } else {
- /* The gc completed. */
- /* Do any required cleanups */
- for (i = 0; i < dev->n_clean_ups; i++) {
- /* Time to delete the file too */
- object =
- yaffs_find_by_number(dev, dev->gc_cleanup_list[i]);
- if (object) {
- yaffs_free_tnode(dev,
- object->variant.
- file_variant.top);
- object->variant.file_variant.top = NULL;
- yaffs_trace(YAFFS_TRACE_GC,
- "yaffs: About to finally delete object %d",
- object->obj_id);
- yaffs_generic_obj_del(object);
- object->my_dev->n_deleted_files--;
- }
-
- }
-
- chunks_after = yaffs_get_erased_chunks(dev);
- if (chunks_before >= chunks_after)
- yaffs_trace(YAFFS_TRACE_GC,
- "gc did not increase free chunks before %d after %d",
- chunks_before, chunks_after);
- dev->gc_block = 0;
- dev->gc_chunk = 0;
- dev->n_clean_ups = 0;
- }
-
- dev->gc_disable = 0;
-
- return ret_val;
-}
-
-/*
- * FindBlockForGarbageCollection is used to select the dirtiest block (or close enough)
- * for garbage collection.
- */
-
-static unsigned yaffs_find_gc_block(struct yaffs_dev *dev,
- int aggressive, int background)
-{
- int i;
- int iterations;
- unsigned selected = 0;
- int prioritised = 0;
- int prioritised_exist = 0;
- struct yaffs_block_info *bi;
- int threshold;
-
- /* First let's see if we need to grab a prioritised block */
- if (dev->has_pending_prioritised_gc && !aggressive) {
- dev->gc_dirtiest = 0;
- bi = dev->block_info;
- for (i = dev->internal_start_block;
- i <= dev->internal_end_block && !selected; i++) {
-
- if (bi->gc_prioritise) {
- prioritised_exist = 1;
- if (bi->block_state == YAFFS_BLOCK_STATE_FULL &&
- yaffs_block_ok_for_gc(dev, bi)) {
- selected = i;
- prioritised = 1;
- }
- }
- bi++;
- }
-
- /*
- * If there is a prioritised block and none was selected then
- * this happened because there is at least one old dirty block gumming
- * up the works. Let's gc the oldest dirty block.
- */
-
- if (prioritised_exist &&
- !selected && dev->oldest_dirty_block > 0)
- selected = dev->oldest_dirty_block;
-
- if (!prioritised_exist) /* None found, so we can clear this */
- dev->has_pending_prioritised_gc = 0;
- }
-
-	/* If we're doing aggressive GC then we are happy to take a less-dirty
-	 * block, and search harder.
-	 * Otherwise (we're doing a leisurely gc) we only bother to do this if
-	 * the block has only a few pages in use.
-	 */
-
- if (!selected) {
- int pages_used;
- int n_blocks =
- dev->internal_end_block - dev->internal_start_block + 1;
- if (aggressive) {
- threshold = dev->param.chunks_per_block;
- iterations = n_blocks;
- } else {
- int max_threshold;
-
- if (background)
- max_threshold = dev->param.chunks_per_block / 2;
- else
- max_threshold = dev->param.chunks_per_block / 8;
-
- if (max_threshold < YAFFS_GC_PASSIVE_THRESHOLD)
- max_threshold = YAFFS_GC_PASSIVE_THRESHOLD;
-
- threshold = background ? (dev->gc_not_done + 2) * 2 : 0;
- if (threshold < YAFFS_GC_PASSIVE_THRESHOLD)
- threshold = YAFFS_GC_PASSIVE_THRESHOLD;
- if (threshold > max_threshold)
- threshold = max_threshold;
-
- iterations = n_blocks / 16 + 1;
- if (iterations > 100)
- iterations = 100;
- }
-
- for (i = 0;
- i < iterations &&
- (dev->gc_dirtiest < 1 ||
- dev->gc_pages_in_use > YAFFS_GC_GOOD_ENOUGH); i++) {
- dev->gc_block_finder++;
- if (dev->gc_block_finder < dev->internal_start_block ||
- dev->gc_block_finder > dev->internal_end_block)
- dev->gc_block_finder =
- dev->internal_start_block;
-
- bi = yaffs_get_block_info(dev, dev->gc_block_finder);
-
- pages_used = bi->pages_in_use - bi->soft_del_pages;
-
- if (bi->block_state == YAFFS_BLOCK_STATE_FULL &&
- pages_used < dev->param.chunks_per_block &&
- (dev->gc_dirtiest < 1
- || pages_used < dev->gc_pages_in_use)
- && yaffs_block_ok_for_gc(dev, bi)) {
- dev->gc_dirtiest = dev->gc_block_finder;
- dev->gc_pages_in_use = pages_used;
- }
- }
-
- if (dev->gc_dirtiest > 0 && dev->gc_pages_in_use <= threshold)
- selected = dev->gc_dirtiest;
- }
-
- /*
- * If nothing has been selected for a while, try the oldest dirty block,
- * because that's what is gumming up the works.
- */
-
- if (!selected && dev->param.is_yaffs2 &&
- dev->gc_not_done >= (background ? 10 : 20)) {
- yaffs2_find_oldest_dirty_seq(dev);
- if (dev->oldest_dirty_block > 0) {
- selected = dev->oldest_dirty_block;
- dev->gc_dirtiest = selected;
- dev->oldest_dirty_gc_count++;
- bi = yaffs_get_block_info(dev, selected);
- dev->gc_pages_in_use =
- bi->pages_in_use - bi->soft_del_pages;
- } else {
- dev->gc_not_done = 0;
- }
- }
-
- if (selected) {
- yaffs_trace(YAFFS_TRACE_GC,
- "GC Selected block %d with %d free, prioritised:%d",
- selected,
- dev->param.chunks_per_block - dev->gc_pages_in_use,
- prioritised);
-
- dev->n_gc_blocks++;
- if (background)
- dev->bg_gcs++;
-
- dev->gc_dirtiest = 0;
- dev->gc_pages_in_use = 0;
- dev->gc_not_done = 0;
- if (dev->refresh_skip > 0)
- dev->refresh_skip--;
- } else {
- dev->gc_not_done++;
- yaffs_trace(YAFFS_TRACE_GC,
- "GC none: finder %d skip %d threshold %d dirtiest %d using %d oldest %d%s",
- dev->gc_block_finder, dev->gc_not_done, threshold,
- dev->gc_dirtiest, dev->gc_pages_in_use,
- dev->oldest_dirty_block, background ? " bg" : "");
- }
-
- return selected;
-}
-
-/* New garbage collector
- * If we're very low on erased blocks then we do aggressive garbage collection
- * otherwise we do "leasurely" garbage collection.
- * Aggressive gc looks further (whole array) and will accept less dirty blocks.
- * Passive gc only inspects smaller areas and will only accept dirtier blocks.
- *
- * The idea is to help clear out space in a more spread-out manner.
- * Dunno if it really does anything useful.
- */
-static int yaffs_check_gc(struct yaffs_dev *dev, int background)
-{
- int aggressive = 0;
- int gc_ok = YAFFS_OK;
- int max_tries = 0;
- int min_erased;
- int erased_chunks;
- int checkpt_block_adjust;
-
- if (dev->param.gc_control && (dev->param.gc_control(dev) & 1) == 0)
- return YAFFS_OK;
-
- if (dev->gc_disable) {
- /* Bail out so we don't get recursive gc */
- return YAFFS_OK;
- }
-
- /* This loop should pass the first time.
- * We'll only see looping here if the collection does not increase space.
- */
-
- do {
- max_tries++;
-
- checkpt_block_adjust = yaffs_calc_checkpt_blocks_required(dev);
-
- min_erased =
- dev->param.n_reserved_blocks + checkpt_block_adjust + 1;
- erased_chunks =
- dev->n_erased_blocks * dev->param.chunks_per_block;
-
- /* If we need a block soon then do aggressive gc. */
- if (dev->n_erased_blocks < min_erased)
- aggressive = 1;
- else {
- if (!background
- && erased_chunks > (dev->n_free_chunks / 4))
- break;
-
- if (dev->gc_skip > 20)
- dev->gc_skip = 20;
- if (erased_chunks < dev->n_free_chunks / 2 ||
- dev->gc_skip < 1 || background)
- aggressive = 0;
- else {
- dev->gc_skip--;
- break;
- }
- }
-
- dev->gc_skip = 5;
-
- /* If we don't already have a block being gc'd then see if we should start another */
-
- if (dev->gc_block < 1 && !aggressive) {
- dev->gc_block = yaffs2_find_refresh_block(dev);
- dev->gc_chunk = 0;
- dev->n_clean_ups = 0;
- }
- if (dev->gc_block < 1) {
- dev->gc_block =
- yaffs_find_gc_block(dev, aggressive, background);
- dev->gc_chunk = 0;
- dev->n_clean_ups = 0;
- }
-
- if (dev->gc_block > 0) {
- dev->all_gcs++;
- if (!aggressive)
- dev->passive_gc_count++;
-
- yaffs_trace(YAFFS_TRACE_GC,
- "yaffs: GC n_erased_blocks %d aggressive %d",
- dev->n_erased_blocks, aggressive);
-
- gc_ok = yaffs_gc_block(dev, dev->gc_block, aggressive);
- }
-
- if (dev->n_erased_blocks < (dev->param.n_reserved_blocks)
- && dev->gc_block > 0) {
- yaffs_trace(YAFFS_TRACE_GC,
- "yaffs: GC !!!no reclaim!!! n_erased_blocks %d after try %d block %d",
- dev->n_erased_blocks, max_tries,
- dev->gc_block);
- }
- } while ((dev->n_erased_blocks < dev->param.n_reserved_blocks) &&
- (dev->gc_block > 0) && (max_tries < 2));
-
- return aggressive ? gc_ok : YAFFS_OK;
-}
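-
-/* Illustrative sketch (not in the original source; the helper name is
- * ours): the aggressive-gc trigger above reduces to a threshold test.
- * For example, with n_reserved_blocks = 5 and a checkpoint needing 2
- * blocks, min_erased = 5 + 2 + 1 = 8, so gc turns aggressive as soon as
- * fewer than 8 erased blocks remain.
- */
-static inline int yaffs_gc_would_be_aggressive(struct yaffs_dev *dev)
-{
-	int min_erased = dev->param.n_reserved_blocks +
-			 yaffs_calc_checkpt_blocks_required(dev) + 1;
-
-	return dev->n_erased_blocks < min_erased;
-}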
-
-/*
- * yaffs_bg_gc()
- * Garbage collects. Intended to be called from a background thread.
- * Returns non-zero if more than half the free chunks were erased
- * (measured before the collection pass).
- */
-int yaffs_bg_gc(struct yaffs_dev *dev, unsigned urgency)
-{
- int erased_chunks = dev->n_erased_blocks * dev->param.chunks_per_block;
-
- yaffs_trace(YAFFS_TRACE_BACKGROUND, "Background gc %u", urgency);
-
- yaffs_check_gc(dev, 1);
- return erased_chunks > dev->n_free_chunks / 2;
-}
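-
-/* Usage sketch (not in the original source; the function name and retry
- * policy are assumptions): a background thread could call yaffs_bg_gc()
- * with rising urgency until it reports that more than half of the free
- * chunks are erased, sleeping between passes.
- */
-static void example_bg_gc_pass(struct yaffs_dev *dev)
-{
-	unsigned urgency;
-
-	for (urgency = 0; urgency < 5; urgency++) {
-		if (yaffs_bg_gc(dev, urgency))
-			break;	/* enough erased space reclaimed */
-	}
-}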
-
-/*-------------------- Data file manipulation -----------------*/
-
-static int yaffs_rd_data_obj(struct yaffs_obj *in, int inode_chunk, u8 * buffer)
-{
- int nand_chunk = yaffs_find_chunk_in_file(in, inode_chunk, NULL);
-
- if (nand_chunk >= 0)
- return yaffs_rd_chunk_tags_nand(in->my_dev, nand_chunk,
- buffer, NULL);
- else {
- yaffs_trace(YAFFS_TRACE_NANDACCESS,
- "Chunk %d not found zero instead",
- nand_chunk);
- /* get sane (zero) data if you read a hole */
- memset(buffer, 0, in->my_dev->data_bytes_per_chunk);
- return 0;
- }
-
-}
-
-void yaffs_chunk_del(struct yaffs_dev *dev, int chunk_id, int mark_flash,
- int lyn)
-{
- int block;
- int page;
- struct yaffs_ext_tags tags;
- struct yaffs_block_info *bi;
-
- if (chunk_id <= 0)
- return;
-
- dev->n_deletions++;
- block = chunk_id / dev->param.chunks_per_block;
- page = chunk_id % dev->param.chunks_per_block;
-
- if (!yaffs_check_chunk_bit(dev, block, page))
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Deleting invalid chunk %d", chunk_id);
-
- bi = yaffs_get_block_info(dev, block);
-
- yaffs2_update_oldest_dirty_seq(dev, block, bi);
-
- yaffs_trace(YAFFS_TRACE_DELETION,
- "line %d delete of chunk %d",
- lyn, chunk_id);
-
- if (!dev->param.is_yaffs2 && mark_flash &&
- bi->block_state != YAFFS_BLOCK_STATE_COLLECTING) {
-
- yaffs_init_tags(&tags);
-
- tags.is_deleted = 1;
-
- yaffs_wr_chunk_tags_nand(dev, chunk_id, NULL, &tags);
- yaffs_handle_chunk_update(dev, chunk_id, &tags);
- } else {
- dev->n_unmarked_deletions++;
- }
-
- /* Pull out of the management area.
- * If the whole block became dirty, this will kick off an erasure.
- */
- if (bi->block_state == YAFFS_BLOCK_STATE_ALLOCATING ||
- bi->block_state == YAFFS_BLOCK_STATE_FULL ||
- bi->block_state == YAFFS_BLOCK_STATE_NEEDS_SCANNING ||
- bi->block_state == YAFFS_BLOCK_STATE_COLLECTING) {
- dev->n_free_chunks++;
-
- yaffs_clear_chunk_bit(dev, block, page);
-
- bi->pages_in_use--;
-
- if (bi->pages_in_use == 0 &&
- !bi->has_shrink_hdr &&
- bi->block_state != YAFFS_BLOCK_STATE_ALLOCATING &&
- bi->block_state != YAFFS_BLOCK_STATE_NEEDS_SCANNING) {
- yaffs_block_became_dirty(dev, block);
- }
-
- }
-
-}
-
-static int yaffs_wr_data_obj(struct yaffs_obj *in, int inode_chunk,
- const u8 * buffer, int n_bytes, int use_reserve)
-{
-	/* Find the old chunk. We need to do this to get the serial number.
-	 * Write the new one and patch it into the tree.
-	 * Invalidate the old tags.
- */
-
- int prev_chunk_id;
- struct yaffs_ext_tags prev_tags;
-
- int new_chunk_id;
- struct yaffs_ext_tags new_tags;
-
- struct yaffs_dev *dev = in->my_dev;
-
- yaffs_check_gc(dev, 0);
-
- /* Get the previous chunk at this location in the file if it exists.
- * If it does not exist then put a zero into the tree. This creates
- * the tnode now, rather than later when it is harder to clean up.
- */
- prev_chunk_id = yaffs_find_chunk_in_file(in, inode_chunk, &prev_tags);
- if (prev_chunk_id < 1 &&
- !yaffs_put_chunk_in_file(in, inode_chunk, 0, 0))
- return 0;
-
- /* Set up new tags */
- yaffs_init_tags(&new_tags);
-
- new_tags.chunk_id = inode_chunk;
- new_tags.obj_id = in->obj_id;
- new_tags.serial_number =
- (prev_chunk_id > 0) ? prev_tags.serial_number + 1 : 1;
- new_tags.n_bytes = n_bytes;
-
- if (n_bytes < 1 || n_bytes > dev->param.total_bytes_per_chunk) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "Writing %d bytes to chunk!!!!!!!!!",
- n_bytes);
- YBUG();
- }
-
- new_chunk_id =
- yaffs_write_new_chunk(dev, buffer, &new_tags, use_reserve);
-
- if (new_chunk_id > 0) {
- yaffs_put_chunk_in_file(in, inode_chunk, new_chunk_id, 0);
-
- if (prev_chunk_id > 0)
- yaffs_chunk_del(dev, prev_chunk_id, 1, __LINE__);
-
- yaffs_verify_file_sane(in);
- }
- return new_chunk_id;
-
-}
-
-
-
-static int yaffs_do_xattrib_mod(struct yaffs_obj *obj, int set,
- const YCHAR * name, const void *value, int size,
- int flags)
-{
- struct yaffs_xattr_mod xmod;
-
- int result;
-
- xmod.set = set;
- xmod.name = name;
- xmod.data = value;
- xmod.size = size;
- xmod.flags = flags;
- xmod.result = -ENOSPC;
-
- result = yaffs_update_oh(obj, NULL, 0, 0, 0, &xmod);
-
- if (result > 0)
- return xmod.result;
- else
- return -ENOSPC;
-}
-
-static int yaffs_apply_xattrib_mod(struct yaffs_obj *obj, char *buffer,
- struct yaffs_xattr_mod *xmod)
-{
- int retval = 0;
- int x_offs = sizeof(struct yaffs_obj_hdr);
- struct yaffs_dev *dev = obj->my_dev;
- int x_size = dev->data_bytes_per_chunk - sizeof(struct yaffs_obj_hdr);
-
- char *x_buffer = buffer + x_offs;
-
- if (xmod->set)
- retval =
- nval_set(x_buffer, x_size, xmod->name, xmod->data,
- xmod->size, xmod->flags);
- else
- retval = nval_del(x_buffer, x_size, xmod->name);
-
- obj->has_xattr = nval_hasvalues(x_buffer, x_size);
- obj->xattr_known = 1;
-
- xmod->result = retval;
-
- return retval;
-}
-
-static int yaffs_do_xattrib_fetch(struct yaffs_obj *obj, const YCHAR * name,
- void *value, int size)
-{
- char *buffer = NULL;
- int result;
- struct yaffs_ext_tags tags;
- struct yaffs_dev *dev = obj->my_dev;
- int x_offs = sizeof(struct yaffs_obj_hdr);
- int x_size = dev->data_bytes_per_chunk - sizeof(struct yaffs_obj_hdr);
-
- char *x_buffer;
-
- int retval = 0;
-
- if (obj->hdr_chunk < 1)
- return -ENODATA;
-
- /* If we know that the object has no xattribs then don't do all the
- * reading and parsing.
- */
- if (obj->xattr_known && !obj->has_xattr) {
- if (name)
- return -ENODATA;
- else
- return 0;
- }
-
- buffer = (char *)yaffs_get_temp_buffer(dev, __LINE__);
- if (!buffer)
- return -ENOMEM;
-
- result =
- yaffs_rd_chunk_tags_nand(dev, obj->hdr_chunk, (u8 *) buffer, &tags);
-
- if (result != YAFFS_OK)
- retval = -ENOENT;
- else {
- x_buffer = buffer + x_offs;
-
- if (!obj->xattr_known) {
- obj->has_xattr = nval_hasvalues(x_buffer, x_size);
- obj->xattr_known = 1;
- }
-
- if (name)
- retval = nval_get(x_buffer, x_size, name, value, size);
- else
- retval = nval_list(x_buffer, x_size, value, size);
- }
- yaffs_release_temp_buffer(dev, (u8 *) buffer, __LINE__);
- return retval;
-}
-
-int yaffs_set_xattrib(struct yaffs_obj *obj, const YCHAR * name,
- const void *value, int size, int flags)
-{
- return yaffs_do_xattrib_mod(obj, 1, name, value, size, flags);
-}
-
-int yaffs_remove_xattrib(struct yaffs_obj *obj, const YCHAR * name)
-{
- return yaffs_do_xattrib_mod(obj, 0, name, NULL, 0, 0);
-}
-
-int yaffs_get_xattrib(struct yaffs_obj *obj, const YCHAR * name, void *value,
- int size)
-{
- return yaffs_do_xattrib_fetch(obj, name, value, size);
-}
-
-int yaffs_list_xattrib(struct yaffs_obj *obj, char *buffer, int size)
-{
- return yaffs_do_xattrib_fetch(obj, NULL, buffer, size);
-}
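-
-/* Usage sketch (not in the original source; the helper name and the
- * attribute name/value are arbitrary examples): the four wrappers above
- * form a conventional set/get/list/remove xattr interface.
- */
-static void example_xattr_roundtrip(struct yaffs_obj *obj)
-{
-	char value[16];
-	int n;
-
-	yaffs_set_xattrib(obj, _Y("user.colour"), "blue", 4, 0);
-	n = yaffs_get_xattrib(obj, _Y("user.colour"), value, sizeof(value));
-	if (n > 0)
-		yaffs_remove_xattrib(obj, _Y("user.colour"));
-}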
-
-static void yaffs_check_obj_details_loaded(struct yaffs_obj *in)
-{
- u8 *chunk_data;
- struct yaffs_obj_hdr *oh;
- struct yaffs_dev *dev;
- struct yaffs_ext_tags tags;
- int result;
- int alloc_failed = 0;
-
- if (!in)
- return;
-
- dev = in->my_dev;
-
- if (in->lazy_loaded && in->hdr_chunk > 0) {
- in->lazy_loaded = 0;
- chunk_data = yaffs_get_temp_buffer(dev, __LINE__);
-
- result =
- yaffs_rd_chunk_tags_nand(dev, in->hdr_chunk, chunk_data,
- &tags);
- oh = (struct yaffs_obj_hdr *)chunk_data;
-
- in->yst_mode = oh->yst_mode;
- yaffs_load_attribs(in, oh);
- yaffs_set_obj_name_from_oh(in, oh);
-
- if (in->variant_type == YAFFS_OBJECT_TYPE_SYMLINK) {
- in->variant.symlink_variant.alias =
- yaffs_clone_str(oh->alias);
- if (!in->variant.symlink_variant.alias)
- alloc_failed = 1; /* Not returned to caller */
- }
-
- yaffs_release_temp_buffer(dev, chunk_data, __LINE__);
- }
-}
-
-static void yaffs_load_name_from_oh(struct yaffs_dev *dev, YCHAR * name,
- const YCHAR * oh_name, int buff_size)
-{
-#ifdef CONFIG_YAFFS_AUTO_UNICODE
- if (dev->param.auto_unicode) {
- if (*oh_name) {
- /* It is an ASCII name, do an ASCII to
- * unicode conversion */
- const char *ascii_oh_name = (const char *)oh_name;
- int n = buff_size - 1;
- while (n > 0 && *ascii_oh_name) {
- *name = *ascii_oh_name;
- name++;
- ascii_oh_name++;
- n--;
- }
- } else {
- strncpy(name, oh_name + 1, buff_size - 1);
- }
- } else {
-#else
- {
-#endif
- strncpy(name, oh_name, buff_size - 1);
- }
-}
-
-static void yaffs_load_oh_from_name(struct yaffs_dev *dev, YCHAR * oh_name,
- const YCHAR * name)
-{
-#ifdef CONFIG_YAFFS_AUTO_UNICODE
-
- int is_ascii;
- YCHAR *w;
-
- if (dev->param.auto_unicode) {
-
- is_ascii = 1;
- w = name;
-
- /* Figure out if the name will fit in ascii character set */
- while (is_ascii && *w) {
- if ((*w) & 0xff00)
- is_ascii = 0;
- w++;
- }
-
- if (is_ascii) {
- /* It is an ASCII name, so do a unicode to ascii conversion */
- char *ascii_oh_name = (char *)oh_name;
- int n = YAFFS_MAX_NAME_LENGTH - 1;
- while (n > 0 && *name) {
- *ascii_oh_name = *name;
- name++;
- ascii_oh_name++;
- n--;
- }
- } else {
- /* It is a unicode name, so save starting at the second YCHAR */
- *oh_name = 0;
- strncpy(oh_name + 1, name,
- YAFFS_MAX_NAME_LENGTH - 2);
- }
- } else {
-#else
- {
-#endif
- strncpy(oh_name, name, YAFFS_MAX_NAME_LENGTH - 1);
- }
-
-}
-
-/* UpdateObjectHeader (yaffs_update_oh) updates the header on NAND for an object.
- * If name is not NULL, then that new name is used.
- */
-int yaffs_update_oh(struct yaffs_obj *in, const YCHAR * name, int force,
- int is_shrink, int shadows, struct yaffs_xattr_mod *xmod)
-{
-
- struct yaffs_block_info *bi;
-
- struct yaffs_dev *dev = in->my_dev;
-
- int prev_chunk_id;
- int ret_val = 0;
- int result = 0;
-
- int new_chunk_id;
- struct yaffs_ext_tags new_tags;
- struct yaffs_ext_tags old_tags;
- const YCHAR *alias = NULL;
-
- u8 *buffer = NULL;
- YCHAR old_name[YAFFS_MAX_NAME_LENGTH + 1];
-
- struct yaffs_obj_hdr *oh = NULL;
-
- strcpy(old_name, _Y("silly old name"));
-
- if (!in->fake || in == dev->root_dir ||
- force || xmod) {
-
- yaffs_check_gc(dev, 0);
- yaffs_check_obj_details_loaded(in);
-
- buffer = yaffs_get_temp_buffer(in->my_dev, __LINE__);
- oh = (struct yaffs_obj_hdr *)buffer;
-
- prev_chunk_id = in->hdr_chunk;
-
- if (prev_chunk_id > 0) {
- result = yaffs_rd_chunk_tags_nand(dev, prev_chunk_id,
- buffer, &old_tags);
-
- yaffs_verify_oh(in, oh, &old_tags, 0);
-
- memcpy(old_name, oh->name, sizeof(oh->name));
- memset(buffer, 0xFF, sizeof(struct yaffs_obj_hdr));
- } else {
- memset(buffer, 0xFF, dev->data_bytes_per_chunk);
- }
-
- oh->type = in->variant_type;
- oh->yst_mode = in->yst_mode;
- oh->shadows_obj = oh->inband_shadowed_obj_id = shadows;
-
- yaffs_load_attribs_oh(oh, in);
-
- if (in->parent)
- oh->parent_obj_id = in->parent->obj_id;
- else
- oh->parent_obj_id = 0;
-
- if (name && *name) {
- memset(oh->name, 0, sizeof(oh->name));
- yaffs_load_oh_from_name(dev, oh->name, name);
- } else if (prev_chunk_id > 0) {
- memcpy(oh->name, old_name, sizeof(oh->name));
- } else {
- memset(oh->name, 0, sizeof(oh->name));
- }
-
- oh->is_shrink = is_shrink;
-
- switch (in->variant_type) {
- case YAFFS_OBJECT_TYPE_UNKNOWN:
- /* Should not happen */
- break;
- case YAFFS_OBJECT_TYPE_FILE:
-			oh->file_size =
-				(oh->parent_obj_id == YAFFS_OBJECTID_DELETED ||
-				 oh->parent_obj_id == YAFFS_OBJECTID_UNLINKED) ?
-				 0 : in->variant.file_variant.file_size;
- break;
- case YAFFS_OBJECT_TYPE_HARDLINK:
- oh->equiv_id = in->variant.hardlink_variant.equiv_id;
- break;
- case YAFFS_OBJECT_TYPE_SPECIAL:
- /* Do nothing */
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- /* Do nothing */
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- alias = in->variant.symlink_variant.alias;
- if (!alias)
- alias = _Y("no alias");
- strncpy(oh->alias, alias, YAFFS_MAX_ALIAS_LENGTH);
- oh->alias[YAFFS_MAX_ALIAS_LENGTH] = 0;
- break;
- }
-
- /* process any xattrib modifications */
- if (xmod)
- yaffs_apply_xattrib_mod(in, (char *)buffer, xmod);
-
- /* Tags */
- yaffs_init_tags(&new_tags);
- in->serial++;
- new_tags.chunk_id = 0;
- new_tags.obj_id = in->obj_id;
- new_tags.serial_number = in->serial;
-
- /* Add extra info for file header */
-
- new_tags.extra_available = 1;
- new_tags.extra_parent_id = oh->parent_obj_id;
- new_tags.extra_length = oh->file_size;
- new_tags.extra_is_shrink = oh->is_shrink;
- new_tags.extra_equiv_id = oh->equiv_id;
- new_tags.extra_shadows = (oh->shadows_obj > 0) ? 1 : 0;
- new_tags.extra_obj_type = in->variant_type;
-
- yaffs_verify_oh(in, oh, &new_tags, 1);
-
- /* Create new chunk in NAND */
- new_chunk_id =
- yaffs_write_new_chunk(dev, buffer, &new_tags,
- (prev_chunk_id > 0) ? 1 : 0);
-
- if (new_chunk_id >= 0) {
-
- in->hdr_chunk = new_chunk_id;
-
- if (prev_chunk_id > 0) {
- yaffs_chunk_del(dev, prev_chunk_id, 1,
- __LINE__);
- }
-
- if (!yaffs_obj_cache_dirty(in))
- in->dirty = 0;
-
- /* If this was a shrink, then mark the block that the chunk lives on */
- if (is_shrink) {
- bi = yaffs_get_block_info(in->my_dev,
- new_chunk_id /
- in->my_dev->param.
- chunks_per_block);
- bi->has_shrink_hdr = 1;
- }
-
- }
-
- ret_val = new_chunk_id;
-
- }
-
- if (buffer)
- yaffs_release_temp_buffer(dev, buffer, __LINE__);
-
- return ret_val;
-}
-
-/*--------------------- File read/write ------------------------
- * Read and write have very similar structures.
- * In general a read/write has three parts to it:
- * An incomplete chunk to start with (if the read/write is not chunk-aligned)
- * Some complete chunks
- * An incomplete chunk to end off with
- *
- * Curve-balls: the first chunk might also be the last chunk.
- */
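-
-/* Worked example (not in the original source; assumes 2048-byte data
- * chunks): a 5000-byte read at offset 3000 splits exactly as described
- * above:
- *
- *	chunk = 3000 / 2048 + 1 = 2;	start = 3000 % 2048 = 952
- *	first copy: 2048 - 952 = 1096 bytes  (partial tail of chunk 2)
- *	then 2048 bytes                      (all of chunk 3)
- *	then 5000 - 1096 - 2048 = 1856 bytes (partial head of chunk 4)
- */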
-
-int yaffs_file_rd(struct yaffs_obj *in, u8 * buffer, loff_t offset, int n_bytes)
-{
-
- int chunk;
- u32 start;
- int n_copy;
- int n = n_bytes;
- int n_done = 0;
- struct yaffs_cache *cache;
-
- struct yaffs_dev *dev;
-
- dev = in->my_dev;
-
- while (n > 0) {
- /* chunk = offset / dev->data_bytes_per_chunk + 1; */
- /* start = offset % dev->data_bytes_per_chunk; */
- yaffs_addr_to_chunk(dev, offset, &chunk, &start);
- chunk++;
-
- /* OK now check for the curveball where the start and end are in
- * the same chunk.
- */
- if ((start + n) < dev->data_bytes_per_chunk)
- n_copy = n;
- else
- n_copy = dev->data_bytes_per_chunk - start;
-
- cache = yaffs_find_chunk_cache(in, chunk);
-
- /* If the chunk is already in the cache or it is less than a whole chunk
- * or we're using inband tags then use the cache (if there is caching)
- * else bypass the cache.
- */
- if (cache || n_copy != dev->data_bytes_per_chunk
- || dev->param.inband_tags) {
- if (dev->param.n_caches > 0) {
-
- /* If we can't find the data in the cache, then load it up. */
-
- if (!cache) {
- cache =
- yaffs_grab_chunk_cache(in->my_dev);
- cache->object = in;
- cache->chunk_id = chunk;
- cache->dirty = 0;
- cache->locked = 0;
- yaffs_rd_data_obj(in, chunk,
- cache->data);
- cache->n_bytes = 0;
- }
-
- yaffs_use_cache(dev, cache, 0);
-
- cache->locked = 1;
-
- memcpy(buffer, &cache->data[start], n_copy);
-
- cache->locked = 0;
- } else {
- /* Read into the local buffer then copy.. */
-
- u8 *local_buffer =
- yaffs_get_temp_buffer(dev, __LINE__);
- yaffs_rd_data_obj(in, chunk, local_buffer);
-
- memcpy(buffer, &local_buffer[start], n_copy);
-
- yaffs_release_temp_buffer(dev, local_buffer,
- __LINE__);
- }
-
- } else {
-
- /* A full chunk. Read directly into the supplied buffer. */
- yaffs_rd_data_obj(in, chunk, buffer);
-
- }
-
- n -= n_copy;
- offset += n_copy;
- buffer += n_copy;
- n_done += n_copy;
-
- }
-
- return n_done;
-}
-
-int yaffs_do_file_wr(struct yaffs_obj *in, const u8 * buffer, loff_t offset,
-		     int n_bytes, int write_through)
-{
-
- int chunk;
- u32 start;
- int n_copy;
- int n = n_bytes;
- int n_done = 0;
- int n_writeback;
- int start_write = offset;
- int chunk_written = 0;
- u32 n_bytes_read;
- u32 chunk_start;
-
- struct yaffs_dev *dev;
-
- dev = in->my_dev;
-
- while (n > 0 && chunk_written >= 0) {
- yaffs_addr_to_chunk(dev, offset, &chunk, &start);
-
- if (chunk * dev->data_bytes_per_chunk + start != offset ||
- start >= dev->data_bytes_per_chunk) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "AddrToChunk of offset %d gives chunk %d start %d",
- (int)offset, chunk, start);
- }
- chunk++; /* File pos to chunk in file offset */
-
- /* OK now check for the curveball where the start and end are in
- * the same chunk.
- */
-
- if ((start + n) < dev->data_bytes_per_chunk) {
- n_copy = n;
-
- /* Now folks, to calculate how many bytes to write back....
-			 * If we're overwriting and not writing to the end of the file then
- * we need to write back as much as was there before.
- */
-
- chunk_start = ((chunk - 1) * dev->data_bytes_per_chunk);
-
- if (chunk_start > in->variant.file_variant.file_size)
- n_bytes_read = 0; /* Past end of file */
- else
- n_bytes_read =
- in->variant.file_variant.file_size -
- chunk_start;
-
- if (n_bytes_read > dev->data_bytes_per_chunk)
- n_bytes_read = dev->data_bytes_per_chunk;
-
- n_writeback =
- (n_bytes_read >
- (start + n)) ? n_bytes_read : (start + n);
-
- if (n_writeback < 0
- || n_writeback > dev->data_bytes_per_chunk)
- YBUG();
-
- } else {
- n_copy = dev->data_bytes_per_chunk - start;
- n_writeback = dev->data_bytes_per_chunk;
- }
-
- if (n_copy != dev->data_bytes_per_chunk
- || dev->param.inband_tags) {
- /* An incomplete start or end chunk (or maybe both start and end chunk),
- * or we're using inband tags, so we want to use the cache buffers.
- */
- if (dev->param.n_caches > 0) {
- struct yaffs_cache *cache;
- /* If we can't find the data in the cache, then load the cache */
- cache = yaffs_find_chunk_cache(in, chunk);
-
- if (!cache
- && yaffs_check_alloc_available(dev, 1)) {
- cache = yaffs_grab_chunk_cache(dev);
- cache->object = in;
- cache->chunk_id = chunk;
- cache->dirty = 0;
- cache->locked = 0;
- yaffs_rd_data_obj(in, chunk,
- cache->data);
- } else if (cache &&
- !cache->dirty &&
- !yaffs_check_alloc_available(dev,
- 1)) {
- /* Drop the cache if it was a read cache item and
- * no space check has been made for it.
- */
- cache = NULL;
- }
-
- if (cache) {
- yaffs_use_cache(dev, cache, 1);
- cache->locked = 1;
-
- memcpy(&cache->data[start], buffer,
- n_copy);
-
- cache->locked = 0;
- cache->n_bytes = n_writeback;
-
-					if (write_through) {
- chunk_written =
- yaffs_wr_data_obj
- (cache->object,
- cache->chunk_id,
- cache->data,
- cache->n_bytes, 1);
- cache->dirty = 0;
- }
-
- } else {
- chunk_written = -1; /* fail the write */
- }
- } else {
-				/* An incomplete start or end chunk (or maybe both).
-				 * Read into the local buffer, copy the new data in, then write back.
- */
-
- u8 *local_buffer =
- yaffs_get_temp_buffer(dev, __LINE__);
-
- yaffs_rd_data_obj(in, chunk, local_buffer);
-
- memcpy(&local_buffer[start], buffer, n_copy);
-
- chunk_written =
- yaffs_wr_data_obj(in, chunk,
- local_buffer,
- n_writeback, 0);
-
- yaffs_release_temp_buffer(dev, local_buffer,
- __LINE__);
-
- }
-
- } else {
- /* A full chunk. Write directly from the supplied buffer. */
-
- chunk_written =
- yaffs_wr_data_obj(in, chunk, buffer,
- dev->data_bytes_per_chunk, 0);
-
-			/* Since we've overwritten the cached data, we'd better invalidate it. */
- yaffs_invalidate_chunk_cache(in, chunk);
- }
-
- if (chunk_written >= 0) {
- n -= n_copy;
- offset += n_copy;
- buffer += n_copy;
- n_done += n_copy;
- }
-
- }
-
- /* Update file object */
-
- if ((start_write + n_done) > in->variant.file_variant.file_size)
- in->variant.file_variant.file_size = (start_write + n_done);
-
- in->dirty = 1;
-
- return n_done;
-}
-
-int yaffs_wr_file(struct yaffs_obj *in, const u8 * buffer, loff_t offset,
-		  int n_bytes, int write_through)
-{
- yaffs2_handle_hole(in, offset);
-	return yaffs_do_file_wr(in, buffer, offset, n_bytes, write_through);
-}
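-
-/* Usage sketch (not in the original source; the helper name is an
- * assumption and error handling is elided): a write/read round trip
- * through the two entry points above, with write-through disabled.
- */
-static int example_file_roundtrip(struct yaffs_obj *in)
-{
-	u8 out[] = "hello";
-	u8 back[sizeof(out)];
-
-	if (yaffs_wr_file(in, out, 0, sizeof(out), 0) != sizeof(out))
-		return YAFFS_FAIL;
-	if (yaffs_file_rd(in, back, 0, sizeof(back)) != sizeof(back))
-		return YAFFS_FAIL;
-	return YAFFS_OK;
-}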
-
-/* ---------------------- File resizing stuff ------------------ */
-
-static void yaffs_prune_chunks(struct yaffs_obj *in, int new_size)
-{
-
- struct yaffs_dev *dev = in->my_dev;
- int old_size = in->variant.file_variant.file_size;
-
- int last_del = 1 + (old_size - 1) / dev->data_bytes_per_chunk;
-
- int start_del = 1 + (new_size + dev->data_bytes_per_chunk - 1) /
- dev->data_bytes_per_chunk;
- int i;
- int chunk_id;
-
- /* Delete backwards so that we don't end up with holes if
- * power is lost part-way through the operation.
- */
- for (i = last_del; i >= start_del; i--) {
- /* NB this could be optimised somewhat,
- * eg. could retrieve the tags and write them without
- * using yaffs_chunk_del
- */
-
- chunk_id = yaffs_find_del_file_chunk(in, i, NULL);
- if (chunk_id > 0) {
- if (chunk_id <
- (dev->internal_start_block *
- dev->param.chunks_per_block)
- || chunk_id >=
- ((dev->internal_end_block +
- 1) * dev->param.chunks_per_block)) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "Found daft chunk_id %d for %d",
- chunk_id, i);
- } else {
- in->n_data_chunks--;
- yaffs_chunk_del(dev, chunk_id, 1, __LINE__);
- }
- }
- }
-
-}
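-
-/* Worked example (not in the original source; assumes 2048-byte data
- * chunks): shrinking a 10000-byte file to 5000 bytes gives
- *
- *	last_del  = 1 + (10000 - 1) / 2048       = 5
- *	start_del = 1 + (5000 + 2048 - 1) / 2048 = 4
- *
- * so chunks 5 and 4 are deleted, backwards. Chunk 3, which still holds
- * bytes 4096..4999, is kept and later rewritten zero-padded by
- * yaffs_resize_file_down() below.
- */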
-
-void yaffs_resize_file_down(struct yaffs_obj *obj, loff_t new_size)
-{
- int new_full;
- u32 new_partial;
- struct yaffs_dev *dev = obj->my_dev;
-
- yaffs_addr_to_chunk(dev, new_size, &new_full, &new_partial);
-
- yaffs_prune_chunks(obj, new_size);
-
- if (new_partial != 0) {
- int last_chunk = 1 + new_full;
- u8 *local_buffer = yaffs_get_temp_buffer(dev, __LINE__);
-
- /* Rewrite the last chunk with its new size and zero pad */
- yaffs_rd_data_obj(obj, last_chunk, local_buffer);
- memset(local_buffer + new_partial, 0,
- dev->data_bytes_per_chunk - new_partial);
-
- yaffs_wr_data_obj(obj, last_chunk, local_buffer,
- new_partial, 1);
-
- yaffs_release_temp_buffer(dev, local_buffer, __LINE__);
- }
-
- obj->variant.file_variant.file_size = new_size;
-
- yaffs_prune_tree(dev, &obj->variant.file_variant);
-}
-
-int yaffs_resize_file(struct yaffs_obj *in, loff_t new_size)
-{
- struct yaffs_dev *dev = in->my_dev;
- int old_size = in->variant.file_variant.file_size;
-
- yaffs_flush_file_cache(in);
- yaffs_invalidate_whole_cache(in);
-
- yaffs_check_gc(dev, 0);
-
- if (in->variant_type != YAFFS_OBJECT_TYPE_FILE)
- return YAFFS_FAIL;
-
- if (new_size == old_size)
- return YAFFS_OK;
-
- if (new_size > old_size) {
- yaffs2_handle_hole(in, new_size);
- in->variant.file_variant.file_size = new_size;
- } else {
- /* new_size < old_size */
- yaffs_resize_file_down(in, new_size);
- }
-
- /* Write a new object header to reflect the resize.
- * show we've shrunk the file, if need be
- * Do this only if the file is not in the deleted directories
- * and is not shadowed.
- */
- if (in->parent &&
- !in->is_shadowed &&
- in->parent->obj_id != YAFFS_OBJECTID_UNLINKED &&
- in->parent->obj_id != YAFFS_OBJECTID_DELETED)
- yaffs_update_oh(in, NULL, 0, 0, 0, NULL);
-
- return YAFFS_OK;
-}
-
-int yaffs_flush_file(struct yaffs_obj *in, int update_time, int data_sync)
-{
- int ret_val;
- if (in->dirty) {
- yaffs_flush_file_cache(in);
- if (data_sync) /* Only sync data */
- ret_val = YAFFS_OK;
- else {
- if (update_time)
- yaffs_load_current_time(in, 0, 0);
-
- ret_val = (yaffs_update_oh(in, NULL, 0, 0, 0, NULL) >=
- 0) ? YAFFS_OK : YAFFS_FAIL;
- }
- } else {
- ret_val = YAFFS_OK;
- }
-
- return ret_val;
-
-}
-
-
-/* yaffs_del_file deletes the whole file data
- * and the inode associated with the file.
- * It does not delete the links associated with the file.
- */
-static int yaffs_unlink_file_if_needed(struct yaffs_obj *in)
-{
-
- int ret_val;
- int del_now = 0;
- struct yaffs_dev *dev = in->my_dev;
-
- if (!in->my_inode)
- del_now = 1;
-
- if (del_now) {
- ret_val =
- yaffs_change_obj_name(in, in->my_dev->del_dir,
- _Y("deleted"), 0, 0);
- yaffs_trace(YAFFS_TRACE_TRACING,
- "yaffs: immediate deletion of file %d",
- in->obj_id);
- in->deleted = 1;
- in->my_dev->n_deleted_files++;
- if (dev->param.disable_soft_del || dev->param.is_yaffs2)
- yaffs_resize_file(in, 0);
- yaffs_soft_del_file(in);
- } else {
- ret_val =
- yaffs_change_obj_name(in, in->my_dev->unlinked_dir,
- _Y("unlinked"), 0, 0);
- }
-
- return ret_val;
-}
-
-int yaffs_del_file(struct yaffs_obj *in)
-{
- int ret_val = YAFFS_OK;
-	int deleted;	/* Need to cache this on the stack in case "in" is freed */
- struct yaffs_dev *dev = in->my_dev;
-
- if (dev->param.disable_soft_del || dev->param.is_yaffs2)
- yaffs_resize_file(in, 0);
-
- if (in->n_data_chunks > 0) {
- /* Use soft deletion if there is data in the file.
- * That won't be the case if it has been resized to zero.
- */
- if (!in->unlinked)
- ret_val = yaffs_unlink_file_if_needed(in);
-
- deleted = in->deleted;
-
- if (ret_val == YAFFS_OK && in->unlinked && !in->deleted) {
- in->deleted = 1;
- deleted = 1;
- in->my_dev->n_deleted_files++;
- yaffs_soft_del_file(in);
- }
- return deleted ? YAFFS_OK : YAFFS_FAIL;
- } else {
- /* The file has no data chunks so we toss it immediately */
- yaffs_free_tnode(in->my_dev, in->variant.file_variant.top);
- in->variant.file_variant.top = NULL;
- yaffs_generic_obj_del(in);
-
- return YAFFS_OK;
- }
-}
-
-int yaffs_is_non_empty_dir(struct yaffs_obj *obj)
-{
- return (obj &&
- obj->variant_type == YAFFS_OBJECT_TYPE_DIRECTORY) &&
- !(list_empty(&obj->variant.dir_variant.children));
-}
-
-static int yaffs_del_dir(struct yaffs_obj *obj)
-{
- /* First check that the directory is empty. */
- if (yaffs_is_non_empty_dir(obj))
- return YAFFS_FAIL;
-
- return yaffs_generic_obj_del(obj);
-}
-
-static int yaffs_del_symlink(struct yaffs_obj *in)
-{
- if (in->variant.symlink_variant.alias)
- kfree(in->variant.symlink_variant.alias);
- in->variant.symlink_variant.alias = NULL;
-
- return yaffs_generic_obj_del(in);
-}
-
-static int yaffs_del_link(struct yaffs_obj *in)
-{
-	/* remove this hardlink from the list associated with the equivalent
- * object
- */
- list_del_init(&in->hard_links);
- return yaffs_generic_obj_del(in);
-}
-
-int yaffs_del_obj(struct yaffs_obj *obj)
-{
- int ret_val = -1;
- switch (obj->variant_type) {
- case YAFFS_OBJECT_TYPE_FILE:
- ret_val = yaffs_del_file(obj);
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- if (!list_empty(&obj->variant.dir_variant.dirty)) {
- yaffs_trace(YAFFS_TRACE_BACKGROUND,
- "Remove object %d from dirty directories",
- obj->obj_id);
- list_del_init(&obj->variant.dir_variant.dirty);
- }
- return yaffs_del_dir(obj);
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- ret_val = yaffs_del_symlink(obj);
- break;
- case YAFFS_OBJECT_TYPE_HARDLINK:
- ret_val = yaffs_del_link(obj);
- break;
- case YAFFS_OBJECT_TYPE_SPECIAL:
- ret_val = yaffs_generic_obj_del(obj);
- break;
- case YAFFS_OBJECT_TYPE_UNKNOWN:
- ret_val = 0;
- break; /* should not happen. */
- }
-
- return ret_val;
-}
-
-static int yaffs_unlink_worker(struct yaffs_obj *obj)
-{
-
- int del_now = 0;
-
- if (!obj->my_inode)
- del_now = 1;
-
-	/* obj is non-NULL here: yaffs_unlink_obj() checks before calling */
-	yaffs_update_parent(obj->parent);
-
- if (obj->variant_type == YAFFS_OBJECT_TYPE_HARDLINK) {
- return yaffs_del_link(obj);
- } else if (!list_empty(&obj->hard_links)) {
- /* Curve ball: We're unlinking an object that has a hardlink.
- *
- * This problem arises because we are not strictly following
-		 * the Linux link/inode model.
- *
- * We can't really delete the object.
- * Instead, we do the following:
- * - Select a hardlink.
- * - Unhook it from the hard links
- * - Move it from its parent directory (so that the rename can work)
- * - Rename the object to the hardlink's name.
- * - Delete the hardlink
- */
-
- struct yaffs_obj *hl;
- struct yaffs_obj *parent;
- int ret_val;
- YCHAR name[YAFFS_MAX_NAME_LENGTH + 1];
-
- hl = list_entry(obj->hard_links.next, struct yaffs_obj,
- hard_links);
-
- yaffs_get_obj_name(hl, name, YAFFS_MAX_NAME_LENGTH + 1);
- parent = hl->parent;
-
- list_del_init(&hl->hard_links);
-
- yaffs_add_obj_to_dir(obj->my_dev->unlinked_dir, hl);
-
- ret_val = yaffs_change_obj_name(obj, parent, name, 0, 0);
-
- if (ret_val == YAFFS_OK)
- ret_val = yaffs_generic_obj_del(hl);
-
- return ret_val;
-
- } else if (del_now) {
- switch (obj->variant_type) {
- case YAFFS_OBJECT_TYPE_FILE:
- return yaffs_del_file(obj);
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- list_del_init(&obj->variant.dir_variant.dirty);
- return yaffs_del_dir(obj);
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- return yaffs_del_symlink(obj);
- break;
- case YAFFS_OBJECT_TYPE_SPECIAL:
- return yaffs_generic_obj_del(obj);
- break;
- case YAFFS_OBJECT_TYPE_HARDLINK:
- case YAFFS_OBJECT_TYPE_UNKNOWN:
- default:
- return YAFFS_FAIL;
- }
- } else if (yaffs_is_non_empty_dir(obj)) {
- return YAFFS_FAIL;
- } else {
- return yaffs_change_obj_name(obj, obj->my_dev->unlinked_dir,
- _Y("unlinked"), 0, 0);
- }
-}
-
-static int yaffs_unlink_obj(struct yaffs_obj *obj)
-{
-
- if (obj && obj->unlink_allowed)
- return yaffs_unlink_worker(obj);
-
- return YAFFS_FAIL;
-
-}
-
-int yaffs_unlinker(struct yaffs_obj *dir, const YCHAR * name)
-{
- struct yaffs_obj *obj;
-
- obj = yaffs_find_by_name(dir, name);
- return yaffs_unlink_obj(obj);
-}
-
-/* Note:
- * If old_name is NULL then we take old_dir as the object to be renamed.
- */
-int yaffs_rename_obj(struct yaffs_obj *old_dir, const YCHAR * old_name,
- struct yaffs_obj *new_dir, const YCHAR * new_name)
-{
- struct yaffs_obj *obj = NULL;
- struct yaffs_obj *existing_target = NULL;
- int force = 0;
- int result;
- struct yaffs_dev *dev;
-
- if (!old_dir || old_dir->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY)
- YBUG();
- if (!new_dir || new_dir->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY)
- YBUG();
-
- dev = old_dir->my_dev;
-
-#ifdef CONFIG_YAFFS_CASE_INSENSITIVE
-	/* Special case for case insensitive systems.
- * While look-up is case insensitive, the name isn't.
- * Therefore we might want to change x.txt to X.txt
- */
- if (old_dir == new_dir &&
- old_name && new_name &&
- strcmp(old_name, new_name) == 0)
- force = 1;
-#endif
-
- if (strnlen(new_name, YAFFS_MAX_NAME_LENGTH + 1) >
- YAFFS_MAX_NAME_LENGTH)
- /* ENAMETOOLONG */
- return YAFFS_FAIL;
-
-	if (old_name)
-		obj = yaffs_find_by_name(old_dir, old_name);
-	else {
-		obj = old_dir;
-		old_dir = obj->parent;
-	}
-
- if (obj && obj->rename_allowed) {
-
- /* Now do the handling for an existing target, if there is one */
-
- existing_target = yaffs_find_by_name(new_dir, new_name);
-		if (yaffs_is_non_empty_dir(existing_target)) {
- return YAFFS_FAIL; /* ENOTEMPTY */
- } else if (existing_target && existing_target != obj) {
- /* Nuke the target first, using shadowing,
- * but only if it isn't the same object.
- *
- * Note we must disable gc otherwise it can mess up the shadowing.
- *
- */
- dev->gc_disable = 1;
- yaffs_change_obj_name(obj, new_dir, new_name, force,
- existing_target->obj_id);
- existing_target->is_shadowed = 1;
- yaffs_unlink_obj(existing_target);
- dev->gc_disable = 0;
- }
-
- result = yaffs_change_obj_name(obj, new_dir, new_name, 1, 0);
-
- yaffs_update_parent(old_dir);
- if (new_dir != old_dir)
- yaffs_update_parent(new_dir);
-
- return result;
- }
- return YAFFS_FAIL;
-}
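-
-/* Usage sketch (not in the original source; the directories, names and
- * helper are illustrative): the two calling conventions of
- * yaffs_rename_obj().
- */
-static void example_rename(struct yaffs_obj *dir, struct yaffs_obj *obj)
-{
-	/* Rename dir/x.txt to dir/y.txt by name look-up... */
-	yaffs_rename_obj(dir, _Y("x.txt"), dir, _Y("y.txt"));
-
-	/* ...or pass the object itself with a NULL old_name. */
-	yaffs_rename_obj(obj, NULL, dir, _Y("z.txt"));
-}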
-
-/*----------------------- Initialisation Scanning ---------------------- */
-
-void yaffs_handle_shadowed_obj(struct yaffs_dev *dev, int obj_id,
- int backward_scanning)
-{
- struct yaffs_obj *obj;
-
- if (!backward_scanning) {
- /* Handle YAFFS1 forward scanning case
- * For YAFFS1 we always do the deletion
- */
-
- } else {
- /* Handle YAFFS2 case (backward scanning)
- * If the shadowed object exists then ignore.
- */
- obj = yaffs_find_by_number(dev, obj_id);
- if (obj)
- return;
- }
-
-	/* Let's create it (if it does not exist), assuming it is a file
-	 * so that it can do shrinking etc.
-	 * We put it in the unlinked dir to be cleaned up after the scanning.
-	 */
- obj =
- yaffs_find_or_create_by_number(dev, obj_id, YAFFS_OBJECT_TYPE_FILE);
- if (!obj)
- return;
- obj->is_shadowed = 1;
- yaffs_add_obj_to_dir(dev->unlinked_dir, obj);
- obj->variant.file_variant.shrink_size = 0;
- obj->valid = 1; /* So that we don't read any other info for this file */
-
-}
-
-void yaffs_link_fixup(struct yaffs_dev *dev, struct yaffs_obj *hard_list)
-{
- struct yaffs_obj *hl;
- struct yaffs_obj *in;
-
- while (hard_list) {
- hl = hard_list;
- hard_list = (struct yaffs_obj *)(hard_list->hard_links.next);
-
- in = yaffs_find_by_number(dev,
- hl->variant.
- hardlink_variant.equiv_id);
-
- if (in) {
- /* Add the hardlink pointers */
- hl->variant.hardlink_variant.equiv_obj = in;
- list_add(&hl->hard_links, &in->hard_links);
- } else {
-			/* Todo: need to report/handle this better.
-			 * Got a problem... hardlink to a non-existent object
- */
- hl->variant.hardlink_variant.equiv_obj = NULL;
- INIT_LIST_HEAD(&hl->hard_links);
-
- }
- }
-}
-
-static void yaffs_strip_deleted_objs(struct yaffs_dev *dev)
-{
- /*
- * Sort out state of unlinked and deleted objects after scanning.
- */
- struct list_head *i;
- struct list_head *n;
- struct yaffs_obj *l;
-
- if (dev->read_only)
- return;
-
- /* Soft delete all the unlinked files */
- list_for_each_safe(i, n,
- &dev->unlinked_dir->variant.dir_variant.children) {
- if (i) {
- l = list_entry(i, struct yaffs_obj, siblings);
- yaffs_del_obj(l);
- }
- }
-
- list_for_each_safe(i, n, &dev->del_dir->variant.dir_variant.children) {
- if (i) {
- l = list_entry(i, struct yaffs_obj, siblings);
- yaffs_del_obj(l);
- }
- }
-
-}
-
-/*
- * This code iterates through all the objects making sure that they are rooted.
- * Any unrooted objects are re-rooted in lost+found.
- * An object needs to be in one of:
- * - Directly under deleted, unlinked
- * - Directly or indirectly under root.
- *
- * Note:
- * This code assumes that we don't ever change the current relationships between
- * directories:
- * root_dir->parent == unlinked_dir->parent == del_dir->parent == NULL
- * lost-n-found->parent == root_dir
- *
- * This fixes the problem where directories might have inadvertently been deleted
- * leaving the object "hanging" without being rooted in the directory tree.
- */
-
-static int yaffs_has_null_parent(struct yaffs_dev *dev, struct yaffs_obj *obj)
-{
- return (obj == dev->del_dir ||
- obj == dev->unlinked_dir || obj == dev->root_dir);
-}
-
-static void yaffs_fix_hanging_objs(struct yaffs_dev *dev)
-{
- struct yaffs_obj *obj;
- struct yaffs_obj *parent;
- int i;
- struct list_head *lh;
- struct list_head *n;
- int depth_limit;
- int hanging;
-
- if (dev->read_only)
- return;
-
- /* Iterate through the objects in each hash entry,
- * looking at each object.
- * Make sure it is rooted.
- */
-
- for (i = 0; i < YAFFS_NOBJECT_BUCKETS; i++) {
- list_for_each_safe(lh, n, &dev->obj_bucket[i].list) {
- if (lh) {
- obj =
- list_entry(lh, struct yaffs_obj, hash_link);
- parent = obj->parent;
-
- if (yaffs_has_null_parent(dev, obj)) {
- /* These directories are not hanging */
- hanging = 0;
- } else if (!parent
- || parent->variant_type !=
- YAFFS_OBJECT_TYPE_DIRECTORY) {
- hanging = 1;
- } else if (yaffs_has_null_parent(dev, parent)) {
- hanging = 0;
- } else {
- /*
- * Need to follow the parent chain to see if it is hanging.
- */
- hanging = 0;
- depth_limit = 100;
-
- while (parent != dev->root_dir &&
- parent->parent &&
- parent->parent->variant_type ==
- YAFFS_OBJECT_TYPE_DIRECTORY
- && depth_limit > 0) {
- parent = parent->parent;
- depth_limit--;
- }
- if (parent != dev->root_dir)
- hanging = 1;
- }
- if (hanging) {
- yaffs_trace(YAFFS_TRACE_SCAN,
- "Hanging object %d moved to lost and found",
- obj->obj_id);
- yaffs_add_obj_to_dir(dev->lost_n_found,
- obj);
- }
- }
- }
- }
-}
-
-/*
- * Delete directory contents for cleaning up lost and found.
- */
-static void yaffs_del_dir_contents(struct yaffs_obj *dir)
-{
- struct yaffs_obj *obj;
- struct list_head *lh;
- struct list_head *n;
-
- if (dir->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY)
- YBUG();
-
- list_for_each_safe(lh, n, &dir->variant.dir_variant.children) {
- if (lh) {
- obj = list_entry(lh, struct yaffs_obj, siblings);
- if (obj->variant_type == YAFFS_OBJECT_TYPE_DIRECTORY)
- yaffs_del_dir_contents(obj);
-
- yaffs_trace(YAFFS_TRACE_SCAN,
- "Deleting lost_found object %d",
- obj->obj_id);
-
- /* Need to use UnlinkObject since Delete would not handle
- * hardlinked objects correctly.
- */
- yaffs_unlink_obj(obj);
- }
- }
-
-}
-
-static void yaffs_empty_l_n_f(struct yaffs_dev *dev)
-{
- yaffs_del_dir_contents(dev->lost_n_found);
-}
-
-
-struct yaffs_obj *yaffs_find_by_name(struct yaffs_obj *directory,
- const YCHAR * name)
-{
- int sum;
-
- struct list_head *i;
- YCHAR buffer[YAFFS_MAX_NAME_LENGTH + 1];
-
- struct yaffs_obj *l;
-
- if (!name)
- return NULL;
-
- if (!directory) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "tragedy: yaffs_find_by_name: null pointer directory"
- );
- YBUG();
- return NULL;
- }
- if (directory->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "tragedy: yaffs_find_by_name: non-directory"
- );
- YBUG();
- }
-
- sum = yaffs_calc_name_sum(name);
-
- list_for_each(i, &directory->variant.dir_variant.children) {
- if (i) {
- l = list_entry(i, struct yaffs_obj, siblings);
-
- if (l->parent != directory)
- YBUG();
-
- yaffs_check_obj_details_loaded(l);
-
- /* Special case for lost-n-found */
- if (l->obj_id == YAFFS_OBJECTID_LOSTNFOUND) {
- if (!strcmp(name, YAFFS_LOSTNFOUND_NAME))
- return l;
- } else if (l->sum == sum
- || l->hdr_chunk <= 0) {
- /* LostnFound chunk called Objxxx
- * Do a real check
- */
- yaffs_get_obj_name(l, buffer,
- YAFFS_MAX_NAME_LENGTH + 1);
- if (strncmp
- (name, buffer, YAFFS_MAX_NAME_LENGTH) == 0)
- return l;
- }
- }
- }
-
- return NULL;
-}
-
-/* GetEquivalentObject (yaffs_get_equivalent_obj) dereferences any hard links to get to the
- * actual object.
- */
-
-struct yaffs_obj *yaffs_get_equivalent_obj(struct yaffs_obj *obj)
-{
- if (obj && obj->variant_type == YAFFS_OBJECT_TYPE_HARDLINK) {
- /* We want the object id of the equivalent object, not this one */
- obj = obj->variant.hardlink_variant.equiv_obj;
- yaffs_check_obj_details_loaded(obj);
- }
- return obj;
-}
-
-/*
- * A note or two on object names.
- * * If the object name is missing, we then make one up in the form objnnn
- *
- * * ASCII names are stored in the object header's name field from byte zero
- * * Unicode names are historically stored starting from byte zero.
- *
- * Then there are automatic Unicode names...
- * The purpose of these is to save names in a way that can be read as
- * ASCII or Unicode names as appropriate, thus allowing a Unicode and ASCII
- * system to share files.
- *
- * These automatic unicode names are stored slightly differently:
- * - If the name can fit in the ASCII character space then it is saved
- *   as an ASCII name as per above.
- * - If the name needs Unicode then it is saved in Unicode,
- *   starting at oh->name[1].
- */
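-
-/* Layout illustration (not in the original source) of the auto-unicode
- * scheme described above, viewing oh->name as it is read back:
- *
- *	ASCII-safe "cat":  'c' 'a' 't' '\0' ...   (read from name[0])
- *	Unicode needed:     0  <YCHAR data...>    (read from name[1])
- *
- * A zero in the first position therefore flags a Unicode name; see
- * yaffs_load_oh_from_name() above for the matching encoder.
- */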
-static void yaffs_fix_null_name(struct yaffs_obj *obj, YCHAR * name,
- int buffer_size)
-{
- /* Create an object name if we could not find one. */
- if (strnlen(name, YAFFS_MAX_NAME_LENGTH) == 0) {
- YCHAR local_name[20];
- YCHAR num_string[20];
- YCHAR *x = &num_string[19];
- unsigned v = obj->obj_id;
- num_string[19] = 0;
- while (v > 0) {
- x--;
- *x = '0' + (v % 10);
- v /= 10;
- }
- /* make up a name */
- strcpy(local_name, YAFFS_LOSTNFOUND_PREFIX);
- strcat(local_name, x);
- strncpy(name, local_name, buffer_size - 1);
- }
-}
-
-int yaffs_get_obj_name(struct yaffs_obj *obj, YCHAR * name, int buffer_size)
-{
- memset(name, 0, buffer_size * sizeof(YCHAR));
-
- yaffs_check_obj_details_loaded(obj);
-
- if (obj->obj_id == YAFFS_OBJECTID_LOSTNFOUND) {
- strncpy(name, YAFFS_LOSTNFOUND_NAME, buffer_size - 1);
- }
-#ifndef CONFIG_YAFFS_NO_SHORT_NAMES
- else if (obj->short_name[0]) {
- strcpy(name, obj->short_name);
- }
-#endif
- else if (obj->hdr_chunk > 0) {
- int result;
- u8 *buffer = yaffs_get_temp_buffer(obj->my_dev, __LINE__);
-
- struct yaffs_obj_hdr *oh = (struct yaffs_obj_hdr *)buffer;
-
- memset(buffer, 0, obj->my_dev->data_bytes_per_chunk);
-
- if (obj->hdr_chunk > 0) {
- result = yaffs_rd_chunk_tags_nand(obj->my_dev,
- obj->hdr_chunk,
- buffer, NULL);
- }
- yaffs_load_name_from_oh(obj->my_dev, name, oh->name,
- buffer_size);
-
- yaffs_release_temp_buffer(obj->my_dev, buffer, __LINE__);
- }
-
- yaffs_fix_null_name(obj, name, buffer_size);
-
- return strnlen(name, YAFFS_MAX_NAME_LENGTH);
-}
-
-int yaffs_get_obj_length(struct yaffs_obj *obj)
-{
- /* Dereference any hard linking */
- obj = yaffs_get_equivalent_obj(obj);
-
- if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE)
- return obj->variant.file_variant.file_size;
- if (obj->variant_type == YAFFS_OBJECT_TYPE_SYMLINK) {
- if (!obj->variant.symlink_variant.alias)
- return 0;
- return strnlen(obj->variant.symlink_variant.alias,
- YAFFS_MAX_ALIAS_LENGTH);
- } else {
- /* Only a directory should drop through to here */
- return obj->my_dev->data_bytes_per_chunk;
- }
-}
-
-int yaffs_get_obj_link_count(struct yaffs_obj *obj)
-{
- int count = 0;
- struct list_head *i;
-
- if (!obj->unlinked)
- count++; /* the object itself */
-
- list_for_each(i, &obj->hard_links)
-		count++;	/* add the hard links */
-
- return count;
-}
-
-int yaffs_get_obj_inode(struct yaffs_obj *obj)
-{
- obj = yaffs_get_equivalent_obj(obj);
-
- return obj->obj_id;
-}
-
-unsigned yaffs_get_obj_type(struct yaffs_obj *obj)
-{
- obj = yaffs_get_equivalent_obj(obj);
-
- switch (obj->variant_type) {
- case YAFFS_OBJECT_TYPE_FILE:
- return DT_REG;
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- return DT_DIR;
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- return DT_LNK;
- break;
- case YAFFS_OBJECT_TYPE_HARDLINK:
- return DT_REG;
- break;
- case YAFFS_OBJECT_TYPE_SPECIAL:
- if (S_ISFIFO(obj->yst_mode))
- return DT_FIFO;
- if (S_ISCHR(obj->yst_mode))
- return DT_CHR;
- if (S_ISBLK(obj->yst_mode))
- return DT_BLK;
- if (S_ISSOCK(obj->yst_mode))
- return DT_SOCK;
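-		/* fall through: any other special mode reads as DT_REG */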
- default:
- return DT_REG;
- break;
- }
-}
-
-YCHAR *yaffs_get_symlink_alias(struct yaffs_obj *obj)
-{
- obj = yaffs_get_equivalent_obj(obj);
- if (obj->variant_type == YAFFS_OBJECT_TYPE_SYMLINK)
- return yaffs_clone_str(obj->variant.symlink_variant.alias);
- else
- return yaffs_clone_str(_Y(""));
-}
-
-/*--------------------------- Initialisation code -------------------------- */
-
-static int yaffs_check_dev_fns(const struct yaffs_dev *dev)
-{
-
- /* Common functions, gotta have */
- if (!dev->param.erase_fn || !dev->param.initialise_flash_fn)
- return 0;
-
-#ifdef CONFIG_YAFFS_YAFFS2
-
- /* Can use the "with tags" style interface for yaffs1 or yaffs2 */
- if (dev->param.write_chunk_tags_fn &&
- dev->param.read_chunk_tags_fn &&
- !dev->param.write_chunk_fn &&
- !dev->param.read_chunk_fn &&
- dev->param.bad_block_fn && dev->param.query_block_fn)
- return 1;
-#endif
-
- /* Can use the "spare" style interface for yaffs1 */
- if (!dev->param.is_yaffs2 &&
- !dev->param.write_chunk_tags_fn &&
- !dev->param.read_chunk_tags_fn &&
- dev->param.write_chunk_fn &&
- dev->param.read_chunk_fn &&
- !dev->param.bad_block_fn && !dev->param.query_block_fn)
- return 1;
-
- return 0; /* bad */
-}
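-
-/* Configuration sketch (not in the original source): the "with tags"
- * interface that yaffs_check_dev_fns() accepts for yaffs2. The
- * my_nand_* callbacks are hypothetical driver functions.
- */
-static void example_setup_dev_fns(struct yaffs_dev *dev)
-{
-	dev->param.write_chunk_tags_fn = my_nand_write_chunk_tags;
-	dev->param.read_chunk_tags_fn = my_nand_read_chunk_tags;
-	dev->param.bad_block_fn = my_nand_mark_bad;
-	dev->param.query_block_fn = my_nand_query_block;
-	dev->param.erase_fn = my_nand_erase_block;
-	dev->param.initialise_flash_fn = my_nand_init;
-	/* write_chunk_fn / read_chunk_fn stay NULL for this interface */
-}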
-
-static int yaffs_create_initial_dir(struct yaffs_dev *dev)
-{
- /* Initialise the unlinked, deleted, root and lost and found directories */
-
- dev->lost_n_found = dev->root_dir = NULL;
- dev->unlinked_dir = dev->del_dir = NULL;
-
- dev->unlinked_dir =
- yaffs_create_fake_dir(dev, YAFFS_OBJECTID_UNLINKED, S_IFDIR);
-
- dev->del_dir =
- yaffs_create_fake_dir(dev, YAFFS_OBJECTID_DELETED, S_IFDIR);
-
- dev->root_dir =
- yaffs_create_fake_dir(dev, YAFFS_OBJECTID_ROOT,
- YAFFS_ROOT_MODE | S_IFDIR);
- dev->lost_n_found =
- yaffs_create_fake_dir(dev, YAFFS_OBJECTID_LOSTNFOUND,
- YAFFS_LOSTNFOUND_MODE | S_IFDIR);
-
- if (dev->lost_n_found && dev->root_dir && dev->unlinked_dir
- && dev->del_dir) {
- yaffs_add_obj_to_dir(dev->root_dir, dev->lost_n_found);
- return YAFFS_OK;
- }
-
- return YAFFS_FAIL;
-}
-
-int yaffs_guts_initialise(struct yaffs_dev *dev)
-{
- int init_failed = 0;
- unsigned x;
- int bits;
-
- yaffs_trace(YAFFS_TRACE_TRACING, "yaffs: yaffs_guts_initialise()" );
-
- /* Check stuff that must be set */
-
- if (!dev) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs: Need a device"
- );
- return YAFFS_FAIL;
- }
-
- dev->internal_start_block = dev->param.start_block;
- dev->internal_end_block = dev->param.end_block;
- dev->block_offset = 0;
- dev->chunk_offset = 0;
- dev->n_free_chunks = 0;
-
- dev->gc_block = 0;
-
- if (dev->param.start_block == 0) {
- dev->internal_start_block = dev->param.start_block + 1;
- dev->internal_end_block = dev->param.end_block + 1;
- dev->block_offset = 1;
- dev->chunk_offset = dev->param.chunks_per_block;
- }
-
- /* Check geometry parameters. */
-
- if ((!dev->param.inband_tags && dev->param.is_yaffs2 &&
- dev->param.total_bytes_per_chunk < 1024) ||
- (!dev->param.is_yaffs2 &&
- dev->param.total_bytes_per_chunk < 512) ||
- (dev->param.inband_tags && !dev->param.is_yaffs2) ||
- dev->param.chunks_per_block < 2 ||
- dev->param.n_reserved_blocks < 2 ||
- dev->internal_start_block <= 0 ||
- dev->internal_end_block <= 0 ||
- dev->internal_end_block <=
- (dev->internal_start_block + dev->param.n_reserved_blocks + 2)
- ) {
- /* otherwise it is too small */
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "NAND geometry problems: chunk size %d, type is yaffs%s, inband_tags %d ",
- dev->param.total_bytes_per_chunk,
- dev->param.is_yaffs2 ? "2" : "",
- dev->param.inband_tags);
- return YAFFS_FAIL;
- }
-
- if (yaffs_init_nand(dev) != YAFFS_OK) {
- yaffs_trace(YAFFS_TRACE_ALWAYS, "InitialiseNAND failed");
- return YAFFS_FAIL;
- }
-
- /* Sort out space for inband tags, if required */
- if (dev->param.inband_tags)
- dev->data_bytes_per_chunk =
- dev->param.total_bytes_per_chunk -
- sizeof(struct yaffs_packed_tags2_tags_only);
- else
- dev->data_bytes_per_chunk = dev->param.total_bytes_per_chunk;
-
- /* Got the right mix of functions? */
- if (!yaffs_check_dev_fns(dev)) {
- /* Function missing */
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "device function(s) missing or wrong");
-
- return YAFFS_FAIL;
- }
-
- if (dev->is_mounted) {
- yaffs_trace(YAFFS_TRACE_ALWAYS, "device already mounted");
- return YAFFS_FAIL;
- }
-
- /* Finished with most checks. One or two more checks happen later on too. */
-
- dev->is_mounted = 1;
-
- /* OK now calculate a few things for the device */
-
- /*
- * Calculate all the chunk size manipulation numbers:
- */
- x = dev->data_bytes_per_chunk;
- /* We always use dev->chunk_shift and dev->chunk_div */
- dev->chunk_shift = calc_shifts(x);
- x >>= dev->chunk_shift;
- dev->chunk_div = x;
- /* We only use chunk mask if chunk_div is 1 */
- dev->chunk_mask = (1 << dev->chunk_shift) - 1;
-
- /*
- * Calculate chunk_grp_bits.
- * We need to find the next power of 2 > than internal_end_block
- */
-
- x = dev->param.chunks_per_block * (dev->internal_end_block + 1);
-
- bits = calc_shifts_ceiling(x);
-
- /* Set up tnode width if wide tnodes are enabled. */
- if (!dev->param.wide_tnodes_disabled) {
- /* bits must be even so that we end up with 32-bit words */
- if (bits & 1)
- bits++;
- if (bits < 16)
- dev->tnode_width = 16;
- else
- dev->tnode_width = bits;
- } else {
- dev->tnode_width = 16;
- }
-
- dev->tnode_mask = (1 << dev->tnode_width) - 1;
-
- /* Level0 Tnodes are 16 bits or wider (if wide tnodes are enabled),
- * so if the bitwidth of the
- * chunk range we're using is greater than 16 we need
- * to figure out chunk shift and chunk_grp_size
- */
-
- if (bits <= dev->tnode_width)
- dev->chunk_grp_bits = 0;
- else
- dev->chunk_grp_bits = bits - dev->tnode_width;
-
- dev->tnode_size = (dev->tnode_width * YAFFS_NTNODES_LEVEL0) / 8;
- if (dev->tnode_size < sizeof(struct yaffs_tnode))
- dev->tnode_size = sizeof(struct yaffs_tnode);
-
- dev->chunk_grp_size = 1 << dev->chunk_grp_bits;
-
- if (dev->param.chunks_per_block < dev->chunk_grp_size) {
- /* We have a problem because the soft delete won't work if
- * the chunk group size > chunks per block.
- * This can be remedied by using larger "virtual blocks".
- */
- yaffs_trace(YAFFS_TRACE_ALWAYS, "chunk group too large");
-
- return YAFFS_FAIL;
- }
-
-	/* OK, we've finished verifying the device, let's continue with initialisation */
-
- /* More device initialisation */
- dev->all_gcs = 0;
- dev->passive_gc_count = 0;
- dev->oldest_dirty_gc_count = 0;
- dev->bg_gcs = 0;
- dev->gc_block_finder = 0;
- dev->buffered_block = -1;
- dev->doing_buffered_block_rewrite = 0;
- dev->n_deleted_files = 0;
- dev->n_bg_deletions = 0;
- dev->n_unlinked_files = 0;
- dev->n_ecc_fixed = 0;
- dev->n_ecc_unfixed = 0;
- dev->n_tags_ecc_fixed = 0;
- dev->n_tags_ecc_unfixed = 0;
- dev->n_erase_failures = 0;
- dev->n_erased_blocks = 0;
- dev->gc_disable = 0;
- dev->has_pending_prioritised_gc = 1; /* Assume the worst for now, will get fixed on first GC */
- INIT_LIST_HEAD(&dev->dirty_dirs);
- dev->oldest_dirty_seq = 0;
- dev->oldest_dirty_block = 0;
-
- /* Initialise temporary buffers and caches. */
- if (!yaffs_init_tmp_buffers(dev))
- init_failed = 1;
-
- dev->cache = NULL;
- dev->gc_cleanup_list = NULL;
-
- if (!init_failed && dev->param.n_caches > 0) {
- int i;
- void *buf;
- int cache_bytes =
- dev->param.n_caches * sizeof(struct yaffs_cache);
-
- if (dev->param.n_caches > YAFFS_MAX_SHORT_OP_CACHES)
- dev->param.n_caches = YAFFS_MAX_SHORT_OP_CACHES;
-
- dev->cache = kmalloc(cache_bytes, GFP_NOFS);
-
- buf = (u8 *) dev->cache;
-
- if (dev->cache)
- memset(dev->cache, 0, cache_bytes);
-
- for (i = 0; i < dev->param.n_caches && buf; i++) {
- dev->cache[i].object = NULL;
- dev->cache[i].last_use = 0;
- dev->cache[i].dirty = 0;
- dev->cache[i].data = buf =
- kmalloc(dev->param.total_bytes_per_chunk, GFP_NOFS);
- }
- if (!buf)
- init_failed = 1;
-
- dev->cache_last_use = 0;
- }
-
- dev->cache_hits = 0;
-
- if (!init_failed) {
- dev->gc_cleanup_list =
- kmalloc(dev->param.chunks_per_block * sizeof(u32),
- GFP_NOFS);
- if (!dev->gc_cleanup_list)
- init_failed = 1;
- }
-
- if (dev->param.is_yaffs2)
- dev->param.use_header_file_size = 1;
-
- if (!init_failed && !yaffs_init_blocks(dev))
- init_failed = 1;
-
- yaffs_init_tnodes_and_objs(dev);
-
- if (!init_failed && !yaffs_create_initial_dir(dev))
- init_failed = 1;
-
- if (!init_failed) {
- /* Now scan the flash. */
- if (dev->param.is_yaffs2) {
- if (yaffs2_checkpt_restore(dev)) {
- yaffs_check_obj_details_loaded(dev->root_dir);
- yaffs_trace(YAFFS_TRACE_CHECKPOINT | YAFFS_TRACE_MOUNT,
- "yaffs: restored from checkpoint"
- );
- } else {
-
- /* Clean up the mess caused by an aborted checkpoint load
- * and scan backwards.
- */
- yaffs_deinit_blocks(dev);
-
- yaffs_deinit_tnodes_and_objs(dev);
-
- dev->n_erased_blocks = 0;
- dev->n_free_chunks = 0;
- dev->alloc_block = -1;
- dev->alloc_page = -1;
- dev->n_deleted_files = 0;
- dev->n_unlinked_files = 0;
- dev->n_bg_deletions = 0;
-
- if (!init_failed && !yaffs_init_blocks(dev))
- init_failed = 1;
-
- yaffs_init_tnodes_and_objs(dev);
-
- if (!init_failed
- && !yaffs_create_initial_dir(dev))
- init_failed = 1;
-
- if (!init_failed && !yaffs2_scan_backwards(dev))
- init_failed = 1;
- }
- } else if (!yaffs1_scan(dev)) {
- init_failed = 1;
- }
-
- yaffs_strip_deleted_objs(dev);
- yaffs_fix_hanging_objs(dev);
- if (dev->param.empty_lost_n_found)
- yaffs_empty_l_n_f(dev);
- }
-
- if (init_failed) {
- /* Clean up the mess */
- yaffs_trace(YAFFS_TRACE_TRACING,
- "yaffs: yaffs_guts_initialise() aborted.");
-
- yaffs_deinitialise(dev);
- return YAFFS_FAIL;
- }
-
- /* Zero out stats */
- dev->n_page_reads = 0;
- dev->n_page_writes = 0;
- dev->n_erasures = 0;
- dev->n_gc_copies = 0;
- dev->n_retired_writes = 0;
-
- dev->n_retired_blocks = 0;
-
- yaffs_verify_free_chunks(dev);
- yaffs_verify_blocks(dev);
-
- /* Clean up any aborted checkpoint data */
- if (!dev->is_checkpointed && dev->blocks_in_checkpt > 0)
- yaffs2_checkpt_invalidate(dev);
-
- yaffs_trace(YAFFS_TRACE_TRACING,
- "yaffs: yaffs_guts_initialise() done.");
- return YAFFS_OK;
-
-}
-
-void yaffs_deinitialise(struct yaffs_dev *dev)
-{
- if (dev->is_mounted) {
- int i;
-
- yaffs_deinit_blocks(dev);
- yaffs_deinit_tnodes_and_objs(dev);
- if (dev->param.n_caches > 0 && dev->cache) {
-
- for (i = 0; i < dev->param.n_caches; i++) {
- if (dev->cache[i].data)
- kfree(dev->cache[i].data);
- dev->cache[i].data = NULL;
- }
-
- kfree(dev->cache);
- dev->cache = NULL;
- }
-
- kfree(dev->gc_cleanup_list);
-
- for (i = 0; i < YAFFS_N_TEMP_BUFFERS; i++)
- kfree(dev->temp_buffer[i].buffer);
-
- dev->is_mounted = 0;
-
- if (dev->param.deinitialise_flash_fn)
- dev->param.deinitialise_flash_fn(dev);
- }
-}
-
-int yaffs_count_free_chunks(struct yaffs_dev *dev)
-{
- int n_free = 0;
- int b;
-
- struct yaffs_block_info *blk;
-
- blk = dev->block_info;
- for (b = dev->internal_start_block; b <= dev->internal_end_block; b++) {
- switch (blk->block_state) {
- case YAFFS_BLOCK_STATE_EMPTY:
- case YAFFS_BLOCK_STATE_ALLOCATING:
- case YAFFS_BLOCK_STATE_COLLECTING:
- case YAFFS_BLOCK_STATE_FULL:
- n_free +=
- (dev->param.chunks_per_block - blk->pages_in_use +
- blk->soft_del_pages);
- break;
- default:
- break;
- }
- blk++;
- }
-
- return n_free;
-}
-
-int yaffs_get_n_free_chunks(struct yaffs_dev *dev)
-{
- /* This is what we report to the outside world */
-
- int n_free;
- int n_dirty_caches;
- int blocks_for_checkpt;
- int i;
-
- n_free = dev->n_free_chunks;
- n_free += dev->n_deleted_files;
-
- /* Now count the number of dirty chunks in the cache and subtract those */
-
- for (n_dirty_caches = 0, i = 0; i < dev->param.n_caches; i++) {
- if (dev->cache[i].dirty)
- n_dirty_caches++;
- }
-
- n_free -= n_dirty_caches;
-
- n_free -=
- ((dev->param.n_reserved_blocks + 1) * dev->param.chunks_per_block);
-
- /* Now we figure out how much to reserve for the checkpoint and report that... */
- blocks_for_checkpt = yaffs_calc_checkpt_blocks_required(dev);
-
- n_free -= (blocks_for_checkpt * dev->param.chunks_per_block);
-
- if (n_free < 0)
- n_free = 0;
-
- return n_free;
-
-}
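Before moving on to the header, note that the free-space report above boils down to one expression: free chunks plus chunks reclaimable from deleted files, minus dirty cache entries, the (n_reserved_blocks + 1) block reserve, and the checkpoint reserve. A minimal sketch of the same arithmetic with made-up example figures (the names mirror the function above; the numbers are illustrative only):

#include <stdio.h>

/* Recompute yaffs_get_n_free_chunks() on paper; every input here is an
 * invented example value, not real device state.
 */
int main(void)
{
	int n_free_chunks = 10000;	/* dev->n_free_chunks */
	int n_deleted_files = 12;	/* reclaimable once deletion completes */
	int n_dirty_caches = 3;		/* dirty short-op cache entries */
	int n_reserved_blocks = 5;	/* dev->param.n_reserved_blocks */
	int chunks_per_block = 64;
	int blocks_for_checkpt = 2;	/* yaffs_calc_checkpt_blocks_required() */

	int n_free = n_free_chunks + n_deleted_files - n_dirty_caches
	    - (n_reserved_blocks + 1) * chunks_per_block
	    - blocks_for_checkpt * chunks_per_block;

	if (n_free < 0)
		n_free = 0;

	printf("reported free chunks: %d\n", n_free);	/* 10012 - 3 - 384 - 128 = 9497 */
	return 0;
}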
diff --git a/fs/yaffs2/yaffs_guts.h b/fs/yaffs2/yaffs_guts.h
deleted file mode 100644
index 307eba2..0000000
--- a/fs/yaffs2/yaffs_guts.h
+++ /dev/null
@@ -1,915 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_GUTS_H__
-#define __YAFFS_GUTS_H__
-
-#include "yportenv.h"
-
-#define YAFFS_OK 1
-#define YAFFS_FAIL 0
-
-/* Give us a Y=0x59,
- * Give us an A=0x41,
- * Give us an FF=0xFF
- * Give us an S=0x53
- * And what have we got...
- */
-#define YAFFS_MAGIC 0x5941FF53
-
-#define YAFFS_NTNODES_LEVEL0 16
-#define YAFFS_TNODES_LEVEL0_BITS 4
-#define YAFFS_TNODES_LEVEL0_MASK 0xf
-
-#define YAFFS_NTNODES_INTERNAL (YAFFS_NTNODES_LEVEL0 / 2)
-#define YAFFS_TNODES_INTERNAL_BITS (YAFFS_TNODES_LEVEL0_BITS - 1)
-#define YAFFS_TNODES_INTERNAL_MASK 0x7
-#define YAFFS_TNODES_MAX_LEVEL 6
-
-#ifndef CONFIG_YAFFS_NO_YAFFS1
-#define YAFFS_BYTES_PER_SPARE 16
-#define YAFFS_BYTES_PER_CHUNK 512
-#define YAFFS_CHUNK_SIZE_SHIFT 9
-#define YAFFS_CHUNKS_PER_BLOCK 32
-#define YAFFS_BYTES_PER_BLOCK (YAFFS_CHUNKS_PER_BLOCK*YAFFS_BYTES_PER_CHUNK)
-#endif
-
-#define YAFFS_MIN_YAFFS2_CHUNK_SIZE 1024
-#define YAFFS_MIN_YAFFS2_SPARE_SIZE 32
-
-#define YAFFS_MAX_CHUNK_ID 0x000FFFFF
-
-#define YAFFS_ALLOCATION_NOBJECTS 100
-#define YAFFS_ALLOCATION_NTNODES 100
-#define YAFFS_ALLOCATION_NLINKS 100
-
-#define YAFFS_NOBJECT_BUCKETS 256
-
-#define YAFFS_OBJECT_SPACE 0x40000
-#define YAFFS_MAX_OBJECT_ID (YAFFS_OBJECT_SPACE -1)
-
-#define YAFFS_CHECKPOINT_VERSION 4
-
-#ifdef CONFIG_YAFFS_UNICODE
-#define YAFFS_MAX_NAME_LENGTH 127
-#define YAFFS_MAX_ALIAS_LENGTH 79
-#else
-#define YAFFS_MAX_NAME_LENGTH 255
-#define YAFFS_MAX_ALIAS_LENGTH 159
-#endif
-
-#define YAFFS_SHORT_NAME_LENGTH 15
-
-/* Some special object ids for pseudo objects */
-#define YAFFS_OBJECTID_ROOT 1
-#define YAFFS_OBJECTID_LOSTNFOUND 2
-#define YAFFS_OBJECTID_UNLINKED 3
-#define YAFFS_OBJECTID_DELETED 4
-
-/* Pseudo object ids for checkpointing */
-#define YAFFS_OBJECTID_SB_HEADER 0x10
-#define YAFFS_OBJECTID_CHECKPOINT_DATA 0x20
-#define YAFFS_SEQUENCE_CHECKPOINT_DATA 0x21
-
-#define YAFFS_MAX_SHORT_OP_CACHES 20
-
-#define YAFFS_N_TEMP_BUFFERS 6
-
-/* We limit the number of attempts at successfully saving a chunk of data.
- * Small-page devices have 32 pages per block; large-page devices have 64.
- * Default to something in the order of 5 to 10 blocks worth of chunks.
- */
-#define YAFFS_WR_ATTEMPTS (5*64)
-
-/* Sequence numbers are used in YAFFS2 to determine block allocation order.
- * The range is limited slightly to help distinguish bad numbers from good.
- * This also allows us to perhaps in the future use special numbers for
- * special purposes.
- * 0xEFFFFF00 allows the allocation of 8 blocks per second (~1 Mbyte/s) for 15 years,
- * which comfortably exceeds the lifetime of a 2GB device.
- */
-#define YAFFS_LOWEST_SEQUENCE_NUMBER 0x00001000
-#define YAFFS_HIGHEST_SEQUENCE_NUMBER 0xEFFFFF00
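As a quick sanity check on the 15-year claim above (a back-of-envelope sketch, not part of the original header): the usable range is roughly 0xEFFFFF00 - 0x1000, about 4.0e9 sequence numbers, and at 8 block allocations per second that lasts about 16 years:

#include <stdio.h>

/* Rough lifetime of the YAFFS2 sequence number range at a steady
 * allocation rate; purely illustrative.
 */
int main(void)
{
	double range = (double)(0xEFFFFF00u - 0x00001000u);
	double blocks_per_sec = 8.0;
	double years = range / blocks_per_sec / (365.25 * 24 * 3600);

	printf("~%.1f years at %g blocks/s\n", years, blocks_per_sec);	/* ~16.0 */
	return 0;
}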
-
-/* Special sequence number for bad block that failed to be marked bad */
-#define YAFFS_SEQUENCE_BAD_BLOCK 0xFFFF0000
-
-/* ChunkCache is used for short read/write operations.*/
-struct yaffs_cache {
- struct yaffs_obj *object;
- int chunk_id;
- int last_use;
- int dirty;
- int n_bytes; /* Only valid if the cache is dirty */
- int locked; /* Can't push out or flush while locked. */
- u8 *data;
-};
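The last_use, dirty and locked fields suggest the cache's push-out policy: pick the least recently used entry that is not locked. A hedged sketch of that victim selection, assuming the struct above; the LRU policy here is an illustration, not the verbatim yaffs algorithm:

#include <stddef.h>

/* Hypothetical LRU victim selection over the short-op cache array. */
static struct yaffs_cache *cache_pick_victim(struct yaffs_cache *cache,
					     int n_caches)
{
	struct yaffs_cache *victim = NULL;
	int i;

	for (i = 0; i < n_caches; i++) {
		if (cache[i].locked)
			continue;	/* can't push out or flush while locked */
		if (!victim || cache[i].last_use < victim->last_use)
			victim = &cache[i];
	}
	return victim;	/* NULL if every entry is locked */
}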
-
-/* Tags structures in RAM
- * NB This uses bitfield. Bitfields should not straddle a u32 boundary otherwise
- * the structure size will get blown out.
- */
-
-#ifndef CONFIG_YAFFS_NO_YAFFS1
-struct yaffs_tags {
- unsigned chunk_id:20;
- unsigned serial_number:2;
- unsigned n_bytes_lsb:10;
- unsigned obj_id:18;
- unsigned ecc:12;
- unsigned n_bytes_msb:2;
-};
-
-union yaffs_tags_union {
- struct yaffs_tags as_tags;
- u8 as_bytes[8];
-};
-
-#endif
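The boundary warning above can be enforced mechanically. A sketch using C11 _Static_assert (an addition for illustration, not in the original source; bitfield packing is ABI-dependent, but on the usual ABIs the fields sum to two full 32-bit words):

/* 20 + 2 + 10 = 32 bits, then 18 + 12 + 2 = 32 bits: no field
 * straddles a u32 boundary, so the union stays 8 bytes.
 */
_Static_assert(sizeof(union yaffs_tags_union) == 8,
	       "yaffs1 tags must pack into 8 bytes");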
-
-/* Stuff used for extended tags in YAFFS2 */
-
-enum yaffs_ecc_result {
- YAFFS_ECC_RESULT_UNKNOWN,
- YAFFS_ECC_RESULT_NO_ERROR,
- YAFFS_ECC_RESULT_FIXED,
- YAFFS_ECC_RESULT_UNFIXED
-};
-
-enum yaffs_obj_type {
- YAFFS_OBJECT_TYPE_UNKNOWN,
- YAFFS_OBJECT_TYPE_FILE,
- YAFFS_OBJECT_TYPE_SYMLINK,
- YAFFS_OBJECT_TYPE_DIRECTORY,
- YAFFS_OBJECT_TYPE_HARDLINK,
- YAFFS_OBJECT_TYPE_SPECIAL
-};
-
-#define YAFFS_OBJECT_TYPE_MAX YAFFS_OBJECT_TYPE_SPECIAL
-
-struct yaffs_ext_tags {
-
- unsigned validity0;
- unsigned chunk_used; /* Status of the chunk: used or unused */
- unsigned obj_id; /* If 0 then this is not part of an object (unused) */
- unsigned chunk_id; /* If 0 then this is a header, else a data chunk */
- unsigned n_bytes; /* Only valid for data chunks */
-
- /* The following stuff only has meaning when we read */
- enum yaffs_ecc_result ecc_result;
- unsigned block_bad;
-
- /* YAFFS 1 stuff */
- unsigned is_deleted; /* The chunk is marked deleted */
- unsigned serial_number; /* Yaffs1 2-bit serial number */
-
- /* YAFFS2 stuff */
- unsigned seq_number; /* The sequence number of this block */
-
- /* Extra info if this is an object header (YAFFS2 only) */
-
- unsigned extra_available; /* There is extra info available if this is not zero */
- unsigned extra_parent_id; /* The parent object */
- unsigned extra_is_shrink; /* Is it a shrink header? */
- unsigned extra_shadows; /* Does this shadow another object? */
-
- enum yaffs_obj_type extra_obj_type; /* What object type? */
-
- unsigned extra_length; /* Length if it is a file */
- unsigned extra_equiv_id; /* Equivalent object Id if it is a hard link */
-
- unsigned validity1;
-
-};
-
-/* Spare structure for YAFFS1 */
-struct yaffs_spare {
- u8 tb0;
- u8 tb1;
- u8 tb2;
- u8 tb3;
- u8 page_status; /* set to 0 to delete the chunk */
- u8 block_status;
- u8 tb4;
- u8 tb5;
- u8 ecc1[3];
- u8 tb6;
- u8 tb7;
- u8 ecc2[3];
-};
-
-/*Special structure for passing through to mtd */
-struct yaffs_nand_spare {
- struct yaffs_spare spare;
- int eccres1;
- int eccres2;
-};
-
-/* Block data in RAM */
-
-enum yaffs_block_state {
- YAFFS_BLOCK_STATE_UNKNOWN = 0,
-
- YAFFS_BLOCK_STATE_SCANNING,
- /* Being scanned */
-
- YAFFS_BLOCK_STATE_NEEDS_SCANNING,
- /* The block might have something on it (ie it is allocating or full, perhaps empty)
- * but it needs to be scanned to determine its true state.
- * This state is only valid during scanning.
-	 * NB We tolerate empty because the pre-scanner might be incapable of deciding.
-	 * However, if this state is returned on a YAFFS2 device, we expect a sequence number.
- */
-
- YAFFS_BLOCK_STATE_EMPTY,
- /* This block is empty */
-
- YAFFS_BLOCK_STATE_ALLOCATING,
- /* This block is partially allocated.
- * At least one page holds valid data.
- * This is the one currently being used for page
-	 * allocation. There should never be more than one of these.
- * If a block is only partially allocated at mount it is treated as full.
- */
-
- YAFFS_BLOCK_STATE_FULL,
- /* All the pages in this block have been allocated.
- * If a block was only partially allocated when mounted we treat
- * it as fully allocated.
- */
-
- YAFFS_BLOCK_STATE_DIRTY,
- /* The block was full and now all chunks have been deleted.
- * Erase me, reuse me.
- */
-
- YAFFS_BLOCK_STATE_CHECKPOINT,
- /* This block is assigned to holding checkpoint data. */
-
- YAFFS_BLOCK_STATE_COLLECTING,
- /* This block is being garbage collected */
-
- YAFFS_BLOCK_STATE_DEAD
- /* This block has failed and is not in use */
-};
-
-#define YAFFS_NUMBER_OF_BLOCK_STATES (YAFFS_BLOCK_STATE_DEAD + 1)
-
-struct yaffs_block_info {
-
- int soft_del_pages:10; /* number of soft deleted pages */
- int pages_in_use:10; /* number of pages in use */
- unsigned block_state:4; /* One of the above block states. NB use unsigned because enum is sometimes an int */
- u32 needs_retiring:1; /* Data has failed on this block, need to get valid data off */
- /* and retire the block. */
- u32 skip_erased_check:1; /* If this is set we can skip the erased check on this block */
- u32 gc_prioritise:1; /* An ECC check or blank check has failed on this block.
- It should be prioritised for GC */
- u32 chunk_error_strikes:3; /* How many times we've had ecc etc failures on this block and tried to reuse it */
-
-#ifdef CONFIG_YAFFS_YAFFS2
- u32 has_shrink_hdr:1; /* This block has at least one shrink object header */
- u32 seq_number; /* block sequence number for yaffs2 */
-#endif
-
-};
-
-/* -------------------------- Object structure -------------------------------*/
-/* This is the object structure as stored on NAND */
-
-struct yaffs_obj_hdr {
- enum yaffs_obj_type type;
-
- /* Apply to everything */
- int parent_obj_id;
- u16 sum_no_longer_used; /* checksum of name. No longer used */
- YCHAR name[YAFFS_MAX_NAME_LENGTH + 1];
-
- /* The following apply to directories, files, symlinks - not hard links */
- u32 yst_mode; /* protection */
-
- u32 yst_uid;
- u32 yst_gid;
- u32 yst_atime;
- u32 yst_mtime;
- u32 yst_ctime;
-
- /* File size applies to files only */
- int file_size;
-
- /* Equivalent object id applies to hard links only. */
- int equiv_id;
-
- /* Alias is for symlinks only. */
- YCHAR alias[YAFFS_MAX_ALIAS_LENGTH + 1];
-
- u32 yst_rdev; /* device stuff for block and char devices (major/min) */
-
- u32 win_ctime[2];
- u32 win_atime[2];
- u32 win_mtime[2];
-
- u32 inband_shadowed_obj_id;
- u32 inband_is_shrink;
-
- u32 reserved[2];
- int shadows_obj; /* This object header shadows the specified object if > 0 */
-
- /* is_shrink applies to object headers written when we shrink the file (ie resize) */
- u32 is_shrink;
-
-};
-
-/*--------------------------- Tnode -------------------------- */
-
-struct yaffs_tnode {
- struct yaffs_tnode *internal[YAFFS_NTNODES_INTERNAL];
-};
-
-/*------------------------ Object -----------------------------*/
-/* An object can be one of:
- * - a directory (no data, has a list of child links).
- * - a regular file (data.... not prunes :->).
- * - a symlink [symbolic link] (the alias).
- * - a hard link
- */
-
-struct yaffs_file_var {
- u32 file_size;
- u32 scanned_size;
- u32 shrink_size;
- int top_level;
- struct yaffs_tnode *top;
-};
-
-struct yaffs_dir_var {
- struct list_head children; /* list of child links */
- struct list_head dirty; /* Entry for list of dirty directories */
-};
-
-struct yaffs_symlink_var {
- YCHAR *alias;
-};
-
-struct yaffs_hardlink_var {
- struct yaffs_obj *equiv_obj;
- u32 equiv_id;
-};
-
-union yaffs_obj_var {
- struct yaffs_file_var file_variant;
- struct yaffs_dir_var dir_variant;
- struct yaffs_symlink_var symlink_variant;
- struct yaffs_hardlink_var hardlink_variant;
-};
-
-struct yaffs_obj {
- u8 deleted:1; /* This should only apply to unlinked files. */
- u8 soft_del:1; /* it has also been soft deleted */
- u8 unlinked:1; /* An unlinked file. The file should be in the unlinked directory. */
- u8 fake:1; /* A fake object has no presence on NAND. */
- u8 rename_allowed:1; /* Some objects are not allowed to be renamed. */
- u8 unlink_allowed:1;
- u8 dirty:1; /* the object needs to be written to flash */
- u8 valid:1; /* When the file system is being loaded up, this
- * object might be created before the data
- * is available (ie. file data records appear before the header).
- */
- u8 lazy_loaded:1; /* This object has been lazy loaded and is missing some detail */
-
- u8 defered_free:1; /* For Linux kernel. Object is removed from NAND, but is
- * still in the inode cache. Free of object is defered.
- * until the inode is released.
- */
- u8 being_created:1; /* This object is still being created so skip some checks. */
- u8 is_shadowed:1; /* This object is shadowed on the way to being renamed. */
-
-	u8 xattr_known:1;	/* We know whether this object has xattribs or not. */
- u8 has_xattr:1; /* This object has xattribs. Valid if xattr_known. */
-
- u8 serial; /* serial number of chunk in NAND. Cached here */
- u16 sum; /* sum of the name to speed searching */
-
- struct yaffs_dev *my_dev; /* The device I'm on */
-
- struct list_head hash_link; /* list of objects in this hash bucket */
-
- struct list_head hard_links; /* all the equivalent hard linked objects */
-
- /* directory structure stuff */
- /* also used for linking up the free list */
- struct yaffs_obj *parent;
- struct list_head siblings;
-
- /* Where's my object header in NAND? */
- int hdr_chunk;
-
- int n_data_chunks; /* Number of data chunks attached to the file. */
-
- u32 obj_id; /* the object id value */
-
- u32 yst_mode;
-
-#ifndef CONFIG_YAFFS_NO_SHORT_NAMES
- YCHAR short_name[YAFFS_SHORT_NAME_LENGTH + 1];
-#endif
-
-#ifdef CONFIG_YAFFS_WINCE
- u32 win_ctime[2];
- u32 win_mtime[2];
- u32 win_atime[2];
-#else
- u32 yst_uid;
- u32 yst_gid;
- u32 yst_atime;
- u32 yst_mtime;
- u32 yst_ctime;
-#endif
-
- u32 yst_rdev;
-
- void *my_inode;
-
- enum yaffs_obj_type variant_type;
-
- union yaffs_obj_var variant;
-
-};
-
-struct yaffs_obj_bucket {
- struct list_head list;
- int count;
-};
-
-/* yaffs_checkpt_obj holds the definition of an object as dumped
- * by checkpointing.
- */
-
-struct yaffs_checkpt_obj {
- int struct_type;
- u32 obj_id;
- u32 parent_id;
- int hdr_chunk;
- enum yaffs_obj_type variant_type:3;
- u8 deleted:1;
- u8 soft_del:1;
- u8 unlinked:1;
- u8 fake:1;
- u8 rename_allowed:1;
- u8 unlink_allowed:1;
- u8 serial;
- int n_data_chunks;
- u32 size_or_equiv_obj;
-};
-
-/*--------------------- Temporary buffers ----------------
- *
- * These are chunk-sized working buffers. Each device has a few.
- */
-
-struct yaffs_buffer {
- u8 *buffer;
- int line; /* track from whence this buffer was allocated */
- int max_line;
-};
-
-/*----------------- Device ---------------------------------*/
-
-struct yaffs_param {
- const YCHAR *name;
-
- /*
-	 * Entry parameters are set up early on; Yaffs sets up the rest.
-	 * The structure should be zeroed out before use so that unused
-	 * and default values are zero.
- */
-
-	int inband_tags;	/* Use inband tags */
- u32 total_bytes_per_chunk; /* Should be >= 512, does not need to be a power of 2 */
- int chunks_per_block; /* does not need to be a power of 2 */
- int spare_bytes_per_chunk; /* spare area size */
- int start_block; /* Start block we're allowed to use */
- int end_block; /* End block we're allowed to use */
- int n_reserved_blocks; /* We want this tuneable so that we can reduce */
- /* reserved blocks on NOR and RAM. */
-
- int n_caches; /* If <= 0, then short op caching is disabled, else
- * the number of short op caches (don't use too many).
- * 10 to 20 is a good bet.
- */
- int use_nand_ecc; /* Flag to decide whether or not to use NANDECC on data (yaffs1) */
- int no_tags_ecc; /* Flag to decide whether or not to do ECC on packed tags (yaffs2) */
-
- int is_yaffs2; /* Use yaffs2 mode on this device */
-
- int empty_lost_n_found; /* Auto-empty lost+found directory on mount */
-
- int refresh_period; /* How often we should check to do a block refresh */
-
- /* Checkpoint control. Can be set before or after initialisation */
- u8 skip_checkpt_rd;
- u8 skip_checkpt_wr;
-
- int enable_xattr; /* Enable xattribs */
-
- /* NAND access functions (Must be set before calling YAFFS) */
-
- int (*write_chunk_fn) (struct yaffs_dev * dev,
- int nand_chunk, const u8 * data,
- const struct yaffs_spare * spare);
- int (*read_chunk_fn) (struct yaffs_dev * dev,
- int nand_chunk, u8 * data,
- struct yaffs_spare * spare);
- int (*erase_fn) (struct yaffs_dev * dev, int flash_block);
- int (*initialise_flash_fn) (struct yaffs_dev * dev);
- int (*deinitialise_flash_fn) (struct yaffs_dev * dev);
-
-#ifdef CONFIG_YAFFS_YAFFS2
- int (*write_chunk_tags_fn) (struct yaffs_dev * dev,
- int nand_chunk, const u8 * data,
- const struct yaffs_ext_tags * tags);
- int (*read_chunk_tags_fn) (struct yaffs_dev * dev,
- int nand_chunk, u8 * data,
- struct yaffs_ext_tags * tags);
- int (*bad_block_fn) (struct yaffs_dev * dev, int block_no);
- int (*query_block_fn) (struct yaffs_dev * dev, int block_no,
- enum yaffs_block_state * state,
- u32 * seq_number);
-#endif
-
- /* The remove_obj_fn function must be supplied by OS flavours that
- * need it.
- * yaffs direct uses it to implement the faster readdir.
- * Linux uses it to protect the directory during unlocking.
- */
- void (*remove_obj_fn) (struct yaffs_obj * obj);
-
- /* Callback to mark the superblock dirty */
- void (*sb_dirty_fn) (struct yaffs_dev * dev);
-
- /* Callback to control garbage collection. */
- unsigned (*gc_control) (struct yaffs_dev * dev);
-
- /* Debug control flags. Don't use unless you know what you're doing */
- int use_header_file_size; /* Flag to determine if we should use file sizes from the header */
- int disable_lazy_load; /* Disable lazy loading on this device */
- int wide_tnodes_disabled; /* Set to disable wide tnodes */
- int disable_soft_del; /* yaffs 1 only: Set to disable the use of softdeletion. */
-
- int defered_dir_update; /* Set to defer directory updates */
-
-#ifdef CONFIG_YAFFS_AUTO_UNICODE
- int auto_unicode;
-#endif
- int always_check_erased; /* Force chunk erased check always on */
-};
-
-struct yaffs_dev {
- struct yaffs_param param;
-
- /* Context storage. Holds extra OS specific data for this device */
-
- void *os_context;
- void *driver_context;
-
- struct list_head dev_list;
-
- /* Runtime parameters. Set up by YAFFS. */
- int data_bytes_per_chunk;
-
- /* Non-wide tnode stuff */
- u16 chunk_grp_bits; /* Number of bits that need to be resolved if
- * the tnodes are not wide enough.
- */
-	u16 chunk_grp_size;	/* == 2^chunk_grp_bits */
-
- /* Stuff to support wide tnodes */
- u32 tnode_width;
- u32 tnode_mask;
- u32 tnode_size;
-
- /* Stuff for figuring out file offset to chunk conversions */
- u32 chunk_shift; /* Shift value */
- u32 chunk_div; /* Divisor after shifting: 1 for power-of-2 sizes */
- u32 chunk_mask; /* Mask to use for power-of-2 case */
-
- int is_mounted;
- int read_only;
- int is_checkpointed;
-
- /* Stuff to support block offsetting to support start block zero */
- int internal_start_block;
- int internal_end_block;
- int block_offset;
- int chunk_offset;
-
- /* Runtime checkpointing stuff */
- int checkpt_page_seq; /* running sequence number of checkpoint pages */
- int checkpt_byte_count;
- int checkpt_byte_offs;
- u8 *checkpt_buffer;
- int checkpt_open_write;
- int blocks_in_checkpt;
- int checkpt_cur_chunk;
- int checkpt_cur_block;
- int checkpt_next_block;
- int *checkpt_block_list;
- int checkpt_max_blocks;
- u32 checkpt_sum;
- u32 checkpt_xor;
-
- int checkpoint_blocks_required; /* Number of blocks needed to store current checkpoint set */
-
- /* Block Info */
- struct yaffs_block_info *block_info;
- u8 *chunk_bits; /* bitmap of chunks in use */
- unsigned block_info_alt:1; /* was allocated using alternative strategy */
- unsigned chunk_bits_alt:1; /* was allocated using alternative strategy */
- int chunk_bit_stride; /* Number of bytes of chunk_bits per block.
- * Must be consistent with chunks_per_block.
- */
-
- int n_erased_blocks;
- int alloc_block; /* Current block being allocated off */
- u32 alloc_page;
- int alloc_block_finder; /* Used to search for next allocation block */
-
- /* Object and Tnode memory management */
- void *allocator;
- int n_obj;
- int n_tnodes;
-
- int n_hardlinks;
-
- struct yaffs_obj_bucket obj_bucket[YAFFS_NOBJECT_BUCKETS];
- u32 bucket_finder;
-
- int n_free_chunks;
-
- /* Garbage collection control */
- u32 *gc_cleanup_list; /* objects to delete at the end of a GC. */
- u32 n_clean_ups;
-
- unsigned has_pending_prioritised_gc; /* We think this device might have pending prioritised gcs */
- unsigned gc_disable;
- unsigned gc_block_finder;
- unsigned gc_dirtiest;
- unsigned gc_pages_in_use;
- unsigned gc_not_done;
- unsigned gc_block;
- unsigned gc_chunk;
- unsigned gc_skip;
-
- /* Special directories */
- struct yaffs_obj *root_dir;
- struct yaffs_obj *lost_n_found;
-
- /* Buffer areas for storing data to recover from write failures TODO
- * u8 buffered_data[YAFFS_CHUNKS_PER_BLOCK][YAFFS_BYTES_PER_CHUNK];
- * struct yaffs_spare buffered_spare[YAFFS_CHUNKS_PER_BLOCK];
- */
-
- int buffered_block; /* Which block is buffered here? */
- int doing_buffered_block_rewrite;
-
- struct yaffs_cache *cache;
- int cache_last_use;
-
- /* Stuff for background deletion and unlinked files. */
- struct yaffs_obj *unlinked_dir; /* Directory where unlinked and deleted files live. */
- struct yaffs_obj *del_dir; /* Directory where deleted objects are sent to disappear. */
- struct yaffs_obj *unlinked_deletion; /* Current file being background deleted. */
- int n_deleted_files; /* Count of files awaiting deletion; */
- int n_unlinked_files; /* Count of unlinked files. */
- int n_bg_deletions; /* Count of background deletions. */
-
- /* Temporary buffer management */
- struct yaffs_buffer temp_buffer[YAFFS_N_TEMP_BUFFERS];
- int max_temp;
- int temp_in_use;
- int unmanaged_buffer_allocs;
- int unmanaged_buffer_deallocs;
-
- /* yaffs2 runtime stuff */
- unsigned seq_number; /* Sequence number of currently allocating block */
- unsigned oldest_dirty_seq;
- unsigned oldest_dirty_block;
-
- /* Block refreshing */
- int refresh_skip; /* A skip down counter. Refresh happens when this gets to zero. */
-
- /* Dirty directory handling */
- struct list_head dirty_dirs; /* List of dirty directories */
-
-	/* Statistics */
- u32 n_page_writes;
- u32 n_page_reads;
- u32 n_erasures;
- u32 n_erase_failures;
- u32 n_gc_copies;
- u32 all_gcs;
- u32 passive_gc_count;
- u32 oldest_dirty_gc_count;
- u32 n_gc_blocks;
- u32 bg_gcs;
- u32 n_retired_writes;
- u32 n_retired_blocks;
- u32 n_ecc_fixed;
- u32 n_ecc_unfixed;
- u32 n_tags_ecc_fixed;
- u32 n_tags_ecc_unfixed;
- u32 n_deletions;
- u32 n_unmarked_deletions;
- u32 refresh_count;
- u32 cache_hits;
-
-};
-
-/* The CheckpointDevice structure holds the device information that changes at runtime and
- * must be preserved over unmount/mount cycles.
- */
-struct yaffs_checkpt_dev {
- int struct_type;
- int n_erased_blocks;
- int alloc_block; /* Current block being allocated off */
- u32 alloc_page;
- int n_free_chunks;
-
- int n_deleted_files; /* Count of files awaiting deletion; */
- int n_unlinked_files; /* Count of unlinked files. */
- int n_bg_deletions; /* Count of background deletions. */
-
- /* yaffs2 runtime stuff */
- unsigned seq_number; /* Sequence number of currently allocating block */
-
-};
-
-struct yaffs_checkpt_validity {
- int struct_type;
- u32 magic;
- u32 version;
- u32 head;
-};
-
-struct yaffs_shadow_fixer {
- int obj_id;
- int shadowed_id;
- struct yaffs_shadow_fixer *next;
-};
-
-/* Structure for doing xattr modifications */
-struct yaffs_xattr_mod {
- int set; /* If 0 then this is a deletion */
- const YCHAR *name;
- const void *data;
- int size;
- int flags;
- int result;
-};
-
-/*----------------------- YAFFS Functions -----------------------*/
-
-int yaffs_guts_initialise(struct yaffs_dev *dev);
-void yaffs_deinitialise(struct yaffs_dev *dev);
-
-int yaffs_get_n_free_chunks(struct yaffs_dev *dev);
-
-int yaffs_rename_obj(struct yaffs_obj *old_dir, const YCHAR * old_name,
- struct yaffs_obj *new_dir, const YCHAR * new_name);
-
-int yaffs_unlinker(struct yaffs_obj *dir, const YCHAR * name);
-int yaffs_del_obj(struct yaffs_obj *obj);
-
-int yaffs_get_obj_name(struct yaffs_obj *obj, YCHAR * name, int buffer_size);
-int yaffs_get_obj_length(struct yaffs_obj *obj);
-int yaffs_get_obj_inode(struct yaffs_obj *obj);
-unsigned yaffs_get_obj_type(struct yaffs_obj *obj);
-int yaffs_get_obj_link_count(struct yaffs_obj *obj);
-
-/* File operations */
-int yaffs_file_rd(struct yaffs_obj *obj, u8 * buffer, loff_t offset,
- int n_bytes);
-int yaffs_wr_file(struct yaffs_obj *obj, const u8 * buffer, loff_t offset,
-		  int n_bytes, int write_through);
-int yaffs_resize_file(struct yaffs_obj *obj, loff_t new_size);
-
-struct yaffs_obj *yaffs_create_file(struct yaffs_obj *parent,
- const YCHAR * name, u32 mode, u32 uid,
- u32 gid);
-
-int yaffs_flush_file(struct yaffs_obj *obj, int update_time, int data_sync);
-
-/* Flushing and checkpointing */
-void yaffs_flush_whole_cache(struct yaffs_dev *dev);
-
-int yaffs_checkpoint_save(struct yaffs_dev *dev);
-int yaffs_checkpoint_restore(struct yaffs_dev *dev);
-
-/* Directory operations */
-struct yaffs_obj *yaffs_create_dir(struct yaffs_obj *parent, const YCHAR * name,
- u32 mode, u32 uid, u32 gid);
-struct yaffs_obj *yaffs_find_by_name(struct yaffs_obj *the_dir,
- const YCHAR * name);
-struct yaffs_obj *yaffs_find_by_number(struct yaffs_dev *dev, u32 number);
-
-/* Link operations */
-struct yaffs_obj *yaffs_link_obj(struct yaffs_obj *parent, const YCHAR * name,
- struct yaffs_obj *equiv_obj);
-
-struct yaffs_obj *yaffs_get_equivalent_obj(struct yaffs_obj *obj);
-
-/* Symlink operations */
-struct yaffs_obj *yaffs_create_symlink(struct yaffs_obj *parent,
- const YCHAR * name, u32 mode, u32 uid,
- u32 gid, const YCHAR * alias);
-YCHAR *yaffs_get_symlink_alias(struct yaffs_obj *obj);
-
-/* Special inodes (fifos, sockets and devices) */
-struct yaffs_obj *yaffs_create_special(struct yaffs_obj *parent,
- const YCHAR * name, u32 mode, u32 uid,
- u32 gid, u32 rdev);
-
-int yaffs_set_xattrib(struct yaffs_obj *obj, const YCHAR * name,
- const void *value, int size, int flags);
-int yaffs_get_xattrib(struct yaffs_obj *obj, const YCHAR * name, void *value,
- int size);
-int yaffs_list_xattrib(struct yaffs_obj *obj, char *buffer, int size);
-int yaffs_remove_xattrib(struct yaffs_obj *obj, const YCHAR * name);
-
-/* Special directories */
-struct yaffs_obj *yaffs_root(struct yaffs_dev *dev);
-struct yaffs_obj *yaffs_lost_n_found(struct yaffs_dev *dev);
-
-void yaffs_handle_defered_free(struct yaffs_obj *obj);
-
-void yaffs_update_dirty_dirs(struct yaffs_dev *dev);
-
-int yaffs_bg_gc(struct yaffs_dev *dev, unsigned urgency);
-
-/* Debug dump */
-int yaffs_dump_obj(struct yaffs_obj *obj);
-
-void yaffs_guts_test(struct yaffs_dev *dev);
-
-/* A few useful functions to be used within the core files */
-void yaffs_chunk_del(struct yaffs_dev *dev, int chunk_id, int mark_flash,
- int lyn);
-int yaffs_check_ff(u8 * buffer, int n_bytes);
-void yaffs_handle_chunk_error(struct yaffs_dev *dev,
- struct yaffs_block_info *bi);
-
-u8 *yaffs_get_temp_buffer(struct yaffs_dev *dev, int line_no);
-void yaffs_release_temp_buffer(struct yaffs_dev *dev, u8 * buffer, int line_no);
-
-struct yaffs_obj *yaffs_find_or_create_by_number(struct yaffs_dev *dev,
- int number,
- enum yaffs_obj_type type);
-int yaffs_put_chunk_in_file(struct yaffs_obj *in, int inode_chunk,
- int nand_chunk, int in_scan);
-void yaffs_set_obj_name(struct yaffs_obj *obj, const YCHAR * name);
-void yaffs_set_obj_name_from_oh(struct yaffs_obj *obj,
- const struct yaffs_obj_hdr *oh);
-void yaffs_add_obj_to_dir(struct yaffs_obj *directory, struct yaffs_obj *obj);
-YCHAR *yaffs_clone_str(const YCHAR * str);
-void yaffs_link_fixup(struct yaffs_dev *dev, struct yaffs_obj *hard_list);
-void yaffs_block_became_dirty(struct yaffs_dev *dev, int block_no);
-int yaffs_update_oh(struct yaffs_obj *in, const YCHAR * name,
- int force, int is_shrink, int shadows,
- struct yaffs_xattr_mod *xop);
-void yaffs_handle_shadowed_obj(struct yaffs_dev *dev, int obj_id,
- int backward_scanning);
-int yaffs_check_alloc_available(struct yaffs_dev *dev, int n_chunks);
-struct yaffs_tnode *yaffs_get_tnode(struct yaffs_dev *dev);
-struct yaffs_tnode *yaffs_add_find_tnode_0(struct yaffs_dev *dev,
- struct yaffs_file_var *file_struct,
- u32 chunk_id,
- struct yaffs_tnode *passed_tn);
-
-int yaffs_do_file_wr(struct yaffs_obj *in, const u8 * buffer, loff_t offset,
-		   int n_bytes, int write_through);
-void yaffs_resize_file_down(struct yaffs_obj *obj, loff_t new_size);
-void yaffs_skip_rest_of_block(struct yaffs_dev *dev);
-
-int yaffs_count_free_chunks(struct yaffs_dev *dev);
-
-struct yaffs_tnode *yaffs_find_tnode_0(struct yaffs_dev *dev,
- struct yaffs_file_var *file_struct,
- u32 chunk_id);
-
-u32 yaffs_get_group_base(struct yaffs_dev *dev, struct yaffs_tnode *tn,
- unsigned pos);
-
-int yaffs_is_non_empty_dir(struct yaffs_obj *obj);
-#endif
diff --git a/fs/yaffs2/yaffs_linux.h b/fs/yaffs2/yaffs_linux.h
deleted file mode 100644
index 3b508cb..0000000
--- a/fs/yaffs2/yaffs_linux.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_LINUX_H__
-#define __YAFFS_LINUX_H__
-
-#include "yportenv.h"
-
-struct yaffs_linux_context {
- struct list_head context_list; /* List of these we have mounted */
- struct yaffs_dev *dev;
- struct super_block *super;
- struct task_struct *bg_thread; /* Background thread for this device */
- int bg_running;
- struct mutex gross_lock; /* Gross locking mutex*/
- u8 *spare_buffer; /* For mtdif2 use. Don't know the size of the buffer
- * at compile time so we have to allocate it.
- */
- struct list_head search_contexts;
- void (*put_super_fn) (struct super_block * sb);
-
- struct task_struct *readdir_process;
- unsigned mount_id;
-};
-
-#define yaffs_dev_to_lc(dev) ((struct yaffs_linux_context *)((dev)->os_context))
-#define yaffs_dev_to_mtd(dev) ((struct mtd_info *)((dev)->driver_context))
-
-#endif
diff --git a/fs/yaffs2/yaffs_mtdif.c b/fs/yaffs2/yaffs_mtdif.c
deleted file mode 100644
index 7cf53b3..0000000
--- a/fs/yaffs2/yaffs_mtdif.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yportenv.h"
-
-#include "yaffs_mtdif.h"
-
-#include "linux/mtd/mtd.h"
-#include "linux/types.h"
-#include "linux/time.h"
-#include "linux/mtd/nand.h"
-
-#include "yaffs_linux.h"
-
-int nandmtd_erase_block(struct yaffs_dev *dev, int block_no)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- u32 addr =
- ((loff_t) block_no) * dev->param.total_bytes_per_chunk
- * dev->param.chunks_per_block;
- struct erase_info ei;
-
- int retval = 0;
-
- ei.mtd = mtd;
- ei.addr = addr;
- ei.len = dev->param.total_bytes_per_chunk * dev->param.chunks_per_block;
- ei.time = 1000;
- ei.retries = 2;
- ei.callback = NULL;
- ei.priv = (u_long) dev;
-
- retval = mtd->erase(mtd, &ei);
-
- if (retval == 0)
- return YAFFS_OK;
- else
- return YAFFS_FAIL;
-}
-
-int nandmtd_initialise(struct yaffs_dev *dev)
-{
- return YAFFS_OK;
-}
diff --git a/fs/yaffs2/yaffs_mtdif.h b/fs/yaffs2/yaffs_mtdif.h
deleted file mode 100644
index 6665074..0000000
--- a/fs/yaffs2/yaffs_mtdif.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_MTDIF_H__
-#define __YAFFS_MTDIF_H__
-
-#include "yaffs_guts.h"
-
-int nandmtd_erase_block(struct yaffs_dev *dev, int block_no);
-int nandmtd_initialise(struct yaffs_dev *dev);
-#endif
diff --git a/fs/yaffs2/yaffs_mtdif1.c b/fs/yaffs2/yaffs_mtdif1.c
deleted file mode 100644
index 5108369..0000000
--- a/fs/yaffs2/yaffs_mtdif1.c
+++ /dev/null
@@ -1,330 +0,0 @@
-/*
- * YAFFS: Yet another FFS. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * This module provides the interface between yaffs_nand.c and the
- * MTD API. This version is used when the MTD interface supports the
- * 'mtd_oob_ops' style calls to read_oob and write_oob, circa 2.6.17,
- * and we have small-page NAND device.
- *
- * These functions are invoked via function pointers in yaffs_nand.c.
- * This replaces functionality provided by functions in yaffs_mtdif.c
- * and the yaffs_tags compatibility functions in yaffs_tagscompat.c that are
- * called in yaffs_mtdif.c when the function pointers are NULL.
- * We assume the MTD layer is performing ECC (use_nand_ecc is true).
- */
-
-#include "yportenv.h"
-#include "yaffs_trace.h"
-#include "yaffs_guts.h"
-#include "yaffs_packedtags1.h"
-#include "yaffs_tagscompat.h" /* for yaffs_calc_tags_ecc */
-#include "yaffs_linux.h"
-
-#include "linux/kernel.h"
-#include "linux/version.h"
-#include "linux/types.h"
-#include "linux/mtd/mtd.h"
-
-#ifndef CONFIG_YAFFS_9BYTE_TAGS
-# define YTAG1_SIZE 8
-#else
-# define YTAG1_SIZE 9
-#endif
-
-/* Write a chunk (page) of data to NAND.
- *
- * Caller always provides ExtendedTags data which are converted to a more
- * compact (packed) form for storage in NAND. A mini-ECC runs over the
- * contents of the tags meta-data and is used to validate the tags when read.
- *
- * - Pack ExtendedTags to packed_tags1 form
- * - Compute mini-ECC for packed_tags1
- * - Write data and packed tags to NAND.
- *
- * Note: Due to the use of the packed_tags1 meta-data which does not include
- * a full sequence number (as found in the larger packed_tags2 form) it is
- * necessary for Yaffs to re-write a chunk/page (just once) to mark it as
- * discarded and dirty. This is not ideal: newer NAND parts are supposed
- * to be written just once. When Yaffs performs this operation, this
- * function is called with a NULL data pointer -- calling MTD write_oob
- * without data is valid usage (2.6.17).
- *
- * Any underlying MTD error results in YAFFS_FAIL.
- * Returns YAFFS_OK or YAFFS_FAIL.
- */
-int nandmtd1_write_chunk_tags(struct yaffs_dev *dev,
- int nand_chunk, const u8 * data,
- const struct yaffs_ext_tags *etags)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- int chunk_bytes = dev->data_bytes_per_chunk;
- loff_t addr = ((loff_t) nand_chunk) * chunk_bytes;
- struct mtd_oob_ops ops;
- struct yaffs_packed_tags1 pt1;
- int retval;
-
- /* we assume that packed_tags1 and struct yaffs_tags are compatible */
- compile_time_assertion(sizeof(struct yaffs_packed_tags1) == 12);
- compile_time_assertion(sizeof(struct yaffs_tags) == 8);
-
- yaffs_pack_tags1(&pt1, etags);
- yaffs_calc_tags_ecc((struct yaffs_tags *)&pt1);
-
- /* When deleting a chunk, the upper layer provides only skeletal
- * etags, one with is_deleted set. However, we need to update the
- * tags, not erase them completely. So we use the NAND write property
-	 * that only zeroed bits stick, and set tag bytes to all-ones and
- * zero just the (not) deleted bit.
- */
-#ifndef CONFIG_YAFFS_9BYTE_TAGS
- if (etags->is_deleted) {
- memset(&pt1, 0xff, 8);
- /* clear delete status bit to indicate deleted */
- pt1.deleted = 0;
- }
-#else
- ((u8 *) & pt1)[8] = 0xff;
- if (etags->is_deleted) {
- memset(&pt1, 0xff, 8);
- /* zero page_status byte to indicate deleted */
- ((u8 *) & pt1)[8] = 0;
- }
-#endif
-
- memset(&ops, 0, sizeof(ops));
- ops.mode = MTD_OOB_AUTO;
- ops.len = (data) ? chunk_bytes : 0;
- ops.ooblen = YTAG1_SIZE;
- ops.datbuf = (u8 *) data;
- ops.oobbuf = (u8 *) & pt1;
-
- retval = mtd->write_oob(mtd, addr, &ops);
- if (retval) {
- yaffs_trace(YAFFS_TRACE_MTD,
- "write_oob failed, chunk %d, mtd error %d",
- nand_chunk, retval);
- }
- return retval ? YAFFS_FAIL : YAFFS_OK;
-}
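The delete-by-rewrite trick works because NAND programming can only move bits from 1 to 0: rewriting a page effectively ANDs the new data into what is already stored, so a buffer of all-ones with just the deleted bit cleared leaves every other tag bit intact. A small illustration of that property (plain bit arithmetic, not MTD code):

#include <assert.h>

/* NAND programming can only clear bits, so a rewrite behaves as old & new. */
static unsigned char nand_program(unsigned char old, unsigned char incoming)
{
	return old & incoming;
}

int main(void)
{
	unsigned char stored = 0xA5;		/* whatever the tags held */
	unsigned char rewrite = 0xFF & ~0x01;	/* all ones, clear bit 0 only */

	assert(nand_program(stored, rewrite) == (stored & ~0x01));
	return 0;
}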
-
-/* Return with empty ExtendedTags but add ecc_result.
- */
-static int rettags(struct yaffs_ext_tags *etags, int ecc_result, int retval)
-{
- if (etags) {
- memset(etags, 0, sizeof(*etags));
- etags->ecc_result = ecc_result;
- }
- return retval;
-}
-
-/* Read a chunk (page) from NAND.
- *
- * Caller expects ExtendedTags data to be usable even on error; that is,
- * all members except ecc_result and block_bad are zeroed.
- *
- * - Check ECC results for data (if applicable)
- * - Check for blank/erased block (return empty ExtendedTags if blank)
- * - Check the packed_tags1 mini-ECC (correct if necessary/possible)
- * - Convert packed_tags1 to ExtendedTags
- * - Update ecc_result and block_bad members to reflect state.
- *
- * Returns YAFFS_OK or YAFFS_FAIL.
- */
-int nandmtd1_read_chunk_tags(struct yaffs_dev *dev,
- int nand_chunk, u8 * data,
- struct yaffs_ext_tags *etags)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- int chunk_bytes = dev->data_bytes_per_chunk;
- loff_t addr = ((loff_t) nand_chunk) * chunk_bytes;
- int eccres = YAFFS_ECC_RESULT_NO_ERROR;
- struct mtd_oob_ops ops;
- struct yaffs_packed_tags1 pt1;
- int retval;
- int deleted;
-
- memset(&ops, 0, sizeof(ops));
- ops.mode = MTD_OOB_AUTO;
- ops.len = (data) ? chunk_bytes : 0;
- ops.ooblen = YTAG1_SIZE;
- ops.datbuf = data;
- ops.oobbuf = (u8 *) & pt1;
-
- /* Read page and oob using MTD.
- * Check status and determine ECC result.
- */
- retval = mtd->read_oob(mtd, addr, &ops);
- if (retval) {
- yaffs_trace(YAFFS_TRACE_MTD,
- "read_oob failed, chunk %d, mtd error %d",
- nand_chunk, retval);
- }
-
- switch (retval) {
- case 0:
- /* no error */
- break;
-
- case -EUCLEAN:
- /* MTD's ECC fixed the data */
- eccres = YAFFS_ECC_RESULT_FIXED;
- dev->n_ecc_fixed++;
- break;
-
- case -EBADMSG:
- /* MTD's ECC could not fix the data */
- dev->n_ecc_unfixed++;
- /* fall into... */
- default:
- rettags(etags, YAFFS_ECC_RESULT_UNFIXED, 0);
- etags->block_bad = (mtd->block_isbad) (mtd, addr);
- return YAFFS_FAIL;
- }
-
- /* Check for a blank/erased chunk.
- */
- if (yaffs_check_ff((u8 *) & pt1, 8)) {
- /* when blank, upper layers want ecc_result to be <= NO_ERROR */
- return rettags(etags, YAFFS_ECC_RESULT_NO_ERROR, YAFFS_OK);
- }
-#ifndef CONFIG_YAFFS_9BYTE_TAGS
-	/* Read the deleted status bit, then return it to its non-deleted
-	 * state before performing the tags mini-ECC check. pt1.deleted is
- * inverted.
- */
- deleted = !pt1.deleted;
- pt1.deleted = 1;
-#else
- deleted = (yaffs_count_bits(((u8 *) & pt1)[8]) < 7);
-#endif
-
- /* Check the packed tags mini-ECC and correct if necessary/possible.
- */
- retval = yaffs_check_tags_ecc((struct yaffs_tags *)&pt1);
- switch (retval) {
- case 0:
- /* no tags error, use MTD result */
- break;
- case 1:
- /* recovered tags-ECC error */
- dev->n_tags_ecc_fixed++;
- if (eccres == YAFFS_ECC_RESULT_NO_ERROR)
- eccres = YAFFS_ECC_RESULT_FIXED;
- break;
- default:
- /* unrecovered tags-ECC error */
- dev->n_tags_ecc_unfixed++;
- return rettags(etags, YAFFS_ECC_RESULT_UNFIXED, YAFFS_FAIL);
- }
-
- /* Unpack the tags to extended form and set ECC result.
- * [set should_be_ff just to keep yaffs_unpack_tags1 happy]
- */
- pt1.should_be_ff = 0xFFFFFFFF;
- yaffs_unpack_tags1(etags, &pt1);
- etags->ecc_result = eccres;
-
- /* Set deleted state */
- etags->is_deleted = deleted;
- return YAFFS_OK;
-}
-
-/* Mark a block bad.
- *
- * This is a persistent state.
- * Use of this function should be rare.
- *
- * Returns YAFFS_OK or YAFFS_FAIL.
- */
-int nandmtd1_mark_block_bad(struct yaffs_dev *dev, int block_no)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- int blocksize = dev->param.chunks_per_block * dev->data_bytes_per_chunk;
- int retval;
-
- yaffs_trace(YAFFS_TRACE_BAD_BLOCKS,
- "marking block %d bad", block_no);
-
- retval = mtd->block_markbad(mtd, (loff_t) blocksize * block_no);
- return (retval) ? YAFFS_FAIL : YAFFS_OK;
-}
-
-/* Check any MTD prerequisites.
- *
- * Returns YAFFS_OK or YAFFS_FAIL.
- */
-static int nandmtd1_test_prerequists(struct mtd_info *mtd)
-{
- /* 2.6.18 has mtd->ecclayout->oobavail */
- /* 2.6.21 has mtd->ecclayout->oobavail and mtd->oobavail */
- int oobavail = mtd->ecclayout->oobavail;
-
- if (oobavail < YTAG1_SIZE) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "mtd device has only %d bytes for tags, need %d",
- oobavail, YTAG1_SIZE);
- return YAFFS_FAIL;
- }
- return YAFFS_OK;
-}
-
-/* Query for the current state of a specific block.
- *
- * Examine the tags of the first chunk of the block and return the state:
- * - YAFFS_BLOCK_STATE_DEAD, the block is marked bad
- * - YAFFS_BLOCK_STATE_NEEDS_SCANNING, the block is in use
- * - YAFFS_BLOCK_STATE_EMPTY, the block is clean
- *
- * Always returns YAFFS_OK.
- */
-int nandmtd1_query_block(struct yaffs_dev *dev, int block_no,
- enum yaffs_block_state *state_ptr, u32 * seq_ptr)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- int chunk_num = block_no * dev->param.chunks_per_block;
- loff_t addr = (loff_t) chunk_num * dev->data_bytes_per_chunk;
- struct yaffs_ext_tags etags;
- int state = YAFFS_BLOCK_STATE_DEAD;
- int seqnum = 0;
- int retval;
-
-	/* We don't yet have a good place to test for MTD config prerequisites.
- * Do it here as we are called during the initial scan.
- */
- if (nandmtd1_test_prerequists(mtd) != YAFFS_OK)
- return YAFFS_FAIL;
-
- retval = nandmtd1_read_chunk_tags(dev, chunk_num, NULL, &etags);
- etags.block_bad = (mtd->block_isbad) (mtd, addr);
- if (etags.block_bad) {
- yaffs_trace(YAFFS_TRACE_BAD_BLOCKS,
- "block %d is marked bad", block_no);
- state = YAFFS_BLOCK_STATE_DEAD;
- } else if (etags.ecc_result != YAFFS_ECC_RESULT_NO_ERROR) {
- /* bad tags, need to look more closely */
- state = YAFFS_BLOCK_STATE_NEEDS_SCANNING;
- } else if (etags.chunk_used) {
- state = YAFFS_BLOCK_STATE_NEEDS_SCANNING;
- seqnum = etags.seq_number;
- } else {
- state = YAFFS_BLOCK_STATE_EMPTY;
- }
-
- *state_ptr = state;
- *seq_ptr = seqnum;
-
- /* query always succeeds */
- return YAFFS_OK;
-}
diff --git a/fs/yaffs2/yaffs_mtdif1.h b/fs/yaffs2/yaffs_mtdif1.h
deleted file mode 100644
index 07ce452..0000000
--- a/fs/yaffs2/yaffs_mtdif1.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_MTDIF1_H__
-#define __YAFFS_MTDIF1_H__
-
-int nandmtd1_write_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
- const u8 * data,
- const struct yaffs_ext_tags *tags);
-
-int nandmtd1_read_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
- u8 * data, struct yaffs_ext_tags *tags);
-
-int nandmtd1_mark_block_bad(struct yaffs_dev *dev, int block_no);
-
-int nandmtd1_query_block(struct yaffs_dev *dev, int block_no,
- enum yaffs_block_state *state, u32 * seq_number);
-
-#endif
diff --git a/fs/yaffs2/yaffs_mtdif2.c b/fs/yaffs2/yaffs_mtdif2.c
deleted file mode 100644
index d1643df..0000000
--- a/fs/yaffs2/yaffs_mtdif2.c
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* mtd interface for YAFFS2 */
-
-#include "yportenv.h"
-#include "yaffs_trace.h"
-
-#include "yaffs_mtdif2.h"
-
-#include "linux/mtd/mtd.h"
-#include "linux/types.h"
-#include "linux/time.h"
-
-#include "yaffs_packedtags2.h"
-
-#include "yaffs_linux.h"
-
-/* NB For use with inband tags....
- * We assume that the data buffer is of size total_bytes_per_chunk so that we can also
- * use it to load the tags.
- */
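Under that assumption the packed tags simply live in the tail of the chunk buffer, at offset data_bytes_per_chunk. A sketch of the layout and the offset arithmetic, with illustrative sizes (2048 data bytes in a 2112-byte chunk; the numbers are examples, not requirements):

/* Inband-tags buffer layout:
 *
 *   |<---- data_bytes_per_chunk ---->|<-- packed tags -->|
 *   0                              2048                2112
 */
#include <stddef.h>

static void *inband_tags_ptr(unsigned char *chunk_buf, size_t data_bytes_per_chunk)
{
	return chunk_buf + data_bytes_per_chunk;	/* tags follow the data */
}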
-int nandmtd2_write_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
- const u8 * data,
- const struct yaffs_ext_tags *tags)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- struct mtd_oob_ops ops;
- int retval = 0;
-
- loff_t addr;
-
- struct yaffs_packed_tags2 pt;
-
- int packed_tags_size =
- dev->param.no_tags_ecc ? sizeof(pt.t) : sizeof(pt);
- void *packed_tags_ptr =
- dev->param.no_tags_ecc ? (void *)&pt.t : (void *)&pt;
-
- yaffs_trace(YAFFS_TRACE_MTD,
- "nandmtd2_write_chunk_tags chunk %d data %p tags %p",
- nand_chunk, data, tags);
-
- addr = ((loff_t) nand_chunk) * dev->param.total_bytes_per_chunk;
-
- /* For yaffs2 writing there must be both data and tags.
- * If we're using inband tags, then the tags are stuffed into
- * the end of the data buffer.
- */
- if (!data || !tags)
- BUG();
- else if (dev->param.inband_tags) {
- struct yaffs_packed_tags2_tags_only *pt2tp;
-		pt2tp = (struct yaffs_packed_tags2_tags_only *)
-		    (data + dev->data_bytes_per_chunk);
- yaffs_pack_tags2_tags_only(pt2tp, tags);
- } else {
- yaffs_pack_tags2(&pt, tags, !dev->param.no_tags_ecc);
- }
-
- ops.mode = MTD_OOB_AUTO;
- ops.ooblen = (dev->param.inband_tags) ? 0 : packed_tags_size;
- ops.len = dev->param.total_bytes_per_chunk;
- ops.ooboffs = 0;
- ops.datbuf = (u8 *) data;
- ops.oobbuf = (dev->param.inband_tags) ? NULL : packed_tags_ptr;
- retval = mtd->write_oob(mtd, addr, &ops);
-
- if (retval == 0)
- return YAFFS_OK;
- else
- return YAFFS_FAIL;
-}
-
-int nandmtd2_read_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
- u8 * data, struct yaffs_ext_tags *tags)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- struct mtd_oob_ops ops;
-
- size_t dummy;
- int retval = 0;
- int local_data = 0;
-
- loff_t addr = ((loff_t) nand_chunk) * dev->param.total_bytes_per_chunk;
-
- struct yaffs_packed_tags2 pt;
-
- int packed_tags_size =
- dev->param.no_tags_ecc ? sizeof(pt.t) : sizeof(pt);
- void *packed_tags_ptr =
- dev->param.no_tags_ecc ? (void *)&pt.t : (void *)&pt;
-
- yaffs_trace(YAFFS_TRACE_MTD,
- "nandmtd2_read_chunk_tags chunk %d data %p tags %p",
- nand_chunk, data, tags);
-
- if (dev->param.inband_tags) {
-
- if (!data) {
- local_data = 1;
- data = yaffs_get_temp_buffer(dev, __LINE__);
- }
-
- }
-
- if (dev->param.inband_tags || (data && !tags))
- retval = mtd->read(mtd, addr, dev->param.total_bytes_per_chunk,
- &dummy, data);
- else if (tags) {
- ops.mode = MTD_OOB_AUTO;
- ops.ooblen = packed_tags_size;
- ops.len = data ? dev->data_bytes_per_chunk : packed_tags_size;
- ops.ooboffs = 0;
- ops.datbuf = data;
- ops.oobbuf = yaffs_dev_to_lc(dev)->spare_buffer;
- retval = mtd->read_oob(mtd, addr, &ops);
- }
-
- if (dev->param.inband_tags) {
- if (tags) {
- struct yaffs_packed_tags2_tags_only *pt2tp;
-			pt2tp = (struct yaffs_packed_tags2_tags_only *)
-			    &data[dev->data_bytes_per_chunk];
- yaffs_unpack_tags2_tags_only(tags, pt2tp);
- }
- } else {
- if (tags) {
- memcpy(packed_tags_ptr,
- yaffs_dev_to_lc(dev)->spare_buffer,
- packed_tags_size);
- yaffs_unpack_tags2(tags, &pt, !dev->param.no_tags_ecc);
- }
- }
-
- if (local_data)
- yaffs_release_temp_buffer(dev, data, __LINE__);
-
- if (tags && retval == -EBADMSG
- && tags->ecc_result == YAFFS_ECC_RESULT_NO_ERROR) {
- tags->ecc_result = YAFFS_ECC_RESULT_UNFIXED;
- dev->n_ecc_unfixed++;
- }
- if (tags && retval == -EUCLEAN
- && tags->ecc_result == YAFFS_ECC_RESULT_NO_ERROR) {
- tags->ecc_result = YAFFS_ECC_RESULT_FIXED;
- dev->n_ecc_fixed++;
- }
- if (retval == 0)
- return YAFFS_OK;
- else
- return YAFFS_FAIL;
-}
-
-int nandmtd2_mark_block_bad(struct yaffs_dev *dev, int block_no)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- int retval;
- yaffs_trace(YAFFS_TRACE_MTD,
- "nandmtd2_mark_block_bad %d", block_no);
-
- retval =
- mtd->block_markbad(mtd,
- block_no * dev->param.chunks_per_block *
- dev->param.total_bytes_per_chunk);
-
- if (retval == 0)
- return YAFFS_OK;
- else
- return YAFFS_FAIL;
-
-}
-
-int nandmtd2_query_block(struct yaffs_dev *dev, int block_no,
- enum yaffs_block_state *state, u32 * seq_number)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(dev);
- int retval;
-
- yaffs_trace(YAFFS_TRACE_MTD, "nandmtd2_query_block %d", block_no);
- retval =
- mtd->block_isbad(mtd,
- block_no * dev->param.chunks_per_block *
- dev->param.total_bytes_per_chunk);
-
- if (retval) {
- yaffs_trace(YAFFS_TRACE_MTD, "block is bad");
-
- *state = YAFFS_BLOCK_STATE_DEAD;
- *seq_number = 0;
- } else {
- struct yaffs_ext_tags t;
- nandmtd2_read_chunk_tags(dev, block_no *
- dev->param.chunks_per_block, NULL, &t);
-
- if (t.chunk_used) {
- *seq_number = t.seq_number;
- *state = YAFFS_BLOCK_STATE_NEEDS_SCANNING;
- } else {
- *seq_number = 0;
- *state = YAFFS_BLOCK_STATE_EMPTY;
- }
- }
-	yaffs_trace(YAFFS_TRACE_MTD,
-		    "block query returns seq %d state %d", *seq_number, *state);
-
- if (retval == 0)
- return YAFFS_OK;
- else
- return YAFFS_FAIL;
-}
-
diff --git a/fs/yaffs2/yaffs_mtdif2.h b/fs/yaffs2/yaffs_mtdif2.h
deleted file mode 100644
index d821126..0000000
--- a/fs/yaffs2/yaffs_mtdif2.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_MTDIF2_H__
-#define __YAFFS_MTDIF2_H__
-
-#include "yaffs_guts.h"
-int nandmtd2_write_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
- const u8 * data,
- const struct yaffs_ext_tags *tags);
-int nandmtd2_read_chunk_tags(struct yaffs_dev *dev, int nand_chunk,
- u8 * data, struct yaffs_ext_tags *tags);
-int nandmtd2_mark_block_bad(struct yaffs_dev *dev, int block_no);
-int nandmtd2_query_block(struct yaffs_dev *dev, int block_no,
- enum yaffs_block_state *state, u32 * seq_number);
-
-#endif
diff --git a/fs/yaffs2/yaffs_nameval.c b/fs/yaffs2/yaffs_nameval.c
deleted file mode 100644
index daa36f9..0000000
--- a/fs/yaffs2/yaffs_nameval.c
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * This simple implementation of a name-value store assumes a small number of
- * values that together fit into a small fixed-size buffer.
- *
- * Each attribute is stored as a record:
- * sizeof(int) bytes record size.
- * strnlen+1 bytes name null terminated.
- * nbytes value.
- * ----------
- * total size stored in record size
- *
- * This code has not been tested with unicode yet.
- */
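For example (a hypothetical record, not taken from the source), storing the name "user.x" with a 4-byte value gives a record size of 4 + 7 + 4 = 15: four bytes of size, seven bytes of NUL-terminated name, four bytes of value. A sketch of packing one such record under those assumptions, without the bounds checking the real store needs:

#include <string.h>

/* Pack one name-value record at offset pos, following the layout above;
 * returns the offset of the next record. Illustrative only.
 */
static int nval_pack_one(char *xb, int pos, const char *name,
			 const void *val, int nbytes)
{
	int namelen = (int)strlen(name);
	int reclen = (int)sizeof(int) + namelen + 1 + nbytes;

	memcpy(xb + pos, &reclen, sizeof(int));			/* record size */
	memcpy(xb + pos + sizeof(int), name, namelen + 1);	/* name + NUL */
	memcpy(xb + pos + sizeof(int) + namelen + 1, val, nbytes); /* value */
	return pos + reclen;
}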
-
-#include "yaffs_nameval.h"
-
-#include "yportenv.h"
-
-static int nval_find(const char *xb, int xb_size, const YCHAR * name,
- int *exist_size)
-{
- int pos = 0;
- int size;
-
- memcpy(&size, xb, sizeof(int));
- while (size > 0 && (size < xb_size) && (pos + size < xb_size)) {
- if (strncmp
- ((YCHAR *) (xb + pos + sizeof(int)), name, size) == 0) {
- if (exist_size)
- *exist_size = size;
- return pos;
- }
- pos += size;
- if (pos < xb_size - sizeof(int))
- memcpy(&size, xb + pos, sizeof(int));
- else
- size = 0;
- }
- if (exist_size)
- *exist_size = 0;
- return -1;
-}
-
-static int nval_used(const char *xb, int xb_size)
-{
- int pos = 0;
- int size;
-
- memcpy(&size, xb + pos, sizeof(int));
- while (size > 0 && (size < xb_size) && (pos + size < xb_size)) {
- pos += size;
- if (pos < xb_size - sizeof(int))
- memcpy(&size, xb + pos, sizeof(int));
- else
- size = 0;
- }
- return pos;
-}
-
-int nval_del(char *xb, int xb_size, const YCHAR * name)
-{
- int pos = nval_find(xb, xb_size, name, NULL);
- int size;
-
- if (pos >= 0 && pos < xb_size) {
- /* Find size, shift rest over this record, then zero out the rest of buffer */
- memcpy(&size, xb + pos, sizeof(int));
- memcpy(xb + pos, xb + pos + size, xb_size - (pos + size));
- memset(xb + (xb_size - size), 0, size);
- return 0;
- } else {
- return -ENODATA;
- }
-}
-
-int nval_set(char *xb, int xb_size, const YCHAR * name, const char *buf,
- int bsize, int flags)
-{
- int pos;
- int namelen = strnlen(name, xb_size);
- int reclen;
- int size_exist = 0;
- int space;
- int start;
-
- pos = nval_find(xb, xb_size, name, &size_exist);
-
- if (flags & XATTR_CREATE && pos >= 0)
- return -EEXIST;
- if (flags & XATTR_REPLACE && pos < 0)
- return -ENODATA;
-
- start = nval_used(xb, xb_size);
- space = xb_size - start + size_exist;
-
- reclen = (sizeof(int) + namelen + 1 + bsize);
-
- if (reclen > space)
- return -ENOSPC;
-
- if (pos >= 0) {
- nval_del(xb, xb_size, name);
- start = nval_used(xb, xb_size);
- }
-
- pos = start;
-
- memcpy(xb + pos, &reclen, sizeof(int));
- pos += sizeof(int);
-	strncpy((YCHAR *) (xb + pos), name, namelen + 1);
- pos += (namelen + 1);
- memcpy(xb + pos, buf, bsize);
- return 0;
-}
-
-int nval_get(const char *xb, int xb_size, const YCHAR * name, char *buf,
- int bsize)
-{
- int pos = nval_find(xb, xb_size, name, NULL);
- int size;
-
- if (pos >= 0 && pos < xb_size) {
-
- memcpy(&size, xb + pos, sizeof(int));
- pos += sizeof(int); /* advance past record length */
- size -= sizeof(int);
-
- /* Advance over name string */
- while (xb[pos] && size > 0 && pos < xb_size) {
- pos++;
- size--;
- }
- /*Advance over NUL */
- pos++;
- size--;
-
- if (size <= bsize) {
- memcpy(buf, xb + pos, size);
- return size;
- }
-
- }
- if (pos >= 0)
- return -ERANGE;
- else
- return -ENODATA;
-}
-
-int nval_list(const char *xb, int xb_size, char *buf, int bsize)
-{
- int pos = 0;
- int size;
- int name_len;
- int ncopied = 0;
- int filled = 0;
-
- memcpy(&size, xb + pos, sizeof(int));
- while (size > sizeof(int) && size <= xb_size && (pos + size) < xb_size
- && !filled) {
- pos += sizeof(int);
- size -= sizeof(int);
- name_len = strnlen((YCHAR *) (xb + pos), size);
- if (ncopied + name_len + 1 < bsize) {
- memcpy(buf, xb + pos, name_len * sizeof(YCHAR));
- buf += name_len;
- *buf = '\0';
- buf++;
- if (sizeof(YCHAR) > 1) {
- *buf = '\0';
- buf++;
- }
- ncopied += (name_len + 1);
- } else {
- filled = 1;
- }
- pos += size;
- if (pos < xb_size - sizeof(int))
- memcpy(&size, xb + pos, sizeof(int));
- else
- size = 0;
- }
- return ncopied;
-}
-
-int nval_hasvalues(const char *xb, int xb_size)
-{
- return nval_used(xb, xb_size) > 0;
-}
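
Taken together, the nval_* calls above implement a tiny xattr-style store over a caller-supplied flat buffer. A minimal usage sketch, assuming a non-unicode build where YCHAR is plain char (buffer sizes and attribute names are hypothetical; error handling elided):

    char xb[512] = { 0 };   /* caller-owned store; all-zero means empty */
    char out[64];
    int n;

    /* create a new attribute; fails with -EEXIST if it already exists */
    nval_set(xb, sizeof(xb), "user.comment", "hello", 5, XATTR_CREATE);

    /* read it back; returns the value size or a negative errno */
    n = nval_get(xb, sizeof(xb), "user.comment", out, sizeof(out));

    /* enumerate the stored names, then delete one */
    nval_list(xb, sizeof(xb), out, sizeof(out));
    nval_del(xb, sizeof(xb), "user.comment");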
diff --git a/fs/yaffs2/yaffs_nameval.h b/fs/yaffs2/yaffs_nameval.h
deleted file mode 100644
index 2bb02b6..0000000
--- a/fs/yaffs2/yaffs_nameval.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __NAMEVAL_H__
-#define __NAMEVAL_H__
-
-#include "yportenv.h"
-
-int nval_del(char *xb, int xb_size, const YCHAR * name);
-int nval_set(char *xb, int xb_size, const YCHAR * name, const char *buf,
- int bsize, int flags);
-int nval_get(const char *xb, int xb_size, const YCHAR * name, char *buf,
- int bsize);
-int nval_list(const char *xb, int xb_size, char *buf, int bsize);
-int nval_hasvalues(const char *xb, int xb_size);
-#endif
diff --git a/fs/yaffs2/yaffs_nand.c b/fs/yaffs2/yaffs_nand.c
deleted file mode 100644
index e816cab..0000000
--- a/fs/yaffs2/yaffs_nand.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_nand.h"
-#include "yaffs_tagscompat.h"
-#include "yaffs_tagsvalidity.h"
-
-#include "yaffs_getblockinfo.h"
-
-int yaffs_rd_chunk_tags_nand(struct yaffs_dev *dev, int nand_chunk,
- u8 * buffer, struct yaffs_ext_tags *tags)
-{
- int result;
- struct yaffs_ext_tags local_tags;
-
- int realigned_chunk = nand_chunk - dev->chunk_offset;
-
- dev->n_page_reads++;
-
- /* If there are no tags provided, use local tags to get prioritised gc working */
- if (!tags)
- tags = &local_tags;
-
- if (dev->param.read_chunk_tags_fn)
- result =
- dev->param.read_chunk_tags_fn(dev, realigned_chunk, buffer,
- tags);
- else
- result = yaffs_tags_compat_rd(dev,
- realigned_chunk, buffer, tags);
- if (tags && tags->ecc_result > YAFFS_ECC_RESULT_NO_ERROR) {
-
- struct yaffs_block_info *bi;
- bi = yaffs_get_block_info(dev,
- nand_chunk /
- dev->param.chunks_per_block);
- yaffs_handle_chunk_error(dev, bi);
- }
-
- return result;
-}
-
-int yaffs_wr_chunk_tags_nand(struct yaffs_dev *dev,
- int nand_chunk,
- const u8 * buffer, struct yaffs_ext_tags *tags)
-{
-
- dev->n_page_writes++;
-
- nand_chunk -= dev->chunk_offset;
-
- if (tags) {
- tags->seq_number = dev->seq_number;
- tags->chunk_used = 1;
- if (!yaffs_validate_tags(tags)) {
- yaffs_trace(YAFFS_TRACE_ERROR, "Writing uninitialised tags");
- YBUG();
- }
- yaffs_trace(YAFFS_TRACE_WRITE,
- "Writing chunk %d tags %d %d",
- nand_chunk, tags->obj_id, tags->chunk_id);
- } else {
- yaffs_trace(YAFFS_TRACE_ERROR, "Writing with no tags");
- YBUG();
- }
-
- if (dev->param.write_chunk_tags_fn)
- return dev->param.write_chunk_tags_fn(dev, nand_chunk, buffer,
- tags);
- else
- return yaffs_tags_compat_wr(dev, nand_chunk, buffer, tags);
-}
-
-int yaffs_mark_bad(struct yaffs_dev *dev, int block_no)
-{
- block_no -= dev->block_offset;
-
- if (dev->param.bad_block_fn)
- return dev->param.bad_block_fn(dev, block_no);
- else
- return yaffs_tags_compat_mark_bad(dev, block_no);
-}
-
-int yaffs_query_init_block_state(struct yaffs_dev *dev,
- int block_no,
- enum yaffs_block_state *state,
- u32 * seq_number)
-{
- block_no -= dev->block_offset;
-
- if (dev->param.query_block_fn)
- return dev->param.query_block_fn(dev, block_no, state,
- seq_number);
- else
- return yaffs_tags_compat_query_block(dev, block_no,
- state, seq_number);
-}
-
-int yaffs_erase_block(struct yaffs_dev *dev, int flash_block)
-{
- int result;
-
- flash_block -= dev->block_offset;
-
- dev->n_erasures++;
-
- result = dev->param.erase_fn(dev, flash_block);
-
- return result;
-}
-
-int yaffs_init_nand(struct yaffs_dev *dev)
-{
- if (dev->param.initialise_flash_fn)
- return dev->param.initialise_flash_fn(dev);
- return YAFFS_OK;
-}
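
Each wrapper above follows the same shape: rebase the chunk or block number by the partition offset, bump a statistics counter where one exists, then dispatch to the driver hook in dev->param if one is set, falling back to the yaffs_tags_compat_* path otherwise. A hedged sketch of how a driver might wire those hooks before calling yaffs_init_nand() (the my_mtd_* functions are hypothetical; the field names are those used by the wrappers above):

    static void my_setup_dev(struct yaffs_dev *dev)
    {
            /* hypothetical driver callbacks */
            dev->param.read_chunk_tags_fn  = my_mtd_read_chunk_tags;
            dev->param.write_chunk_tags_fn = my_mtd_write_chunk_tags;
            dev->param.bad_block_fn        = my_mtd_mark_bad;
            dev->param.query_block_fn      = my_mtd_query_block;
            dev->param.erase_fn            = my_mtd_erase_block;
            dev->param.initialise_flash_fn = my_mtd_init_flash;
    }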
diff --git a/fs/yaffs2/yaffs_nand.h b/fs/yaffs2/yaffs_nand.h
deleted file mode 100644
index 543f198..0000000
--- a/fs/yaffs2/yaffs_nand.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_NAND_H__
-#define __YAFFS_NAND_H__
-#include "yaffs_guts.h"
-
-int yaffs_rd_chunk_tags_nand(struct yaffs_dev *dev, int nand_chunk,
- u8 * buffer, struct yaffs_ext_tags *tags);
-
-int yaffs_wr_chunk_tags_nand(struct yaffs_dev *dev,
- int nand_chunk,
- const u8 * buffer, struct yaffs_ext_tags *tags);
-
-int yaffs_mark_bad(struct yaffs_dev *dev, int block_no);
-
-int yaffs_query_init_block_state(struct yaffs_dev *dev,
- int block_no,
- enum yaffs_block_state *state,
- unsigned *seq_number);
-
-int yaffs_erase_block(struct yaffs_dev *dev, int flash_block);
-
-int yaffs_init_nand(struct yaffs_dev *dev);
-
-#endif
diff --git a/fs/yaffs2/yaffs_packedtags1.c b/fs/yaffs2/yaffs_packedtags1.c
deleted file mode 100644
index a77f095..0000000
--- a/fs/yaffs2/yaffs_packedtags1.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_packedtags1.h"
-#include "yportenv.h"
-
-void yaffs_pack_tags1(struct yaffs_packed_tags1 *pt,
- const struct yaffs_ext_tags *t)
-{
- pt->chunk_id = t->chunk_id;
- pt->serial_number = t->serial_number;
- pt->n_bytes = t->n_bytes;
- pt->obj_id = t->obj_id;
- pt->ecc = 0;
- pt->deleted = (t->is_deleted) ? 0 : 1;
- pt->unused_stuff = 0;
- pt->should_be_ff = 0xFFFFFFFF;
-
-}
-
-void yaffs_unpack_tags1(struct yaffs_ext_tags *t,
- const struct yaffs_packed_tags1 *pt)
-{
- static const u8 all_ff[] =
- { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
- 0xff
- };
-
- if (memcmp(all_ff, pt, sizeof(struct yaffs_packed_tags1))) {
- t->block_bad = 0;
- if (pt->should_be_ff != 0xFFFFFFFF)
- t->block_bad = 1;
- t->chunk_used = 1;
- t->obj_id = pt->obj_id;
- t->chunk_id = pt->chunk_id;
- t->n_bytes = pt->n_bytes;
- t->ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
- t->is_deleted = (pt->deleted) ? 0 : 1;
- t->serial_number = pt->serial_number;
- } else {
- memset(t, 0, sizeof(struct yaffs_ext_tags));
- }
-}
diff --git a/fs/yaffs2/yaffs_packedtags1.h b/fs/yaffs2/yaffs_packedtags1.h
deleted file mode 100644
index d6861ff..0000000
--- a/fs/yaffs2/yaffs_packedtags1.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-/* This is used to pack YAFFS1 tags, not YAFFS2 tags. */
-
-#ifndef __YAFFS_PACKEDTAGS1_H__
-#define __YAFFS_PACKEDTAGS1_H__
-
-#include "yaffs_guts.h"
-
-struct yaffs_packed_tags1 {
- unsigned chunk_id:20;
- unsigned serial_number:2;
- unsigned n_bytes:10;
- unsigned obj_id:18;
- unsigned ecc:12;
- unsigned deleted:1;
- unsigned unused_stuff:1;
- unsigned should_be_ff;
-
-};
-
-void yaffs_pack_tags1(struct yaffs_packed_tags1 *pt,
- const struct yaffs_ext_tags *t);
-void yaffs_unpack_tags1(struct yaffs_ext_tags *t,
- const struct yaffs_packed_tags1 *pt);
-#endif
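
The bitfields above are meant to pack into exactly 12 bytes (20+2+10 and 18+12+1+1 bits each fill one 32-bit word, plus the 32-bit should_be_ff), small enough for a NAND spare area. Bitfield layout is compiler-dependent, so a build-time size check is a reasonable safeguard; a sketch (the assertion is not in the original source):

    /* 12 bytes if the compiler packs the bitfields as intended;
     * in-kernel code would typically use BUILD_BUG_ON() instead.
     */
    _Static_assert(sizeof(struct yaffs_packed_tags1) == 12,
                   "yaffs_packed_tags1 must pack into 12 bytes");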
diff --git a/fs/yaffs2/yaffs_packedtags2.c b/fs/yaffs2/yaffs_packedtags2.c
deleted file mode 100644
index 8e7fea3..0000000
--- a/fs/yaffs2/yaffs_packedtags2.c
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_packedtags2.h"
-#include "yportenv.h"
-#include "yaffs_trace.h"
-#include "yaffs_tagsvalidity.h"
-
-/* This code packs a set of extended tags into a binary structure for
- * NAND storage
- */
-
-/* Some of the information is "extra" stuff which can be packed in to
- * speed up scanning.
- * This is defined by having the EXTRA_HEADER_INFO_FLAG set.
- */
-
-/* Extra flags applied to chunk_id */
-
-#define EXTRA_HEADER_INFO_FLAG 0x80000000
-#define EXTRA_SHRINK_FLAG 0x40000000
-#define EXTRA_SHADOWS_FLAG 0x20000000
-#define EXTRA_SPARE_FLAGS 0x10000000
-
-#define ALL_EXTRA_FLAGS 0xF0000000
-
-/* Also, the top 4 bits of the object Id are set to the object type. */
-#define EXTRA_OBJECT_TYPE_SHIFT (28)
-#define EXTRA_OBJECT_TYPE_MASK ((0x0F) << EXTRA_OBJECT_TYPE_SHIFT)
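
Put differently: when a chunk holds an object header (chunk_id 0), the otherwise-unused chunk_id field is repurposed so that its top four bits carry flags and its low bits carry the parent object id, while the top four bits of obj_id carry the object type. A small decoding sketch consistent with the defines above:

    /* Sketch: pulling the "extra" fields back out of a packed pair. */
    static int decode_extra(unsigned chunk_id, unsigned obj_id)
    {
            unsigned parent_id, obj_type;
            int is_shrink, shadows;

            if (!(chunk_id & EXTRA_HEADER_INFO_FLAG))
                    return 0;       /* ordinary data chunk, no extras */

            parent_id = chunk_id & ~ALL_EXTRA_FLAGS;
            is_shrink = (chunk_id & EXTRA_SHRINK_FLAG) ? 1 : 0;
            shadows   = (chunk_id & EXTRA_SHADOWS_FLAG) ? 1 : 0;
            obj_type  = obj_id >> EXTRA_OBJECT_TYPE_SHIFT;

            (void)parent_id; (void)is_shrink; (void)shadows; (void)obj_type;
            return 1;               /* header chunk carrying extras */
    }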
-
-static void yaffs_dump_packed_tags2_tags_only(const struct
- yaffs_packed_tags2_tags_only *ptt)
-{
- yaffs_trace(YAFFS_TRACE_MTD,
- "packed tags obj %d chunk %d byte %d seq %d",
- ptt->obj_id, ptt->chunk_id, ptt->n_bytes, ptt->seq_number);
-}
-
-static void yaffs_dump_packed_tags2(const struct yaffs_packed_tags2 *pt)
-{
- yaffs_dump_packed_tags2_tags_only(&pt->t);
-}
-
-static void yaffs_dump_tags2(const struct yaffs_ext_tags *t)
-{
- yaffs_trace(YAFFS_TRACE_MTD,
- "ext.tags eccres %d blkbad %d chused %d obj %d chunk%d byte %d del %d ser %d seq %d",
- t->ecc_result, t->block_bad, t->chunk_used, t->obj_id,
- t->chunk_id, t->n_bytes, t->is_deleted, t->serial_number,
- t->seq_number);
-
-}
-
-void yaffs_pack_tags2_tags_only(struct yaffs_packed_tags2_tags_only *ptt,
- const struct yaffs_ext_tags *t)
-{
- ptt->chunk_id = t->chunk_id;
- ptt->seq_number = t->seq_number;
- ptt->n_bytes = t->n_bytes;
- ptt->obj_id = t->obj_id;
-
- if (t->chunk_id == 0 && t->extra_available) {
- /* Store the extra header info instead */
- /* We save the parent object in the chunk_id */
- ptt->chunk_id = EXTRA_HEADER_INFO_FLAG | t->extra_parent_id;
- if (t->extra_is_shrink)
- ptt->chunk_id |= EXTRA_SHRINK_FLAG;
- if (t->extra_shadows)
- ptt->chunk_id |= EXTRA_SHADOWS_FLAG;
-
- ptt->obj_id &= ~EXTRA_OBJECT_TYPE_MASK;
- ptt->obj_id |= (t->extra_obj_type << EXTRA_OBJECT_TYPE_SHIFT);
-
- if (t->extra_obj_type == YAFFS_OBJECT_TYPE_HARDLINK)
- ptt->n_bytes = t->extra_equiv_id;
- else if (t->extra_obj_type == YAFFS_OBJECT_TYPE_FILE)
- ptt->n_bytes = t->extra_length;
- else
- ptt->n_bytes = 0;
- }
-
- yaffs_dump_packed_tags2_tags_only(ptt);
- yaffs_dump_tags2(t);
-}
-
-void yaffs_pack_tags2(struct yaffs_packed_tags2 *pt,
- const struct yaffs_ext_tags *t, int tags_ecc)
-{
- yaffs_pack_tags2_tags_only(&pt->t, t);
-
- if (tags_ecc)
- yaffs_ecc_calc_other((unsigned char *)&pt->t,
- sizeof(struct
- yaffs_packed_tags2_tags_only),
- &pt->ecc);
-}
-
-void yaffs_unpack_tags2_tags_only(struct yaffs_ext_tags *t,
- struct yaffs_packed_tags2_tags_only *ptt)
-{
-
- memset(t, 0, sizeof(struct yaffs_ext_tags));
-
- yaffs_init_tags(t);
-
- if (ptt->seq_number != 0xFFFFFFFF) {
- t->block_bad = 0;
- t->chunk_used = 1;
- t->obj_id = ptt->obj_id;
- t->chunk_id = ptt->chunk_id;
- t->n_bytes = ptt->n_bytes;
- t->is_deleted = 0;
- t->serial_number = 0;
- t->seq_number = ptt->seq_number;
-
- /* Do extra header info stuff */
-
- if (ptt->chunk_id & EXTRA_HEADER_INFO_FLAG) {
- t->chunk_id = 0;
- t->n_bytes = 0;
-
- t->extra_available = 1;
- t->extra_parent_id =
- ptt->chunk_id & (~(ALL_EXTRA_FLAGS));
- t->extra_is_shrink =
- (ptt->chunk_id & EXTRA_SHRINK_FLAG) ? 1 : 0;
- t->extra_shadows =
- (ptt->chunk_id & EXTRA_SHADOWS_FLAG) ? 1 : 0;
- t->extra_obj_type =
- ptt->obj_id >> EXTRA_OBJECT_TYPE_SHIFT;
- t->obj_id &= ~EXTRA_OBJECT_TYPE_MASK;
-
- if (t->extra_obj_type == YAFFS_OBJECT_TYPE_HARDLINK)
- t->extra_equiv_id = ptt->n_bytes;
- else
- t->extra_length = ptt->n_bytes;
- }
- }
-
- yaffs_dump_packed_tags2_tags_only(ptt);
- yaffs_dump_tags2(t);
-
-}
-
-void yaffs_unpack_tags2(struct yaffs_ext_tags *t, struct yaffs_packed_tags2 *pt,
- int tags_ecc)
-{
-
- enum yaffs_ecc_result ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
-
- if (pt->t.seq_number != 0xFFFFFFFF && tags_ecc) {
- /* Chunk is in use and we need to do ECC */
-
- struct yaffs_ecc_other ecc;
- int result;
- yaffs_ecc_calc_other((unsigned char *)&pt->t,
- sizeof(struct
- yaffs_packed_tags2_tags_only),
- &ecc);
- result =
- yaffs_ecc_correct_other((unsigned char *)&pt->t,
- sizeof(struct
- yaffs_packed_tags2_tags_only),
- &pt->ecc, &ecc);
- switch (result) {
- case 0:
- ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
- break;
- case 1:
- ecc_result = YAFFS_ECC_RESULT_FIXED;
- break;
- case -1:
- ecc_result = YAFFS_ECC_RESULT_UNFIXED;
- break;
- default:
- ecc_result = YAFFS_ECC_RESULT_UNKNOWN;
- }
- }
-
- yaffs_unpack_tags2_tags_only(t, &pt->t);
-
- t->ecc_result = ecc_result;
-
- yaffs_dump_packed_tags2(pt);
- yaffs_dump_tags2(t);
-}
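
A pack/unpack round trip ties the pieces together: packing fills the tags-only struct (optionally computing an ECC over it), and unpacking recomputes that ECC, corrects a single-bit error if one is found, and reports the outcome in ecc_result. A minimal sketch, assuming tags ECC is enabled:

    struct yaffs_ext_tags in, out;
    struct yaffs_packed_tags2 pt;

    memset(&in, 0, sizeof(in));
    in.obj_id = 42;
    in.chunk_id = 7;
    in.n_bytes = 512;
    in.seq_number = 1000;

    yaffs_pack_tags2(&pt, &in, 1);     /* 1 = compute tags ECC */
    /* ...pt would normally be written to and read back from NAND... */
    yaffs_unpack_tags2(&out, &pt, 1);  /* fixes 1-bit tag corruption */
    /* out.ecc_result is YAFFS_ECC_RESULT_NO_ERROR on a clean trip */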
diff --git a/fs/yaffs2/yaffs_packedtags2.h b/fs/yaffs2/yaffs_packedtags2.h
deleted file mode 100644
index f329669..0000000
--- a/fs/yaffs2/yaffs_packedtags2.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-/* This is used to pack YAFFS2 tags, not YAFFS1 tags. */
-
-#ifndef __YAFFS_PACKEDTAGS2_H__
-#define __YAFFS_PACKEDTAGS2_H__
-
-#include "yaffs_guts.h"
-#include "yaffs_ecc.h"
-
-struct yaffs_packed_tags2_tags_only {
- unsigned seq_number;
- unsigned obj_id;
- unsigned chunk_id;
- unsigned n_bytes;
-};
-
-struct yaffs_packed_tags2 {
- struct yaffs_packed_tags2_tags_only t;
- struct yaffs_ecc_other ecc;
-};
-
-/* Full packed tags with ECC, used for oob tags */
-void yaffs_pack_tags2(struct yaffs_packed_tags2 *pt,
- const struct yaffs_ext_tags *t, int tags_ecc);
-void yaffs_unpack_tags2(struct yaffs_ext_tags *t, struct yaffs_packed_tags2 *pt,
- int tags_ecc);
-
-/* Only the tags part (no ECC) for use with inband tags */
-void yaffs_pack_tags2_tags_only(struct yaffs_packed_tags2_tags_only *pt,
- const struct yaffs_ext_tags *t);
-void yaffs_unpack_tags2_tags_only(struct yaffs_ext_tags *t,
- struct yaffs_packed_tags2_tags_only *pt);
-#endif
diff --git a/fs/yaffs2/yaffs_tagscompat.c b/fs/yaffs2/yaffs_tagscompat.c
deleted file mode 100644
index 7578075..0000000
--- a/fs/yaffs2/yaffs_tagscompat.c
+++ /dev/null
@@ -1,422 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_guts.h"
-#include "yaffs_tagscompat.h"
-#include "yaffs_ecc.h"
-#include "yaffs_getblockinfo.h"
-#include "yaffs_trace.h"
-
-static void yaffs_handle_rd_data_error(struct yaffs_dev *dev, int nand_chunk);
-
-
-/********** Tags ECC calculations *********/
-
-void yaffs_calc_ecc(const u8 * data, struct yaffs_spare *spare)
-{
- yaffs_ecc_cacl(data, spare->ecc1);
- yaffs_ecc_cacl(&data[256], spare->ecc2);
-}
-
-void yaffs_calc_tags_ecc(struct yaffs_tags *tags)
-{
- /* Calculate an ecc */
-
- unsigned char *b = ((union yaffs_tags_union *)tags)->as_bytes;
- unsigned i, j;
- unsigned ecc = 0;
- unsigned bit = 0;
-
- tags->ecc = 0;
-
- for (i = 0; i < 8; i++) {
- for (j = 1; j & 0xff; j <<= 1) {
- bit++;
- if (b[i] & j)
- ecc ^= bit;
- }
- }
-
- tags->ecc = ecc;
-
-}
-
-int yaffs_check_tags_ecc(struct yaffs_tags *tags)
-{
- unsigned ecc = tags->ecc;
-
- yaffs_calc_tags_ecc(tags);
-
- ecc ^= tags->ecc;
-
- if (ecc && ecc <= 64) {
- /* TODO: Handle the failure better. Retire? */
- unsigned char *b = ((union yaffs_tags_union *)tags)->as_bytes;
-
- ecc--;
-
- b[ecc / 8] ^= (1 << (ecc & 7));
-
- /* Now recalc the ecc */
- yaffs_calc_tags_ecc(tags);
-
- return 1; /* recovered error */
- } else if (ecc) {
- /* Weird ecc failure value */
- /* TODO: Need to do something here */
- return -1; /* unrecovered error */
- }
-
- return 0;
-}
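
The scheme above is a position-parity code: the ECC word is the XOR of the 1-based positions of every set bit, so a single flipped bit changes the stored-versus-recomputed XOR by exactly that bit's position, which yaffs_check_tags_ecc then uses to flip it back. A standalone illustration of the idea over a plain 8-byte buffer (not the yaffs structures themselves):

    static unsigned pos_ecc(const unsigned char *b)
    {
            unsigned i, j, bit = 0, ecc = 0;

            for (i = 0; i < 8; i++)
                    for (j = 1; j & 0xff; j <<= 1) {
                            bit++;
                            if (b[i] & j)
                                    ecc ^= bit;
                    }
            return ecc;
    }

    /* After a single-bit flip, delta = old_ecc ^ pos_ecc(buf) is that
     * bit's 1-based position, so it can be corrected in place:
     *   buf[(delta - 1) / 8] ^= 1 << ((delta - 1) & 7);
     */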
-
-/********** Tags **********/
-
-static void yaffs_load_tags_to_spare(struct yaffs_spare *spare_ptr,
- struct yaffs_tags *tags_ptr)
-{
- union yaffs_tags_union *tu = (union yaffs_tags_union *)tags_ptr;
-
- yaffs_calc_tags_ecc(tags_ptr);
-
- spare_ptr->tb0 = tu->as_bytes[0];
- spare_ptr->tb1 = tu->as_bytes[1];
- spare_ptr->tb2 = tu->as_bytes[2];
- spare_ptr->tb3 = tu->as_bytes[3];
- spare_ptr->tb4 = tu->as_bytes[4];
- spare_ptr->tb5 = tu->as_bytes[5];
- spare_ptr->tb6 = tu->as_bytes[6];
- spare_ptr->tb7 = tu->as_bytes[7];
-}
-
-static void yaffs_get_tags_from_spare(struct yaffs_dev *dev,
- struct yaffs_spare *spare_ptr,
- struct yaffs_tags *tags_ptr)
-{
- union yaffs_tags_union *tu = (union yaffs_tags_union *)tags_ptr;
- int result;
-
- tu->as_bytes[0] = spare_ptr->tb0;
- tu->as_bytes[1] = spare_ptr->tb1;
- tu->as_bytes[2] = spare_ptr->tb2;
- tu->as_bytes[3] = spare_ptr->tb3;
- tu->as_bytes[4] = spare_ptr->tb4;
- tu->as_bytes[5] = spare_ptr->tb5;
- tu->as_bytes[6] = spare_ptr->tb6;
- tu->as_bytes[7] = spare_ptr->tb7;
-
- result = yaffs_check_tags_ecc(tags_ptr);
- if (result > 0)
- dev->n_tags_ecc_fixed++;
- else if (result < 0)
- dev->n_tags_ecc_unfixed++;
-}
-
-static void yaffs_spare_init(struct yaffs_spare *spare)
-{
- memset(spare, 0xFF, sizeof(struct yaffs_spare));
-}
-
-static int yaffs_wr_nand(struct yaffs_dev *dev,
- int nand_chunk, const u8 * data,
- struct yaffs_spare *spare)
-{
- if (nand_chunk < dev->param.start_block * dev->param.chunks_per_block) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>> yaffs chunk %d is not valid",
- nand_chunk);
- return YAFFS_FAIL;
- }
-
- return dev->param.write_chunk_fn(dev, nand_chunk, data, spare);
-}
-
-static int yaffs_rd_chunk_nand(struct yaffs_dev *dev,
- int nand_chunk,
- u8 * data,
- struct yaffs_spare *spare,
- enum yaffs_ecc_result *ecc_result,
- int correct_errors)
-{
- int ret_val;
- struct yaffs_spare local_spare;
-
- if (!spare && data) {
- /* If we don't have a real spare, then we use a local one. */
- /* Need this for the calculation of the ecc */
- spare = &local_spare;
- }
-
- if (!dev->param.use_nand_ecc) {
- ret_val =
- dev->param.read_chunk_fn(dev, nand_chunk, data, spare);
- if (data && correct_errors) {
- /* Do ECC correction */
- /* Todo handle any errors */
- int ecc_result1, ecc_result2;
- u8 calc_ecc[3];
-
- yaffs_ecc_cacl(data, calc_ecc);
- ecc_result1 =
- yaffs_ecc_correct(data, spare->ecc1, calc_ecc);
- yaffs_ecc_cacl(&data[256], calc_ecc);
- ecc_result2 =
- yaffs_ecc_correct(&data[256], spare->ecc2,
- calc_ecc);
-
- if (ecc_result1 > 0) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>>yaffs ecc error fix performed on chunk %d:0",
- nand_chunk);
- dev->n_ecc_fixed++;
- } else if (ecc_result1 < 0) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>>yaffs ecc error unfixed on chunk %d:0",
- nand_chunk);
- dev->n_ecc_unfixed++;
- }
-
- if (ecc_result2 > 0) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>>yaffs ecc error fix performed on chunk %d:1",
- nand_chunk);
- dev->n_ecc_fixed++;
- } else if (ecc_result2 < 0) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>>yaffs ecc error unfixed on chunk %d:1",
- nand_chunk);
- dev->n_ecc_unfixed++;
- }
-
- if (ecc_result1 || ecc_result2) {
- /* We had a data problem on this page */
- yaffs_handle_rd_data_error(dev, nand_chunk);
- }
-
- if (ecc_result1 < 0 || ecc_result2 < 0)
- *ecc_result = YAFFS_ECC_RESULT_UNFIXED;
- else if (ecc_result1 > 0 || ecc_result2 > 0)
- *ecc_result = YAFFS_ECC_RESULT_FIXED;
- else
- *ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
- }
- } else {
- /* Must allocate enough memory for spare+2*sizeof(int) */
- /* for ecc results from device. */
- struct yaffs_nand_spare nspare;
-
- memset(&nspare, 0, sizeof(nspare));
-
- ret_val = dev->param.read_chunk_fn(dev, nand_chunk, data,
- (struct yaffs_spare *)
- &nspare);
- memcpy(spare, &nspare, sizeof(struct yaffs_spare));
- if (data && correct_errors) {
- if (nspare.eccres1 > 0) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>>mtd ecc error fix performed on chunk %d:0",
- nand_chunk);
- } else if (nspare.eccres1 < 0) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>>mtd ecc error unfixed on chunk %d:0",
- nand_chunk);
- }
-
- if (nspare.eccres2 > 0) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>>mtd ecc error fix performed on chunk %d:1",
- nand_chunk);
- } else if (nspare.eccres2 < 0) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "**>>mtd ecc error unfixed on chunk %d:1",
- nand_chunk);
- }
-
- if (nspare.eccres1 || nspare.eccres2) {
- /* We had a data problem on this page */
- yaffs_handle_rd_data_error(dev, nand_chunk);
- }
-
- if (nspare.eccres1 < 0 || nspare.eccres2 < 0)
- *ecc_result = YAFFS_ECC_RESULT_UNFIXED;
- else if (nspare.eccres1 > 0 || nspare.eccres2 > 0)
- *ecc_result = YAFFS_ECC_RESULT_FIXED;
- else
- *ecc_result = YAFFS_ECC_RESULT_NO_ERROR;
-
- }
- }
- return ret_val;
-}
-
-/*
- * Functions for robustness handling
- */
-
-static void yaffs_handle_rd_data_error(struct yaffs_dev *dev, int nand_chunk)
-{
- int flash_block = nand_chunk / dev->param.chunks_per_block;
-
- /* Mark the block for retirement */
- yaffs_get_block_info(dev,
- flash_block + dev->block_offset)->needs_retiring =
- 1;
- yaffs_trace(YAFFS_TRACE_ERROR | YAFFS_TRACE_BAD_BLOCKS,
- "**>>Block %d marked for retirement",
- flash_block);
-
- /* TODO:
- * Just do a garbage collection on the affected block
- * then retire the block
- * NB recursion
- */
-}
-
-int yaffs_tags_compat_wr(struct yaffs_dev *dev,
- int nand_chunk,
- const u8 * data, const struct yaffs_ext_tags *ext_tags)
-{
- struct yaffs_spare spare;
- struct yaffs_tags tags;
-
- yaffs_spare_init(&spare);
-
- if (ext_tags->is_deleted)
- spare.page_status = 0;
- else {
- tags.obj_id = ext_tags->obj_id;
- tags.chunk_id = ext_tags->chunk_id;
-
- tags.n_bytes_lsb = ext_tags->n_bytes & 0x3ff;
-
- if (dev->data_bytes_per_chunk >= 1024)
- tags.n_bytes_msb = (ext_tags->n_bytes >> 10) & 3;
- else
- tags.n_bytes_msb = 3;
-
- tags.serial_number = ext_tags->serial_number;
-
- if (!dev->param.use_nand_ecc && data)
- yaffs_calc_ecc(data, &spare);
-
- yaffs_load_tags_to_spare(&spare, &tags);
-
- }
-
- return yaffs_wr_nand(dev, nand_chunk, data, &spare);
-}
-
-int yaffs_tags_compat_rd(struct yaffs_dev *dev,
- int nand_chunk,
- u8 * data, struct yaffs_ext_tags *ext_tags)
-{
-
- struct yaffs_spare spare;
- struct yaffs_tags tags;
- enum yaffs_ecc_result ecc_result = YAFFS_ECC_RESULT_UNKNOWN;
-
- static struct yaffs_spare spare_ff;
- static int init;
-
- if (!init) {
- memset(&spare_ff, 0xFF, sizeof(spare_ff));
- init = 1;
- }
-
- if (yaffs_rd_chunk_nand(dev, nand_chunk, data, &spare, &ecc_result, 1)) {
- /* ext_tags may be NULL */
- if (ext_tags) {
-
- int deleted =
- (hweight8(spare.page_status) < 7) ? 1 : 0;
-
- ext_tags->is_deleted = deleted;
- ext_tags->ecc_result = ecc_result;
- ext_tags->block_bad = 0; /* We're reading it */
- /* therefore it is not a bad block */
- ext_tags->chunk_used =
- (memcmp(&spare_ff, &spare, sizeof(spare_ff)) !=
- 0) ? 1 : 0;
-
- if (ext_tags->chunk_used) {
- yaffs_get_tags_from_spare(dev, &spare, &tags);
-
- ext_tags->obj_id = tags.obj_id;
- ext_tags->chunk_id = tags.chunk_id;
- ext_tags->n_bytes = tags.n_bytes_lsb;
-
- if (dev->data_bytes_per_chunk >= 1024)
- ext_tags->n_bytes |=
- (((unsigned)tags.
- n_bytes_msb) << 10);
-
- ext_tags->serial_number = tags.serial_number;
- }
- }
-
- return YAFFS_OK;
- } else {
- return YAFFS_FAIL;
- }
-}
-
-int yaffs_tags_compat_mark_bad(struct yaffs_dev *dev, int flash_block)
-{
-
- struct yaffs_spare spare;
-
- memset(&spare, 0xff, sizeof(struct yaffs_spare));
-
- spare.block_status = 'Y';
-
- yaffs_wr_nand(dev, flash_block * dev->param.chunks_per_block, NULL,
- &spare);
- yaffs_wr_nand(dev, flash_block * dev->param.chunks_per_block + 1,
- NULL, &spare);
-
- return YAFFS_OK;
-
-}
-
-int yaffs_tags_compat_query_block(struct yaffs_dev *dev,
- int block_no,
- enum yaffs_block_state *state,
- u32 * seq_number)
-{
-
- struct yaffs_spare spare0, spare1;
- static struct yaffs_spare spare_ff;
- static int init;
- enum yaffs_ecc_result dummy;
-
- if (!init) {
- memset(&spare_ff, 0xFF, sizeof(spare_ff));
- init = 1;
- }
-
- *seq_number = 0;
-
- yaffs_rd_chunk_nand(dev, block_no * dev->param.chunks_per_block, NULL,
- &spare0, &dummy, 1);
- yaffs_rd_chunk_nand(dev, block_no * dev->param.chunks_per_block + 1,
- NULL, &spare1, &dummy, 1);
-
- if (hweight8(spare0.block_status & spare1.block_status) < 7)
- *state = YAFFS_BLOCK_STATE_DEAD;
- else if (memcmp(&spare_ff, &spare0, sizeof(spare_ff)) == 0)
- *state = YAFFS_BLOCK_STATE_EMPTY;
- else
- *state = YAFFS_BLOCK_STATE_NEEDS_SCANNING;
-
- return YAFFS_OK;
-}
diff --git a/fs/yaffs2/yaffs_tagscompat.h b/fs/yaffs2/yaffs_tagscompat.h
deleted file mode 100644
index 8cd35dc..0000000
--- a/fs/yaffs2/yaffs_tagscompat.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_TAGSCOMPAT_H__
-#define __YAFFS_TAGSCOMPAT_H__
-
-#include "yaffs_guts.h"
-int yaffs_tags_compat_wr(struct yaffs_dev *dev,
- int nand_chunk,
- const u8 * data, const struct yaffs_ext_tags *tags);
-int yaffs_tags_compat_rd(struct yaffs_dev *dev,
- int nand_chunk,
- u8 * data, struct yaffs_ext_tags *tags);
-int yaffs_tags_compat_mark_bad(struct yaffs_dev *dev, int block_no);
-int yaffs_tags_compat_query_block(struct yaffs_dev *dev,
- int block_no,
- enum yaffs_block_state *state,
- u32 * seq_number);
-
-void yaffs_calc_tags_ecc(struct yaffs_tags *tags);
-int yaffs_check_tags_ecc(struct yaffs_tags *tags);
-int yaffs_count_bits(u8 byte);
-
-#endif
diff --git a/fs/yaffs2/yaffs_tagsvalidity.c b/fs/yaffs2/yaffs_tagsvalidity.c
deleted file mode 100644
index 4358d79..0000000
--- a/fs/yaffs2/yaffs_tagsvalidity.c
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_tagsvalidity.h"
-
-void yaffs_init_tags(struct yaffs_ext_tags *tags)
-{
- memset(tags, 0, sizeof(struct yaffs_ext_tags));
- tags->validity0 = 0xAAAAAAAA;
- tags->validity1 = 0x55555555;
-}
-
-int yaffs_validate_tags(struct yaffs_ext_tags *tags)
-{
- return (tags->validity0 == 0xAAAAAAAA && tags->validity1 == 0x55555555);
-
-}
diff --git a/fs/yaffs2/yaffs_tagsvalidity.h b/fs/yaffs2/yaffs_tagsvalidity.h
deleted file mode 100644
index 36a021f..0000000
--- a/fs/yaffs2/yaffs_tagsvalidity.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_TAGS_VALIDITY_H__
-#define __YAFFS_TAGS_VALIDITY_H__
-
-#include "yaffs_guts.h"
-
-void yaffs_init_tags(struct yaffs_ext_tags *tags);
-int yaffs_validate_tags(struct yaffs_ext_tags *tags);
-#endif
diff --git a/fs/yaffs2/yaffs_trace.h b/fs/yaffs2/yaffs_trace.h
deleted file mode 100644
index 6273dbf..0000000
--- a/fs/yaffs2/yaffs_trace.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YTRACE_H__
-#define __YTRACE_H__
-
-extern unsigned int yaffs_trace_mask;
-extern unsigned int yaffs_wr_attempts;
-
-/*
- * Tracing flags.
- * The flags masked in YAFFS_TRACE_ALWAYS are always traced.
- */
-
-#define YAFFS_TRACE_OS 0x00000002
-#define YAFFS_TRACE_ALLOCATE 0x00000004
-#define YAFFS_TRACE_SCAN 0x00000008
-#define YAFFS_TRACE_BAD_BLOCKS 0x00000010
-#define YAFFS_TRACE_ERASE 0x00000020
-#define YAFFS_TRACE_GC 0x00000040
-#define YAFFS_TRACE_WRITE 0x00000080
-#define YAFFS_TRACE_TRACING 0x00000100
-#define YAFFS_TRACE_DELETION 0x00000200
-#define YAFFS_TRACE_BUFFERS 0x00000400
-#define YAFFS_TRACE_NANDACCESS 0x00000800
-#define YAFFS_TRACE_GC_DETAIL 0x00001000
-#define YAFFS_TRACE_SCAN_DEBUG 0x00002000
-#define YAFFS_TRACE_MTD 0x00004000
-#define YAFFS_TRACE_CHECKPOINT 0x00008000
-
-#define YAFFS_TRACE_VERIFY 0x00010000
-#define YAFFS_TRACE_VERIFY_NAND 0x00020000
-#define YAFFS_TRACE_VERIFY_FULL 0x00040000
-#define YAFFS_TRACE_VERIFY_ALL 0x000F0000
-
-#define YAFFS_TRACE_SYNC 0x00100000
-#define YAFFS_TRACE_BACKGROUND 0x00200000
-#define YAFFS_TRACE_LOCK 0x00400000
-#define YAFFS_TRACE_MOUNT 0x00800000
-
-#define YAFFS_TRACE_ERROR 0x40000000
-#define YAFFS_TRACE_BUG 0x80000000
-#define YAFFS_TRACE_ALWAYS 0xF0000000
-
-#endif
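
The mask is an ordinary bitwise OR of these flags, and anything covered by YAFFS_TRACE_ALWAYS (the top nibble, which includes ERROR and BUG) is reported regardless of what the mask is set to. For example:

    /* enable GC and bad-block tracing on top of the always-on flags;
     * yaffs_trace_mask is also exposed as a writable module parameter.
     */
    yaffs_trace_mask = YAFFS_TRACE_ALWAYS | YAFFS_TRACE_GC |
                       YAFFS_TRACE_BAD_BLOCKS;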
diff --git a/fs/yaffs2/yaffs_verify.c b/fs/yaffs2/yaffs_verify.c
deleted file mode 100644
index 738c7f6..0000000
--- a/fs/yaffs2/yaffs_verify.c
+++ /dev/null
@@ -1,535 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_verify.h"
-#include "yaffs_trace.h"
-#include "yaffs_bitmap.h"
-#include "yaffs_getblockinfo.h"
-#include "yaffs_nand.h"
-
-int yaffs_skip_verification(struct yaffs_dev *dev)
-{
- dev = dev;
- return !(yaffs_trace_mask &
- (YAFFS_TRACE_VERIFY | YAFFS_TRACE_VERIFY_FULL));
-}
-
-static int yaffs_skip_full_verification(struct yaffs_dev *dev)
-{
- dev = dev;
- return !(yaffs_trace_mask & (YAFFS_TRACE_VERIFY_FULL));
-}
-
-static int yaffs_skip_nand_verification(struct yaffs_dev *dev)
-{
- dev = dev;
- return !(yaffs_trace_mask & (YAFFS_TRACE_VERIFY_NAND));
-}
-
-static const char *block_state_name[] = {
- "Unknown",
- "Needs scanning",
- "Scanning",
- "Empty",
- "Allocating",
- "Full",
- "Dirty",
- "Checkpoint",
- "Collecting",
- "Dead"
-};
-
-void yaffs_verify_blk(struct yaffs_dev *dev, struct yaffs_block_info *bi, int n)
-{
- int actually_used;
- int in_use;
-
- if (yaffs_skip_verification(dev))
- return;
-
- /* Report illegal runtime states */
- if (bi->block_state >= YAFFS_NUMBER_OF_BLOCK_STATES)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Block %d has undefined state %d",
- n, bi->block_state);
-
- switch (bi->block_state) {
- case YAFFS_BLOCK_STATE_UNKNOWN:
- case YAFFS_BLOCK_STATE_SCANNING:
- case YAFFS_BLOCK_STATE_NEEDS_SCANNING:
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Block %d has bad run-state %s",
- n, block_state_name[bi->block_state]);
- }
-
- /* Check pages in use and soft deletions are legal */
-
- actually_used = bi->pages_in_use - bi->soft_del_pages;
-
- if (bi->pages_in_use < 0
- || bi->pages_in_use > dev->param.chunks_per_block
- || bi->soft_del_pages < 0
- || bi->soft_del_pages > dev->param.chunks_per_block
- || actually_used < 0 || actually_used > dev->param.chunks_per_block)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Block %d has illegal values pages_in_used %d soft_del_pages %d",
- n, bi->pages_in_use, bi->soft_del_pages);
-
- /* Check chunk bitmap legal */
- in_use = yaffs_count_chunk_bits(dev, n);
- if (in_use != bi->pages_in_use)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Block %d has inconsistent values pages_in_use %d counted chunk bits %d",
- n, bi->pages_in_use, in_use);
-
-}
-
-void yaffs_verify_collected_blk(struct yaffs_dev *dev,
- struct yaffs_block_info *bi, int n)
-{
- yaffs_verify_blk(dev, bi, n);
-
- /* After collection the block should be in the erased state */
-
- if (bi->block_state != YAFFS_BLOCK_STATE_COLLECTING &&
- bi->block_state != YAFFS_BLOCK_STATE_EMPTY) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "Block %d is in state %d after gc, should be erased",
- n, bi->block_state);
- }
-}
-
-void yaffs_verify_blocks(struct yaffs_dev *dev)
-{
- int i;
- int state_count[YAFFS_NUMBER_OF_BLOCK_STATES];
- int illegal_states = 0;
-
- if (yaffs_skip_verification(dev))
- return;
-
- memset(state_count, 0, sizeof(state_count));
-
- for (i = dev->internal_start_block; i <= dev->internal_end_block; i++) {
- struct yaffs_block_info *bi = yaffs_get_block_info(dev, i);
- yaffs_verify_blk(dev, bi, i);
-
- if (bi->block_state < YAFFS_NUMBER_OF_BLOCK_STATES)
- state_count[bi->block_state]++;
- else
- illegal_states++;
- }
-
- yaffs_trace(YAFFS_TRACE_VERIFY, "Block summary");
-
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "%d blocks have illegal states",
- illegal_states);
- if (state_count[YAFFS_BLOCK_STATE_ALLOCATING] > 1)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Too many allocating blocks");
-
- for (i = 0; i < YAFFS_NUMBER_OF_BLOCK_STATES; i++)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "%s %d blocks",
- block_state_name[i], state_count[i]);
-
- if (dev->blocks_in_checkpt != state_count[YAFFS_BLOCK_STATE_CHECKPOINT])
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Checkpoint block count wrong dev %d count %d",
- dev->blocks_in_checkpt,
- state_count[YAFFS_BLOCK_STATE_CHECKPOINT]);
-
- if (dev->n_erased_blocks != state_count[YAFFS_BLOCK_STATE_EMPTY])
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Erased block count wrong dev %d count %d",
- dev->n_erased_blocks,
- state_count[YAFFS_BLOCK_STATE_EMPTY]);
-
- if (state_count[YAFFS_BLOCK_STATE_COLLECTING] > 1)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Too many collecting blocks %d (max is 1)",
- state_count[YAFFS_BLOCK_STATE_COLLECTING]);
-}
-
-/*
- * Verify the object header. oh must be valid, but obj and tags may be NULL in which
- * case those tests will not be performed.
- */
-void yaffs_verify_oh(struct yaffs_obj *obj, struct yaffs_obj_hdr *oh,
- struct yaffs_ext_tags *tags, int parent_check)
-{
- if (obj && yaffs_skip_verification(obj->my_dev))
- return;
-
- if (!(tags && obj && oh)) {
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Verifying object header tags %p obj %p oh %p",
- tags, obj, oh);
- return;
- }
-
- if (oh->type <= YAFFS_OBJECT_TYPE_UNKNOWN ||
- oh->type > YAFFS_OBJECT_TYPE_MAX)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d header type is illegal value 0x%x",
- tags->obj_id, oh->type);
-
- if (tags->obj_id != obj->obj_id)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d header mismatch obj_id %d",
- tags->obj_id, obj->obj_id);
-
- /*
- * Check that the object's parent ids match if parent_check requested.
- *
- * Tests do not apply to the root object.
- */
-
- if (parent_check && tags->obj_id > 1 && !obj->parent)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d header mismatch parent_id %d obj->parent is NULL",
- tags->obj_id, oh->parent_obj_id);
-
- if (parent_check && obj->parent &&
- oh->parent_obj_id != obj->parent->obj_id &&
- (oh->parent_obj_id != YAFFS_OBJECTID_UNLINKED ||
- obj->parent->obj_id != YAFFS_OBJECTID_DELETED))
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d header mismatch parent_id %d parent_obj_id %d",
- tags->obj_id, oh->parent_obj_id,
- obj->parent->obj_id);
-
- if (tags->obj_id > 1 && oh->name[0] == 0) /* Null name */
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d header name is NULL",
- obj->obj_id);
-
- if (tags->obj_id > 1 && ((u8) (oh->name[0])) == 0xff) /* Trashed name */
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d header name is 0xFF",
- obj->obj_id);
-}
-
-void yaffs_verify_file(struct yaffs_obj *obj)
-{
- int required_depth;
- int actual_depth;
- u32 last_chunk;
- u32 x;
- u32 i;
- struct yaffs_dev *dev;
- struct yaffs_ext_tags tags;
- struct yaffs_tnode *tn;
- u32 obj_id;
-
- if (!obj)
- return;
-
- if (yaffs_skip_verification(obj->my_dev))
- return;
-
- dev = obj->my_dev;
- obj_id = obj->obj_id;
-
- /* Check file size is consistent with tnode depth */
- last_chunk =
- obj->variant.file_variant.file_size / dev->data_bytes_per_chunk + 1;
- x = last_chunk >> YAFFS_TNODES_LEVEL0_BITS;
- required_depth = 0;
- while (x > 0) {
- x >>= YAFFS_TNODES_INTERNAL_BITS;
- required_depth++;
- }
-
- actual_depth = obj->variant.file_variant.top_level;
-
- /* Check that the chunks in the tnode tree are all correct.
- * We do this by scanning through the tnode tree and
- * checking the tags for every chunk match.
- */
-
- if (yaffs_skip_nand_verification(dev))
- return;
-
- for (i = 1; i <= last_chunk; i++) {
- tn = yaffs_find_tnode_0(dev, &obj->variant.file_variant, i);
-
- if (tn) {
- u32 the_chunk = yaffs_get_group_base(dev, tn, i);
- if (the_chunk > 0) {
- yaffs_rd_chunk_tags_nand(dev, the_chunk, NULL,
- &tags);
- if (tags.obj_id != obj_id || tags.chunk_id != i)
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Object %d chunk_id %d NAND mismatch chunk %d tags (%d:%d)",
- obj_id, i, the_chunk,
- tags.obj_id, tags.chunk_id);
- }
- }
- }
-}
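
The depth check above works because the level-0 tnodes fan out over YAFFS_TNODES_LEVEL0_BITS of the chunk index and every internal level adds YAFFS_TNODES_INTERNAL_BITS more, so the required depth is simply how many internal-level shifts it takes to reduce the top chunk index to zero. The same computation, pulled out as a sketch (constants assumed from yaffs_guts.h):

    /* Mirrors the depth loop in yaffs_verify_file above. */
    static int required_tnode_depth(u32 file_size, u32 bytes_per_chunk)
    {
            u32 last_chunk = file_size / bytes_per_chunk + 1;
            u32 x = last_chunk >> YAFFS_TNODES_LEVEL0_BITS;
            int depth = 0;

            while (x > 0) {
                    x >>= YAFFS_TNODES_INTERNAL_BITS;
                    depth++;
            }
            return depth;
    }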
-
-void yaffs_verify_link(struct yaffs_obj *obj)
-{
- if (obj && yaffs_skip_verification(obj->my_dev))
- return;
-
- /* Verify sane equivalent object */
-}
-
-void yaffs_verify_symlink(struct yaffs_obj *obj)
-{
- if (obj && yaffs_skip_verification(obj->my_dev))
- return;
-
- /* Verify symlink string */
-}
-
-void yaffs_verify_special(struct yaffs_obj *obj)
-{
- if (obj && yaffs_skip_verification(obj->my_dev))
- return;
-}
-
-void yaffs_verify_obj(struct yaffs_obj *obj)
-{
- struct yaffs_dev *dev;
-
- u32 chunk_min;
- u32 chunk_max;
-
- u32 chunk_id_ok;
- u32 chunk_in_range;
- u32 chunk_wrongly_deleted;
- u32 chunk_valid;
-
- if (!obj)
- return;
-
- if (obj->being_created)
- return;
-
- dev = obj->my_dev;
-
- if (yaffs_skip_verification(dev))
- return;
-
- /* Check sane object header chunk */
-
- chunk_min = dev->internal_start_block * dev->param.chunks_per_block;
- chunk_max =
- (dev->internal_end_block + 1) * dev->param.chunks_per_block - 1;
-
- chunk_in_range = (((unsigned)(obj->hdr_chunk)) >= chunk_min &&
- ((unsigned)(obj->hdr_chunk)) <= chunk_max);
- chunk_id_ok = chunk_in_range || (obj->hdr_chunk == 0);
- chunk_valid = chunk_in_range &&
- yaffs_check_chunk_bit(dev,
- obj->hdr_chunk / dev->param.chunks_per_block,
- obj->hdr_chunk % dev->param.chunks_per_block);
- chunk_wrongly_deleted = chunk_in_range && !chunk_valid;
-
- if (!obj->fake && (!chunk_id_ok || chunk_wrongly_deleted))
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d has chunk_id %d %s %s",
- obj->obj_id, obj->hdr_chunk,
- chunk_id_ok ? "" : ",out of range",
- chunk_wrongly_deleted ? ",marked as deleted" : "");
-
- if (chunk_valid && !yaffs_skip_nand_verification(dev)) {
- struct yaffs_ext_tags tags;
- struct yaffs_obj_hdr *oh;
- u8 *buffer = yaffs_get_temp_buffer(dev, __LINE__);
-
- oh = (struct yaffs_obj_hdr *)buffer;
-
- yaffs_rd_chunk_tags_nand(dev, obj->hdr_chunk, buffer, &tags);
-
- yaffs_verify_oh(obj, oh, &tags, 1);
-
- yaffs_release_temp_buffer(dev, buffer, __LINE__);
- }
-
- /* Verify it has a parent */
- if (obj && !obj->fake && (!obj->parent || obj->parent->my_dev != dev)) {
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d has parent pointer %p which does not look like an object",
- obj->obj_id, obj->parent);
- }
-
- /* Verify parent is a directory */
- if (obj->parent
- && obj->parent->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d's parent is not a directory (type %d)",
- obj->obj_id, obj->parent->variant_type);
- }
-
- switch (obj->variant_type) {
- case YAFFS_OBJECT_TYPE_FILE:
- yaffs_verify_file(obj);
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- yaffs_verify_symlink(obj);
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- yaffs_verify_dir(obj);
- break;
- case YAFFS_OBJECT_TYPE_HARDLINK:
- yaffs_verify_link(obj);
- break;
- case YAFFS_OBJECT_TYPE_SPECIAL:
- yaffs_verify_special(obj);
- break;
- case YAFFS_OBJECT_TYPE_UNKNOWN:
- default:
- yaffs_trace(YAFFS_TRACE_VERIFY,
- "Obj %d has illegaltype %d",
- obj->obj_id, obj->variant_type);
- break;
- }
-}
-
-void yaffs_verify_objects(struct yaffs_dev *dev)
-{
- struct yaffs_obj *obj;
- int i;
- struct list_head *lh;
-
- if (yaffs_skip_verification(dev))
- return;
-
- /* Iterate through the objects in each hash entry */
-
- for (i = 0; i < YAFFS_NOBJECT_BUCKETS; i++) {
- list_for_each(lh, &dev->obj_bucket[i].list) {
- if (lh) {
- obj =
- list_entry(lh, struct yaffs_obj, hash_link);
- yaffs_verify_obj(obj);
- }
- }
- }
-}
-
-void yaffs_verify_obj_in_dir(struct yaffs_obj *obj)
-{
- struct list_head *lh;
- struct yaffs_obj *list_obj;
-
- int count = 0;
-
- if (!obj) {
- yaffs_trace(YAFFS_TRACE_ALWAYS, "No object to verify");
- YBUG();
- return;
- }
-
- if (yaffs_skip_verification(obj->my_dev))
- return;
-
- if (!obj->parent) {
- yaffs_trace(YAFFS_TRACE_ALWAYS, "Object does not have parent" );
- YBUG();
- return;
- }
-
- if (obj->parent->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
- yaffs_trace(YAFFS_TRACE_ALWAYS, "Parent is not directory");
- YBUG();
- }
-
- /* Iterate through the objects in each hash entry */
-
- list_for_each(lh, &obj->parent->variant.dir_variant.children) {
- if (lh) {
- list_obj = list_entry(lh, struct yaffs_obj, siblings);
- yaffs_verify_obj(list_obj);
- if (obj == list_obj)
- count++;
- }
- }
-
- if (count != 1) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "Object in directory %d times",
- count);
- YBUG();
- }
-}
-
-void yaffs_verify_dir(struct yaffs_obj *directory)
-{
- struct list_head *lh;
- struct yaffs_obj *list_obj;
-
- if (!directory) {
- YBUG();
- return;
- }
-
- if (yaffs_skip_full_verification(directory->my_dev))
- return;
-
- if (directory->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "Directory has wrong type: %d",
- directory->variant_type);
- YBUG();
- }
-
- /* Iterate through the objects in each hash entry */
-
- list_for_each(lh, &directory->variant.dir_variant.children) {
- if (lh) {
- list_obj = list_entry(lh, struct yaffs_obj, siblings);
- if (list_obj->parent != directory) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "Object in directory list has wrong parent %p",
- list_obj->parent);
- YBUG();
- }
- yaffs_verify_obj_in_dir(list_obj);
- }
- }
-}
-
-static int yaffs_free_verification_failures;
-
-void yaffs_verify_free_chunks(struct yaffs_dev *dev)
-{
- int counted;
- int difference;
-
- if (yaffs_skip_verification(dev))
- return;
-
- counted = yaffs_count_free_chunks(dev);
-
- difference = dev->n_free_chunks - counted;
-
- if (difference) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "Freechunks verification failure %d %d %d",
- dev->n_free_chunks, counted, difference);
- yaffs_free_verification_failures++;
- }
-}
-
-int yaffs_verify_file_sane(struct yaffs_obj *in)
-{
- in = in;
- return YAFFS_OK;
-}
-
diff --git a/fs/yaffs2/yaffs_verify.h b/fs/yaffs2/yaffs_verify.h
deleted file mode 100644
index cc6f889..0000000
--- a/fs/yaffs2/yaffs_verify.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * YAFFS: Yet another Flash File System . A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_VERIFY_H__
-#define __YAFFS_VERIFY_H__
-
-#include "yaffs_guts.h"
-
-void yaffs_verify_blk(struct yaffs_dev *dev, struct yaffs_block_info *bi,
- int n);
-void yaffs_verify_collected_blk(struct yaffs_dev *dev,
- struct yaffs_block_info *bi, int n);
-void yaffs_verify_blocks(struct yaffs_dev *dev);
-
-void yaffs_verify_oh(struct yaffs_obj *obj, struct yaffs_obj_hdr *oh,
- struct yaffs_ext_tags *tags, int parent_check);
-void yaffs_verify_file(struct yaffs_obj *obj);
-void yaffs_verify_link(struct yaffs_obj *obj);
-void yaffs_verify_symlink(struct yaffs_obj *obj);
-void yaffs_verify_special(struct yaffs_obj *obj);
-void yaffs_verify_obj(struct yaffs_obj *obj);
-void yaffs_verify_objects(struct yaffs_dev *dev);
-void yaffs_verify_obj_in_dir(struct yaffs_obj *obj);
-void yaffs_verify_dir(struct yaffs_obj *directory);
-void yaffs_verify_free_chunks(struct yaffs_dev *dev);
-
-int yaffs_verify_file_sane(struct yaffs_obj *obj);
-
-int yaffs_skip_verification(struct yaffs_dev *dev);
-
-#endif
diff --git a/fs/yaffs2/yaffs_vfs.c b/fs/yaffs2/yaffs_vfs.c
deleted file mode 100644
index d5b8753..0000000
--- a/fs/yaffs2/yaffs_vfs.c
+++ /dev/null
@@ -1,2792 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- * Acknowledgements:
- * Luc van OostenRyck for numerous patches.
- * Nick Bane for numerous patches.
- * Nick Bane for 2.5/2.6 integration.
- * Andras Toth for mknod rdev issue.
- * Michael Fischer for finding the problem with inode inconsistency.
- * Some code bodily lifted from JFFS
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- *
- * This is the file system front-end to YAFFS that hooks it up to
- * the VFS.
- *
- * Special notes:
- * >> 2.4: sb->u.generic_sbp points to the struct yaffs_dev associated with
- * this superblock
- * >> 2.6: sb->s_fs_info points to the struct yaffs_dev associated with this
- * superblock
- * >> inode->u.generic_ip points to the associated struct yaffs_obj.
- */
-
-/*
- * NB There are two variants of Linux VFS glue code. This variant supports
- * a single version and should not include any multi-version code.
- */
-#include <linux/version.h>
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/fs.h>
-#include <linux/proc_fs.h>
-#include <linux/smp_lock.h>
-#include <linux/pagemap.h>
-#include <linux/mtd/mtd.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/namei.h>
-#include <linux/exportfs.h>
-#include <linux/kthread.h>
-#include <linux/delay.h>
-#include <linux/freezer.h>
-
-#include <asm/div64.h>
-
-#include <linux/statfs.h>
-
-#define UnlockPage(p) unlock_page(p)
-#define Page_Uptodate(page) test_bit(PG_uptodate, &(page)->flags)
-
-#define yaffs_devname(sb, buf) bdevname(sb->s_bdev, buf)
-
-#define YPROC_ROOT NULL
-
-#define Y_INIT_TIMER(a) init_timer_on_stack(a)
-
-#define WRITE_SIZE_STR "writesize"
-#define WRITE_SIZE(mtd) ((mtd)->writesize)
-
-static uint32_t YCALCBLOCKS(uint64_t partition_size, uint32_t block_size)
-{
- uint64_t result = partition_size;
- do_div(result, block_size);
- return (uint32_t) result;
-}
-
-#include <linux/uaccess.h>
-#include <linux/mtd/mtd.h>
-
-#include "yportenv.h"
-#include "yaffs_trace.h"
-#include "yaffs_guts.h"
-#include "yaffs_attribs.h"
-
-#include "yaffs_linux.h"
-
-#include "yaffs_mtdif.h"
-#include "yaffs_mtdif1.h"
-#include "yaffs_mtdif2.h"
-
-unsigned int yaffs_trace_mask = YAFFS_TRACE_BAD_BLOCKS | YAFFS_TRACE_ALWAYS;
-unsigned int yaffs_wr_attempts = YAFFS_WR_ATTEMPTS;
-unsigned int yaffs_auto_checkpoint = 1;
-unsigned int yaffs_gc_control = 1;
-unsigned int yaffs_bg_enable = 1;
-
-/* Module Parameters */
-module_param(yaffs_trace_mask, uint, 0644);
-module_param(yaffs_wr_attempts, uint, 0644);
-module_param(yaffs_auto_checkpoint, uint, 0644);
-module_param(yaffs_gc_control, uint, 0644);
-module_param(yaffs_bg_enable, uint, 0644);
-
-
-#define yaffs_inode_to_obj_lv(iptr) ((iptr)->i_private)
-#define yaffs_inode_to_obj(iptr) ((struct yaffs_obj *)(yaffs_inode_to_obj_lv(iptr)))
-#define yaffs_dentry_to_obj(dptr) yaffs_inode_to_obj((dptr)->d_inode)
-#define yaffs_super_to_dev(sb) ((struct yaffs_dev *)sb->s_fs_info)
-
-#define update_dir_time(dir) do {\
- (dir)->i_ctime = (dir)->i_mtime = CURRENT_TIME; \
- } while(0)
-
-
-static unsigned yaffs_gc_control_callback(struct yaffs_dev *dev)
-{
- return yaffs_gc_control;
-}
-
-static void yaffs_gross_lock(struct yaffs_dev *dev)
-{
- yaffs_trace(YAFFS_TRACE_LOCK, "yaffs locking %p", current);
- mutex_lock(&(yaffs_dev_to_lc(dev)->gross_lock));
- yaffs_trace(YAFFS_TRACE_LOCK, "yaffs locked %p", current);
-}
-
-static void yaffs_gross_unlock(struct yaffs_dev *dev)
-{
- yaffs_trace(YAFFS_TRACE_LOCK, "yaffs unlocking %p", current);
- mutex_unlock(&(yaffs_dev_to_lc(dev)->gross_lock));
-}
-
-static void yaffs_fill_inode_from_obj(struct inode *inode,
- struct yaffs_obj *obj);
-
-static struct inode *yaffs_iget(struct super_block *sb, unsigned long ino)
-{
- struct inode *inode;
- struct yaffs_obj *obj;
- struct yaffs_dev *dev = yaffs_super_to_dev(sb);
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_iget for %lu", ino);
-
- inode = iget_locked(sb, ino);
- if (!inode)
- return ERR_PTR(-ENOMEM);
- if (!(inode->i_state & I_NEW))
- return inode;
-
- /* NB This is called as a side effect of other functions, but
- * we had to release the lock to prevent deadlocks, so
- * need to lock again.
- */
-
- yaffs_gross_lock(dev);
-
- obj = yaffs_find_by_number(dev, inode->i_ino);
-
- yaffs_fill_inode_from_obj(inode, obj);
-
- yaffs_gross_unlock(dev);
-
- unlock_new_inode(inode);
- return inode;
-}
-
-struct inode *yaffs_get_inode(struct super_block *sb, int mode, int dev,
- struct yaffs_obj *obj)
-{
- struct inode *inode;
-
- if (!sb) {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_get_inode for NULL super_block!!");
- return NULL;
-
- }
-
- if (!obj) {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_get_inode for NULL object!!");
- return NULL;
-
- }
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_get_inode for object %d",
- obj->obj_id);
-
- inode = yaffs_iget(sb, obj->obj_id);
- if (IS_ERR(inode))
- return NULL;
-
- /* NB Side effect: iget calls back to yaffs_read_inode(). */
- /* iget also increments the inode's i_count */
- /* NB You can't be holding gross_lock or deadlock will happen! */
-
- return inode;
-}
-
-static int yaffs_mknod(struct inode *dir, struct dentry *dentry, int mode,
- dev_t rdev)
-{
- struct inode *inode;
-
- struct yaffs_obj *obj = NULL;
- struct yaffs_dev *dev;
-
- struct yaffs_obj *parent = yaffs_inode_to_obj(dir);
-
- int error = -ENOSPC;
- uid_t uid = current->cred->fsuid;
- gid_t gid =
- (dir->i_mode & S_ISGID) ? dir->i_gid : current->cred->fsgid;
-
- if ((dir->i_mode & S_ISGID) && S_ISDIR(mode))
- mode |= S_ISGID;
-
- if (parent) {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_mknod: parent object %d type %d",
- parent->obj_id, parent->variant_type);
- } else {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_mknod: could not get parent object");
- return -EPERM;
- }
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_mknod: making oject for %s, mode %x dev %x",
- dentry->d_name.name, mode, rdev);
-
- dev = parent->my_dev;
-
- yaffs_gross_lock(dev);
-
- switch (mode & S_IFMT) {
- default:
- /* Special (socket, fifo, device...) */
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod: making special");
- obj =
- yaffs_create_special(parent, dentry->d_name.name, mode, uid,
- gid, old_encode_dev(rdev));
- break;
- case S_IFREG: /* file */
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod: making file");
- obj = yaffs_create_file(parent, dentry->d_name.name, mode, uid,
- gid);
- break;
- case S_IFDIR: /* directory */
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod: making directory");
- obj = yaffs_create_dir(parent, dentry->d_name.name, mode,
- uid, gid);
- break;
- case S_IFLNK: /* symlink */
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod: making symlink");
- obj = NULL; /* Do we ever get here? */
- break;
- }
-
- /* Can not call yaffs_get_inode() with gross lock held */
- yaffs_gross_unlock(dev);
-
- if (obj) {
- inode = yaffs_get_inode(dir->i_sb, mode, rdev, obj);
- d_instantiate(dentry, inode);
- update_dir_time(dir);
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_mknod created object %d count = %d",
- obj->obj_id, atomic_read(&inode->i_count));
- error = 0;
- yaffs_fill_inode_from_obj(dir, parent);
- } else {
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_mknod failed making object");
- error = -ENOMEM;
- }
-
- return error;
-}
-
-static int yaffs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
- return yaffs_mknod(dir, dentry, mode | S_IFDIR, 0);
-}
-
-static int yaffs_create(struct inode *dir, struct dentry *dentry, int mode,
- struct nameidata *n)
-{
- return yaffs_mknod(dir, dentry, mode | S_IFREG, 0);
-}
-
-static int yaffs_link(struct dentry *old_dentry, struct inode *dir,
- struct dentry *dentry)
-{
- struct inode *inode = old_dentry->d_inode;
- struct yaffs_obj *obj = NULL;
- struct yaffs_obj *link = NULL;
- struct yaffs_dev *dev;
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_link");
-
- obj = yaffs_inode_to_obj(inode);
- dev = obj->my_dev;
-
- yaffs_gross_lock(dev);
-
- if (!S_ISDIR(inode->i_mode)) /* Don't link directories */
- link =
- yaffs_link_obj(yaffs_inode_to_obj(dir), dentry->d_name.name,
- obj);
-
- if (link) {
- old_dentry->d_inode->i_nlink = yaffs_get_obj_link_count(obj);
- d_instantiate(dentry, old_dentry->d_inode);
- atomic_inc(&old_dentry->d_inode->i_count);
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_link link count %d i_count %d",
- old_dentry->d_inode->i_nlink,
- atomic_read(&old_dentry->d_inode->i_count));
- }
-
- yaffs_gross_unlock(dev);
-
- if (link) {
- update_dir_time(dir);
- return 0;
- }
-
- return -EPERM;
-}
-
-static int yaffs_symlink(struct inode *dir, struct dentry *dentry,
- const char *symname)
-{
- struct yaffs_obj *obj;
- struct yaffs_dev *dev;
- uid_t uid = current->cred->fsuid;
- gid_t gid =
- (dir->i_mode & S_ISGID) ? dir->i_gid : current->cred->fsgid;
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_symlink");
-
- dev = yaffs_inode_to_obj(dir)->my_dev;
- yaffs_gross_lock(dev);
- obj = yaffs_create_symlink(yaffs_inode_to_obj(dir), dentry->d_name.name,
- S_IFLNK | S_IRWXUGO, uid, gid, symname);
- yaffs_gross_unlock(dev);
-
- if (obj) {
- struct inode *inode;
-
- inode = yaffs_get_inode(dir->i_sb, obj->yst_mode, 0, obj);
- d_instantiate(dentry, inode);
- update_dir_time(dir);
- yaffs_trace(YAFFS_TRACE_OS, "symlink created OK");
- return 0;
- } else {
- yaffs_trace(YAFFS_TRACE_OS, "symlink not created");
- }
-
- return -ENOMEM;
-}
-
-static struct dentry *yaffs_lookup(struct inode *dir, struct dentry *dentry,
- struct nameidata *n)
-{
- struct yaffs_obj *obj;
- struct inode *inode = NULL;
-
- struct yaffs_dev *dev = yaffs_inode_to_obj(dir)->my_dev;
-
- if (current != yaffs_dev_to_lc(dev)->readdir_process)
- yaffs_gross_lock(dev);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_lookup for %d:%s",
- yaffs_inode_to_obj(dir)->obj_id, dentry->d_name.name);
-
- obj = yaffs_find_by_name(yaffs_inode_to_obj(dir), dentry->d_name.name);
-
- obj = yaffs_get_equivalent_obj(obj); /* in case it was a hardlink */
-
- /* Can't hold gross lock when calling yaffs_get_inode() */
- if (current != yaffs_dev_to_lc(dev)->readdir_process)
- yaffs_gross_unlock(dev);
-
- if (obj) {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_lookup found %d", obj->obj_id);
-
- inode = yaffs_get_inode(dir->i_sb, obj->yst_mode, 0, obj);
-
- if (inode) {
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_loookup dentry");
- d_add(dentry, inode);
- /* return dentry; */
- return NULL;
- }
-
- } else {
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_lookup not found");
-
- }
-
- d_add(dentry, inode);
-
- return NULL;
-}
-
-static int yaffs_unlink(struct inode *dir, struct dentry *dentry)
-{
- int ret_val;
-
- struct yaffs_dev *dev;
- struct yaffs_obj *obj;
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_unlink %d:%s",
- (int)(dir->i_ino), dentry->d_name.name);
- obj = yaffs_inode_to_obj(dir);
- dev = obj->my_dev;
-
- yaffs_gross_lock(dev);
-
- ret_val = yaffs_unlinker(obj, dentry->d_name.name);
-
- if (ret_val == YAFFS_OK) {
- dentry->d_inode->i_nlink--;
- dir->i_version++;
- yaffs_gross_unlock(dev);
- mark_inode_dirty(dentry->d_inode);
- update_dir_time(dir);
- return 0;
- }
- yaffs_gross_unlock(dev);
- return -ENOTEMPTY;
-}
-
-static int yaffs_sync_object(struct file *file, int datasync)
-{
-
- struct yaffs_obj *obj;
- struct yaffs_dev *dev;
- struct dentry *dentry = file->f_path.dentry;
-
- obj = yaffs_dentry_to_obj(dentry);
-
- dev = obj->my_dev;
-
- yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_SYNC, "yaffs_sync_object");
- yaffs_gross_lock(dev);
- yaffs_flush_file(obj, 1, datasync);
- yaffs_gross_unlock(dev);
- return 0;
-}
-/*
- * The VFS layer already does all the dentry stuff for rename.
- *
- * NB: POSIX says you can rename an object over an old object of the same name
- */
-static int yaffs_rename(struct inode *old_dir, struct dentry *old_dentry,
- struct inode *new_dir, struct dentry *new_dentry)
-{
- struct yaffs_dev *dev;
- int ret_val = YAFFS_FAIL;
- struct yaffs_obj *target;
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_rename");
- dev = yaffs_inode_to_obj(old_dir)->my_dev;
-
- yaffs_gross_lock(dev);
-
- /* Check if the target is an existing directory that is not empty. */
- target = yaffs_find_by_name(yaffs_inode_to_obj(new_dir),
- new_dentry->d_name.name);
-
- if (target && target->variant_type == YAFFS_OBJECT_TYPE_DIRECTORY &&
- !list_empty(&target->variant.dir_variant.children)) {
-
- yaffs_trace(YAFFS_TRACE_OS, "target is non-empty dir");
-
- ret_val = YAFFS_FAIL;
- } else {
- /* Now does unlinking internally using shadowing mechanism */
- yaffs_trace(YAFFS_TRACE_OS, "calling yaffs_rename_obj");
-
- ret_val = yaffs_rename_obj(yaffs_inode_to_obj(old_dir),
- old_dentry->d_name.name,
- yaffs_inode_to_obj(new_dir),
- new_dentry->d_name.name);
- }
- yaffs_gross_unlock(dev);
-
- if (ret_val == YAFFS_OK) {
- if (target) {
- new_dentry->d_inode->i_nlink--;
- mark_inode_dirty(new_dentry->d_inode);
- }
-
- update_dir_time(old_dir);
- if (old_dir != new_dir)
- update_dir_time(new_dir);
- return 0;
- } else {
- return -ENOTEMPTY;
- }
-}
-
-static int yaffs_setattr(struct dentry *dentry, struct iattr *attr)
-{
- struct inode *inode = dentry->d_inode;
- int error = 0;
- struct yaffs_dev *dev;
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_setattr of object %d",
- yaffs_inode_to_obj(inode)->obj_id);
-
- /* Fail if a requested resize >= 2GB */
- if (attr->ia_valid & ATTR_SIZE && (attr->ia_size >> 31))
- error = -EINVAL;
-
- if (error == 0)
- error = inode_change_ok(inode, attr);
- if (error == 0) {
- int result;
-
- setattr_copy(inode, attr);
- yaffs_trace(YAFFS_TRACE_OS, "inode_setattr called");
- if (attr->ia_valid & ATTR_SIZE) {
- truncate_setsize(inode, attr->ia_size);
- inode->i_blocks = (inode->i_size + 511) >> 9;
- }
- dev = yaffs_inode_to_obj(inode)->my_dev;
- if (attr->ia_valid & ATTR_SIZE) {
- yaffs_trace(YAFFS_TRACE_OS, "resize to %d(%x)",
- (int)(attr->ia_size),
- (int)(attr->ia_size));
- }
- yaffs_gross_lock(dev);
- result = yaffs_set_attribs(yaffs_inode_to_obj(inode), attr);
- if (result == YAFFS_OK) {
- error = 0;
- } else {
- error = -EPERM;
- }
- yaffs_gross_unlock(dev);
-
- }
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_setattr done returning %d", error);
-
- return error;
-}
-
-#ifdef CONFIG_YAFFS_XATTR
-static int yaffs_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags)
-{
- struct inode *inode = dentry->d_inode;
- int error = 0;
- struct yaffs_dev *dev;
- struct yaffs_obj *obj = yaffs_inode_to_obj(inode);
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_setxattr of object %d", obj->obj_id);
-
- if (error == 0) {
- int result;
- dev = obj->my_dev;
- yaffs_gross_lock(dev);
- result = yaffs_set_xattrib(obj, name, value, size, flags);
- if (result == YAFFS_OK)
- error = 0;
- else if (result < 0)
- error = result;
- yaffs_gross_unlock(dev);
-
- }
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_setxattr done returning %d", error);
-
- return error;
-}
-
-static ssize_t yaffs_getxattr(struct dentry *dentry, const char *name, void *buff,
- size_t size)
-{
- struct inode *inode = dentry->d_inode;
- int error = 0;
- struct yaffs_dev *dev;
- struct yaffs_obj *obj = yaffs_inode_to_obj(inode);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_getxattr \"%s\" from object %d",
- name, obj->obj_id);
-
- if (error == 0) {
- dev = obj->my_dev;
- yaffs_gross_lock(dev);
- error = yaffs_get_xattrib(obj, name, buff, size);
- yaffs_gross_unlock(dev);
-
- }
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_getxattr done returning %d", error);
-
- return error;
-}
-
-static int yaffs_removexattr(struct dentry *dentry, const char *name)
-{
- struct inode *inode = dentry->d_inode;
- int error = 0;
- struct yaffs_dev *dev;
- struct yaffs_obj *obj = yaffs_inode_to_obj(inode);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_removexattr of object %d", obj->obj_id);
-
- if (error == 0) {
- int result;
- dev = obj->my_dev;
- yaffs_gross_lock(dev);
- result = yaffs_remove_xattrib(obj, name);
- if (result == YAFFS_OK)
- error = 0;
- else if (result < 0)
- error = result;
- yaffs_gross_unlock(dev);
-
- }
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_removexattr done returning %d", error);
-
- return error;
-}
-
-static ssize_t yaffs_listxattr(struct dentry *dentry, char *buff, size_t size)
-{
- struct inode *inode = dentry->d_inode;
- int error = 0;
- struct yaffs_dev *dev;
- struct yaffs_obj *obj = yaffs_inode_to_obj(inode);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_listxattr of object %d", obj->obj_id);
-
- if (error == 0) {
- dev = obj->my_dev;
- yaffs_gross_lock(dev);
- error = yaffs_list_xattrib(obj, buff, size);
- yaffs_gross_unlock(dev);
-
- }
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_listxattr done returning %d", error);
-
- return error;
-}
-
-#endif
-
-static const struct inode_operations yaffs_dir_inode_operations = {
- .create = yaffs_create,
- .lookup = yaffs_lookup,
- .link = yaffs_link,
- .unlink = yaffs_unlink,
- .symlink = yaffs_symlink,
- .mkdir = yaffs_mkdir,
- .rmdir = yaffs_unlink,
- .mknod = yaffs_mknod,
- .rename = yaffs_rename,
- .setattr = yaffs_setattr,
-#ifdef CONFIG_YAFFS_XATTR
- .setxattr = yaffs_setxattr,
- .getxattr = yaffs_getxattr,
- .listxattr = yaffs_listxattr,
- .removexattr = yaffs_removexattr,
-#endif
-};
-/*-----------------------------------------------------------------*/
-/* Directory search context allows us to unlock access to yaffs during
- * filldir without causing problems with the directory being modified.
- * This is similar to the tried and tested mechanism used in yaffs direct.
- *
- * A search context iterates along a doubly linked list of siblings in the
- * directory. If the iterating object is deleted then this would corrupt
- * the list iteration, likely causing a crash. The search context avoids
- * this by using the remove_obj_fn to move the search context to the
- * next object before the object is deleted.
- *
- * Many readdirs (and thus search contexts) may be alive simultaneously so
- * each struct yaffs_dev has a list of these.
- *
- * A search context lives for the duration of a readdir.
- *
- * All these functions must be called while yaffs is locked.
- */
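-
-/* A minimal usage sketch (assumed; inferred from the code below rather
- * than taken from the original documentation). All calls are made with
- * the gross lock held, and emit() stands for the caller's per-entry
- * work, e.g. a filldir callback:
- *
- *   sc = yaffs_new_search(dir_obj);
- *   while (sc && sc->next_return) {
- *           emit(sc->next_return);
- *           yaffs_search_advance(sc);
- *   }
- *   yaffs_search_end(sc);
- *
- * If another thread unlinks the object a context currently points at,
- * yaffs_remove_obj_callback() advances that context first.
- */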
-
-struct yaffs_search_context {
- struct yaffs_dev *dev;
- struct yaffs_obj *dir_obj;
- struct yaffs_obj *next_return;
- struct list_head others;
-};
-
-/*
- * yaffs_new_search() creates a new search context, initialises it and
- * adds it to the device's search context list.
- *
- * Called at start of readdir.
- */
-static struct yaffs_search_context *yaffs_new_search(struct yaffs_obj *dir)
-{
- struct yaffs_dev *dev = dir->my_dev;
- struct yaffs_search_context *sc =
- kmalloc(sizeof(struct yaffs_search_context), GFP_NOFS);
- if (sc) {
- sc->dir_obj = dir;
- sc->dev = dev;
- if (list_empty(&sc->dir_obj->variant.dir_variant.children))
- sc->next_return = NULL;
- else
- sc->next_return =
- list_entry(dir->variant.dir_variant.children.next,
- struct yaffs_obj, siblings);
- INIT_LIST_HEAD(&sc->others);
- list_add(&sc->others, &(yaffs_dev_to_lc(dev)->search_contexts));
- }
- return sc;
-}
-
-/*
- * yaffs_search_end() disposes of a search context and cleans up.
- */
-static void yaffs_search_end(struct yaffs_search_context *sc)
-{
- if (sc) {
- list_del(&sc->others);
- kfree(sc);
- }
-}
-
-/*
- * yaffs_search_advance() moves a search context to the next object.
- * Called when the search iterates or when an object removal causes
- * the search context to be moved to the next object.
- */
-static void yaffs_search_advance(struct yaffs_search_context *sc)
-{
- if (!sc)
- return;
-
- if (sc->next_return == NULL ||
- list_empty(&sc->dir_obj->variant.dir_variant.children))
- sc->next_return = NULL;
- else {
- struct list_head *next = sc->next_return->siblings.next;
-
- if (next == &sc->dir_obj->variant.dir_variant.children)
- sc->next_return = NULL; /* end of list */
- else
- sc->next_return =
- list_entry(next, struct yaffs_obj, siblings);
- }
-}
-
-/*
- * yaffs_remove_obj_callback() is called when an object is unlinked.
- * We check open search contexts and advance any which are currently
- * on the object being iterated.
- */
-static void yaffs_remove_obj_callback(struct yaffs_obj *obj)
-{
-
- struct list_head *i;
- struct yaffs_search_context *sc;
- struct list_head *search_contexts =
- &(yaffs_dev_to_lc(obj->my_dev)->search_contexts);
-
- /* Iterate through the directory search contexts.
- * If any are currently on the object being removed, then advance
- * the search context to the next object to prevent a hanging pointer.
- */
- list_for_each(i, search_contexts) {
- if (i) {
- sc = list_entry(i, struct yaffs_search_context, others);
- if (sc->next_return == obj)
- yaffs_search_advance(sc);
- }
- }
-
-}
-
-static int yaffs_readdir(struct file *f, void *dirent, filldir_t filldir)
-{
- struct yaffs_obj *obj;
- struct yaffs_dev *dev;
- struct yaffs_search_context *sc;
- struct inode *inode = f->f_dentry->d_inode;
- unsigned long offset, curoffs;
- struct yaffs_obj *l;
- int ret_val = 0;
-
- char name[YAFFS_MAX_NAME_LENGTH + 1];
-
- obj = yaffs_dentry_to_obj(f->f_dentry);
- dev = obj->my_dev;
-
- yaffs_gross_lock(dev);
-
- yaffs_dev_to_lc(dev)->readdir_process = current;
-
- offset = f->f_pos;
-
- sc = yaffs_new_search(obj);
- if (!sc) {
- ret_val = -ENOMEM;
- goto out;
- }
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_readdir: starting at %d", (int)offset);
-
- if (offset == 0) {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_readdir: entry . ino %d",
- (int)inode->i_ino);
- yaffs_gross_unlock(dev);
- if (filldir(dirent, ".", 1, offset, inode->i_ino, DT_DIR) < 0) {
- yaffs_gross_lock(dev);
- goto out;
- }
- yaffs_gross_lock(dev);
- offset++;
- f->f_pos++;
- }
- if (offset == 1) {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_readdir: entry .. ino %d",
- (int)f->f_dentry->d_parent->d_inode->i_ino);
- yaffs_gross_unlock(dev);
- if (filldir(dirent, "..", 2, offset,
- f->f_dentry->d_parent->d_inode->i_ino,
- DT_DIR) < 0) {
- yaffs_gross_lock(dev);
- goto out;
- }
- yaffs_gross_lock(dev);
- offset++;
- f->f_pos++;
- }
-
- curoffs = 1;
-
- /* If the directory has changed since the open or last call to
- readdir, rewind to after the 2 canned entries. */
- if (f->f_version != inode->i_version) {
- offset = 2;
- f->f_pos = offset;
- f->f_version = inode->i_version;
- }
-
- while (sc->next_return) {
- curoffs++;
- l = sc->next_return;
- if (curoffs >= offset) {
- int this_inode = yaffs_get_obj_inode(l);
- int this_type = yaffs_get_obj_type(l);
-
- yaffs_get_obj_name(l, name, YAFFS_MAX_NAME_LENGTH + 1);
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_readdir: %s inode %d",
- name, yaffs_get_obj_inode(l));
-
- yaffs_gross_unlock(dev);
-
- if (filldir(dirent,
- name,
- strlen(name),
- offset, this_inode, this_type) < 0) {
- yaffs_gross_lock(dev);
- goto out;
- }
-
- yaffs_gross_lock(dev);
-
- offset++;
- f->f_pos++;
- }
- yaffs_search_advance(sc);
- }
-
-out:
- yaffs_search_end(sc);
- yaffs_dev_to_lc(dev)->readdir_process = NULL;
- yaffs_gross_unlock(dev);
-
- return ret_val;
-}
-
-static const struct file_operations yaffs_dir_operations = {
- .read = generic_read_dir,
- .readdir = yaffs_readdir,
- .fsync = yaffs_sync_object,
- .llseek = generic_file_llseek,
-};
-
-
-
-static int yaffs_file_flush(struct file *file, fl_owner_t id)
-{
- struct yaffs_obj *obj = yaffs_dentry_to_obj(file->f_dentry);
-
- struct yaffs_dev *dev = obj->my_dev;
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_file_flush object %d (%s)",
- obj->obj_id, obj->dirty ? "dirty" : "clean");
-
- yaffs_gross_lock(dev);
-
- yaffs_flush_file(obj, 1, 0);
-
- yaffs_gross_unlock(dev);
-
- return 0;
-}
-
-static const struct file_operations yaffs_file_operations = {
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = generic_file_aio_write,
- .mmap = generic_file_mmap,
- .flush = yaffs_file_flush,
- .fsync = yaffs_sync_object,
- .splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
- .llseek = generic_file_llseek,
-};
-
-
-/* ExportFS support */
-static struct inode *yaffs2_nfs_get_inode(struct super_block *sb, uint64_t ino,
- uint32_t generation)
-{
- return yaffs_iget(sb, ino);
-}
-
-static struct dentry *yaffs2_fh_to_dentry(struct super_block *sb,
- struct fid *fid, int fh_len,
- int fh_type)
-{
- return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
- yaffs2_nfs_get_inode);
-}
-
-static struct dentry *yaffs2_fh_to_parent(struct super_block *sb,
- struct fid *fid, int fh_len,
- int fh_type)
-{
- return generic_fh_to_parent(sb, fid, fh_len, fh_type,
- yaffs2_nfs_get_inode);
-}
-
-struct dentry *yaffs2_get_parent(struct dentry *dentry)
-{
-
- struct super_block *sb = dentry->d_inode->i_sb;
- struct dentry *parent = ERR_PTR(-ENOENT);
- struct inode *inode;
- unsigned long parent_ino;
- struct yaffs_obj *d_obj;
- struct yaffs_obj *parent_obj;
-
- d_obj = yaffs_inode_to_obj(dentry->d_inode);
-
- if (d_obj) {
- parent_obj = d_obj->parent;
- if (parent_obj) {
- parent_ino = yaffs_get_obj_inode(parent_obj);
- inode = yaffs_iget(sb, parent_ino);
-
- if (IS_ERR(inode)) {
- parent = ERR_CAST(inode);
- } else {
- /* d_obtain_alias() returns an ERR_PTR and drops
- * the inode reference itself on failure, so no
- * further error handling is needed here.
- */
- parent = d_obtain_alias(inode);
- }
- }
- }
-
- return parent;
-}
-
-/* Functions left NULL in this structure fall back to the
- * exportfs defaults, so only the ones we implement are filled in.
- */
-
-static struct export_operations yaffs_export_ops = {
- .fh_to_dentry = yaffs2_fh_to_dentry,
- .fh_to_parent = yaffs2_fh_to_parent,
- .get_parent = yaffs2_get_parent,
-};
-
-
-/*-----------------------------------------------------------------*/
-
-static int yaffs_readlink(struct dentry *dentry, char __user * buffer,
- int buflen)
-{
- unsigned char *alias;
- int ret;
-
- struct yaffs_dev *dev = yaffs_dentry_to_obj(dentry)->my_dev;
-
- yaffs_gross_lock(dev);
-
- alias = yaffs_get_symlink_alias(yaffs_dentry_to_obj(dentry));
-
- yaffs_gross_unlock(dev);
-
- if (!alias)
- return -ENOMEM;
-
- ret = vfs_readlink(dentry, buffer, buflen, alias);
- kfree(alias);
- return ret;
-}
-
-static void *yaffs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- unsigned char *alias;
- void *ret;
- struct yaffs_dev *dev = yaffs_dentry_to_obj(dentry)->my_dev;
-
- yaffs_gross_lock(dev);
-
- alias = yaffs_get_symlink_alias(yaffs_dentry_to_obj(dentry));
- yaffs_gross_unlock(dev);
-
- if (!alias) {
- ret = ERR_PTR(-ENOMEM);
- goto out;
- }
-
- nd_set_link(nd, alias);
- ret = (void *)alias;
-out:
- return ret;
-}
-
-void yaffs_put_link(struct dentry *dentry, struct nameidata *nd, void *alias)
-{
- kfree(alias);
-}
-
-
-static void yaffs_unstitch_obj(struct inode *inode, struct yaffs_obj *obj)
-{
- /* Clear the association between the inode and
- * the struct yaffs_obj.
- */
- obj->my_inode = NULL;
- yaffs_inode_to_obj_lv(inode) = NULL;
-
- /* If the object freeing was deferred, then the real
- * free happens now.
- * This should fix the inode inconsistency problem.
- */
- yaffs_handle_defered_free(obj);
-}
-
-/* yaffs_evict_inode combines into one operation what was previously done in
- * yaffs_clear_inode() and yaffs_delete_inode()
- *
- */
-static void yaffs_evict_inode(struct inode *inode)
-{
- struct yaffs_obj *obj;
- struct yaffs_dev *dev;
- int deleteme = 0;
-
- obj = yaffs_inode_to_obj(inode);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_evict_inode: ino %d, count %d %s",
- (int)inode->i_ino,
- atomic_read(&inode->i_count),
- obj ? "object exists" : "null object");
-
- if (!inode->i_nlink && !is_bad_inode(inode))
- deleteme = 1;
- truncate_inode_pages(&inode->i_data, 0);
- end_writeback(inode);
-
- if (deleteme && obj) {
- dev = obj->my_dev;
- yaffs_gross_lock(dev);
- yaffs_del_obj(obj);
- yaffs_gross_unlock(dev);
- }
- if (obj) {
- dev = obj->my_dev;
- yaffs_gross_lock(dev);
- yaffs_unstitch_obj(inode, obj);
- yaffs_gross_unlock(dev);
- }
-
-}
-
-static void yaffs_touch_super(struct yaffs_dev *dev)
-{
- struct super_block *sb = yaffs_dev_to_lc(dev)->super;
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_touch_super() sb = %p", sb);
- if (sb)
- sb->s_dirt = 1;
-}
-
-static int yaffs_readpage_nolock(struct file *f, struct page *pg)
-{
- /* Lifted from jffs2 */
-
- struct yaffs_obj *obj;
- unsigned char *pg_buf;
- int ret;
-
- struct yaffs_dev *dev;
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_readpage_nolock at %08x, size %08x",
- (unsigned)(pg->index << PAGE_CACHE_SHIFT),
- (unsigned)PAGE_CACHE_SIZE);
-
- obj = yaffs_dentry_to_obj(f->f_dentry);
-
- dev = obj->my_dev;
-
- BUG_ON(!PageLocked(pg));
-
- pg_buf = kmap(pg); /* kmap() cannot fail; it may sleep until a mapping is available */
-
- yaffs_gross_lock(dev);
-
- ret = yaffs_file_rd(obj, pg_buf,
- pg->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE);
-
- yaffs_gross_unlock(dev);
-
- if (ret >= 0)
- ret = 0;
-
- if (ret) {
- ClearPageUptodate(pg);
- SetPageError(pg);
- } else {
- SetPageUptodate(pg);
- ClearPageError(pg);
- }
-
- flush_dcache_page(pg);
- kunmap(pg);
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_readpage_nolock done");
- return ret;
-}
-
-static int yaffs_readpage_unlock(struct file *f, struct page *pg)
-{
- int ret = yaffs_readpage_nolock(f, pg);
- UnlockPage(pg);
- return ret;
-}
-
-static int yaffs_readpage(struct file *f, struct page *pg)
-{
- int ret;
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_readpage");
- ret = yaffs_readpage_unlock(f, pg);
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_readpage done");
- return ret;
-}
-
-/* writepage inspired by/stolen from smbfs */
-
-static int yaffs_writepage(struct page *page, struct writeback_control *wbc)
-{
- struct yaffs_dev *dev;
- struct address_space *mapping = page->mapping;
- struct inode *inode;
- unsigned long end_index;
- char *buffer;
- struct yaffs_obj *obj;
- int n_written = 0;
- unsigned n_bytes;
- loff_t i_size;
-
- BUG_ON(!mapping);
- inode = mapping->host;
- BUG_ON(!inode);
- i_size = i_size_read(inode);
-
- end_index = i_size >> PAGE_CACHE_SHIFT;
-
- if (page->index < end_index)
- n_bytes = PAGE_CACHE_SIZE;
- else {
- n_bytes = i_size & (PAGE_CACHE_SIZE - 1);
-
- if (page->index > end_index || !n_bytes) {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_writepage at %08x, inode size = %08x!!!",
- (unsigned)(page->index << PAGE_CACHE_SHIFT),
- (unsigned)inode->i_size);
- yaffs_trace(YAFFS_TRACE_OS,
- " -> don't care!!");
-
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
- set_page_writeback(page);
- unlock_page(page);
- end_page_writeback(page);
- return 0;
- }
- }
-
- if (n_bytes != PAGE_CACHE_SIZE)
- zero_user_segment(page, n_bytes, PAGE_CACHE_SIZE);
-
- get_page(page);
-
- buffer = kmap(page);
-
- obj = yaffs_inode_to_obj(inode);
- dev = obj->my_dev;
- yaffs_gross_lock(dev);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_writepage at %08x, size %08x",
- (unsigned)(page->index << PAGE_CACHE_SHIFT), n_bytes);
- yaffs_trace(YAFFS_TRACE_OS,
- "writepag0: obj = %05x, ino = %05x",
- (int)obj->variant.file_variant.file_size, (int)inode->i_size);
-
- n_written = yaffs_wr_file(obj, buffer,
- page->index << PAGE_CACHE_SHIFT, n_bytes, 0);
-
- yaffs_touch_super(dev);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "writepag1: obj = %05x, ino = %05x",
- (int)obj->variant.file_variant.file_size, (int)inode->i_size);
-
- yaffs_gross_unlock(dev);
-
- kunmap(page);
- set_page_writeback(page);
- unlock_page(page);
- end_page_writeback(page);
- put_page(page);
-
- return (n_written == n_bytes) ? 0 : -ENOSPC;
-}
-
-/* Space holding and freeing is done to ensure we have space available for
- * write_begin/end.
- * For now we just assume few parallel writes and check against a small
- * number.
- * TODO: do this with a counter to handle parallel writes better.
- */
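-
-/* As a worked example of the threshold below (figures assumed for
- * illustration, not from the original author): with 2048-byte data
- * chunks, requiring more than 20 free chunks keeps roughly 40 KiB of
- * headroom, enough for a handful of concurrent page-sized
- * write_begin/write_end pairs.
- */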
-
-static ssize_t yaffs_hold_space(struct file *f)
-{
- struct yaffs_obj *obj;
- struct yaffs_dev *dev;
-
- int n_free_chunks;
-
- obj = yaffs_dentry_to_obj(f->f_dentry);
-
- dev = obj->my_dev;
-
- yaffs_gross_lock(dev);
-
- n_free_chunks = yaffs_get_n_free_chunks(dev);
-
- yaffs_gross_unlock(dev);
-
- return (n_free_chunks > 20) ? 1 : 0;
-}
-
-static void yaffs_release_space(struct file *f)
-{
- struct yaffs_obj *obj;
- struct yaffs_dev *dev;
-
- obj = yaffs_dentry_to_obj(f->f_dentry);
-
- dev = obj->my_dev;
-
- yaffs_gross_lock(dev);
-
- yaffs_gross_unlock(dev);
-}
-
-static int yaffs_write_begin(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- struct page *pg = NULL;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-
- int ret = 0;
- int space_held = 0;
-
- /* Get a page */
- pg = grab_cache_page_write_begin(mapping, index, flags);
-
- *pagep = pg;
- if (!pg) {
- ret = -ENOMEM;
- goto out;
- }
- yaffs_trace(YAFFS_TRACE_OS,
- "start yaffs_write_begin index %d(%x) uptodate %d",
- (int)index, (int)index, Page_Uptodate(pg) ? 1 : 0);
-
- /* Get fs space */
- space_held = yaffs_hold_space(filp);
-
- if (!space_held) {
- ret = -ENOSPC;
- goto out;
- }
-
- /* Update page if required */
-
- if (!Page_Uptodate(pg))
- ret = yaffs_readpage_nolock(filp, pg);
-
- if (ret)
- goto out;
-
- /* Happy path return */
- yaffs_trace(YAFFS_TRACE_OS, "end yaffs_write_begin - ok");
-
- return 0;
-
-out:
- yaffs_trace(YAFFS_TRACE_OS,
- "end yaffs_write_begin fail returning %d", ret);
- if (space_held)
- yaffs_release_space(filp);
- if (pg) {
- unlock_page(pg);
- page_cache_release(pg);
- }
- return ret;
-}
-
-static ssize_t yaffs_file_write(struct file *f, const char *buf, size_t n,
- loff_t *pos)
-{
- struct yaffs_obj *obj;
- int n_written, ipos;
- struct inode *inode;
- struct yaffs_dev *dev;
-
- obj = yaffs_dentry_to_obj(f->f_dentry);
-
- dev = obj->my_dev;
-
- yaffs_gross_lock(dev);
-
- inode = f->f_dentry->d_inode;
-
- if (!S_ISBLK(inode->i_mode) && f->f_flags & O_APPEND)
- ipos = inode->i_size;
- else
- ipos = *pos;
-
- if (!obj)
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_file_write: hey obj is null!");
- else
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_file_write about to write writing %u(%x) bytes to object %d at %d(%x)",
- (unsigned)n, (unsigned)n, obj->obj_id, ipos, ipos);
-
- n_written = yaffs_wr_file(obj, buf, ipos, n, 0);
-
- yaffs_touch_super(dev);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_file_write: %d(%x) bytes written",
- (unsigned)n, (unsigned)n);
-
- if (n_written > 0) {
- ipos += n_written;
- *pos = ipos;
- if (ipos > inode->i_size) {
- inode->i_size = ipos;
- inode->i_blocks = (ipos + 511) >> 9;
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_file_write size updated to %d bytes, %d blocks",
- ipos, (int)(inode->i_blocks));
- }
-
- }
- yaffs_gross_unlock(dev);
- return (n_written == 0) && (n > 0) ? -ENOSPC : n_written;
-}
-
-static int yaffs_write_end(struct file *filp, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *pg, void *fsdata)
-{
- int ret = 0;
- void *addr, *kva;
- uint32_t offset_into_page = pos & (PAGE_CACHE_SIZE - 1);
-
- kva = kmap(pg);
- addr = kva + offset_into_page;
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_write_end addr %p pos %x n_bytes %d",
- addr, (unsigned)pos, copied);
-
- ret = yaffs_file_write(filp, addr, copied, &pos);
-
- if (ret != copied) {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_write_end not same size ret %d copied %d",
- ret, copied);
- SetPageError(pg);
- }
-
- kunmap(pg);
-
- yaffs_release_space(filp);
- unlock_page(pg);
- page_cache_release(pg);
- return ret;
-}
-
-static int yaffs_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
- struct yaffs_dev *dev = yaffs_dentry_to_obj(dentry)->my_dev;
- struct super_block *sb = dentry->d_sb;
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_statfs");
-
- yaffs_gross_lock(dev);
-
- buf->f_type = YAFFS_MAGIC;
- buf->f_bsize = sb->s_blocksize;
- buf->f_namelen = 255;
-
- if (dev->data_bytes_per_chunk & (dev->data_bytes_per_chunk - 1)) {
- /* Do this if chunk size is not a power of 2 */
-
- uint64_t bytes_in_dev;
- uint64_t bytes_free;
-
- bytes_in_dev =
- ((uint64_t)
- ((dev->param.end_block - dev->param.start_block +
- 1))) * ((uint64_t) (dev->param.chunks_per_block *
- dev->data_bytes_per_chunk));
-
- do_div(bytes_in_dev, sb->s_blocksize); /* bytes_in_dev becomes the number of blocks */
- buf->f_blocks = bytes_in_dev;
-
- bytes_free = ((uint64_t) (yaffs_get_n_free_chunks(dev))) *
- ((uint64_t) (dev->data_bytes_per_chunk));
-
- do_div(bytes_free, sb->s_blocksize);
-
- buf->f_bfree = bytes_free;
-
- } else if (sb->s_blocksize > dev->data_bytes_per_chunk) {
-
- buf->f_blocks =
- (dev->param.end_block - dev->param.start_block + 1) *
- dev->param.chunks_per_block /
- (sb->s_blocksize / dev->data_bytes_per_chunk);
- buf->f_bfree =
- yaffs_get_n_free_chunks(dev) /
- (sb->s_blocksize / dev->data_bytes_per_chunk);
- } else {
- buf->f_blocks =
- (dev->param.end_block - dev->param.start_block + 1) *
- dev->param.chunks_per_block *
- (dev->data_bytes_per_chunk / sb->s_blocksize);
-
- buf->f_bfree =
- yaffs_get_n_free_chunks(dev) *
- (dev->data_bytes_per_chunk / sb->s_blocksize);
- }
-
- buf->f_files = 0;
- buf->f_ffree = 0;
- buf->f_bavail = buf->f_bfree;
-
- yaffs_gross_unlock(dev);
- return 0;
-}
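-
-/* Worked example of the f_blocks arithmetic above (numbers assumed for
- * illustration): with 2048-byte chunks, 64 chunks per block and a
- * 4096-byte sb->s_blocksize, the chunk size is a power of 2 and smaller
- * than the fs block size, so the middle branch applies:
- *
- *   f_blocks = n_yaffs_blocks * 64 / (4096 / 2048)
- *            = n_yaffs_blocks * 32
- *
- * i.e. each 128 KiB yaffs block accounts for 32 reported 4 KiB blocks.
- */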
-
-static void yaffs_flush_inodes(struct super_block *sb)
-{
- struct inode *iptr;
- struct yaffs_obj *obj;
-
- list_for_each_entry(iptr, &sb->s_inodes, i_sb_list) {
- obj = yaffs_inode_to_obj(iptr);
- if (obj) {
- yaffs_trace(YAFFS_TRACE_OS,
- "flushing obj %d", obj->obj_id);
- yaffs_flush_file(obj, 1, 0);
- }
- }
-}
-
-static void yaffs_flush_super(struct super_block *sb, int do_checkpoint)
-{
- struct yaffs_dev *dev = yaffs_super_to_dev(sb);
- if (!dev)
- return;
-
- yaffs_flush_inodes(sb);
- yaffs_update_dirty_dirs(dev);
- yaffs_flush_whole_cache(dev);
- if (do_checkpoint)
- yaffs_checkpoint_save(dev);
-}
-
-static unsigned yaffs_bg_gc_urgency(struct yaffs_dev *dev)
-{
- unsigned erased_chunks =
- dev->n_erased_blocks * dev->param.chunks_per_block;
- struct yaffs_linux_context *context = yaffs_dev_to_lc(dev);
- unsigned scattered = 0; /* Free chunks not in an erased block */
-
- if (erased_chunks < dev->n_free_chunks)
- scattered = (dev->n_free_chunks - erased_chunks);
-
- if (!context->bg_running)
- return 0;
- else if (scattered < (dev->param.chunks_per_block * 2))
- return 0;
- else if (erased_chunks > dev->n_free_chunks / 2)
- return 0;
- else if (erased_chunks > dev->n_free_chunks / 4)
- return 1;
- else
- return 2;
-}
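-
-/* Example of the urgency bands above (numbers assumed): with 64 chunks
- * per block, fewer than 128 scattered free chunks means no background
- * gc is needed (returns 0). Otherwise, if more than half of the free
- * chunks already sit in erased blocks the device is healthy (0); more
- * than a quarter gives mild urgency (1); anything less is urgent (2).
- */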
-
-static int yaffs_do_sync_fs(struct super_block *sb, int request_checkpoint)
-{
-
- struct yaffs_dev *dev = yaffs_super_to_dev(sb);
- unsigned int oneshot_checkpoint = (yaffs_auto_checkpoint & 4);
- unsigned gc_urgent = yaffs_bg_gc_urgency(dev);
- int do_checkpoint;
-
- yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_SYNC | YAFFS_TRACE_BACKGROUND,
- "yaffs_do_sync_fs: gc-urgency %d %s %s%s",
- gc_urgent,
- sb->s_dirt ? "dirty" : "clean",
- request_checkpoint ? "checkpoint requested" : "no checkpoint",
- oneshot_checkpoint ? " one-shot" : "");
-
- yaffs_gross_lock(dev);
- do_checkpoint = ((request_checkpoint && !gc_urgent) ||
- oneshot_checkpoint) && !dev->is_checkpointed;
-
- if (sb->s_dirt || do_checkpoint) {
- yaffs_flush_super(sb, !dev->is_checkpointed && do_checkpoint);
- sb->s_dirt = 0;
- if (oneshot_checkpoint)
- yaffs_auto_checkpoint &= ~4;
- }
- yaffs_gross_unlock(dev);
-
- return 0;
-}
-
-/*
- * yaffs background thread functions.
- * yaffs_bg_thread_fn() is the thread function.
- * yaffs_bg_start() launches the background thread.
- * yaffs_bg_stop() cleans up the background thread.
- *
- * NB:
- * The thread should only run after yaffs is initialised.
- * The thread should be stopped before yaffs is unmounted.
- * The thread should not do any writing while the fs is read only.
- */
-
-void yaffs_background_waker(unsigned long data)
-{
- wake_up_process((struct task_struct *)data);
-}
-
-static int yaffs_bg_thread_fn(void *data)
-{
- struct yaffs_dev *dev = (struct yaffs_dev *)data;
- struct yaffs_linux_context *context = yaffs_dev_to_lc(dev);
- unsigned long now = jiffies;
- unsigned long next_dir_update = now;
- unsigned long next_gc = now;
- unsigned long expires;
- unsigned int urgency;
-
- int gc_result;
- struct timer_list timer;
-
- yaffs_trace(YAFFS_TRACE_BACKGROUND,
- "yaffs_background starting for dev %p", (void *)dev);
-
- set_freezable();
- while (context->bg_running) {
- yaffs_trace(YAFFS_TRACE_BACKGROUND, "yaffs_background");
-
- if (kthread_should_stop())
- break;
-
- if (try_to_freeze())
- continue;
-
- yaffs_gross_lock(dev);
-
- now = jiffies;
-
- if (time_after(now, next_dir_update) && yaffs_bg_enable) {
- yaffs_update_dirty_dirs(dev);
- next_dir_update = now + HZ;
- }
-
- if (time_after(now, next_gc) && yaffs_bg_enable) {
- if (!dev->is_checkpointed) {
- urgency = yaffs_bg_gc_urgency(dev);
- gc_result = yaffs_bg_gc(dev, urgency);
- if (urgency > 1)
- next_gc = now + HZ / 20 + 1;
- else if (urgency > 0)
- next_gc = now + HZ / 10 + 1;
- else
- next_gc = now + HZ * 2;
- } else {
- /*
- * gc not running so set to next_dir_update
- * to cut down on wake ups
- */
- next_gc = next_dir_update;
- }
- }
- yaffs_gross_unlock(dev);
- expires = next_dir_update;
- if (time_before(next_gc, expires))
- expires = next_gc;
- if (time_before(expires, now))
- expires = now + HZ;
-
- Y_INIT_TIMER(&timer);
- timer.expires = expires + 1;
- timer.data = (unsigned long)current;
- timer.function = yaffs_background_waker;
-
- set_current_state(TASK_INTERRUPTIBLE);
- add_timer(&timer);
- schedule();
- del_timer_sync(&timer);
- }
-
- return 0;
-}
-
-static int yaffs_bg_start(struct yaffs_dev *dev)
-{
- int retval = 0;
- struct yaffs_linux_context *context = yaffs_dev_to_lc(dev);
-
- if (dev->read_only)
- return -1;
-
- context->bg_running = 1;
-
- context->bg_thread = kthread_run(yaffs_bg_thread_fn,
- (void *)dev, "yaffs-bg-%d",
- context->mount_id);
-
- if (IS_ERR(context->bg_thread)) {
- retval = PTR_ERR(context->bg_thread);
- context->bg_thread = NULL;
- context->bg_running = 0;
- }
- return retval;
-}
-
-static void yaffs_bg_stop(struct yaffs_dev *dev)
-{
- struct yaffs_linux_context *ctxt = yaffs_dev_to_lc(dev);
-
- ctxt->bg_running = 0;
-
- if (ctxt->bg_thread) {
- kthread_stop(ctxt->bg_thread);
- ctxt->bg_thread = NULL;
- }
-}
-
-static void yaffs_write_super(struct super_block *sb)
-{
- unsigned request_checkpoint = (yaffs_auto_checkpoint >= 2);
-
- yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_SYNC | YAFFS_TRACE_BACKGROUND,
- "yaffs_write_super%s",
- request_checkpoint ? " checkpt" : "");
-
- yaffs_do_sync_fs(sb, request_checkpoint);
-
-}
-
-static int yaffs_sync_fs(struct super_block *sb, int wait)
-{
- unsigned request_checkpoint = (yaffs_auto_checkpoint >= 1);
-
- yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_SYNC,
- "yaffs_sync_fs%s", request_checkpoint ? " checkpt" : "");
-
- yaffs_do_sync_fs(sb, request_checkpoint);
-
- return 0;
-}
-
-
-static LIST_HEAD(yaffs_context_list);
-struct mutex yaffs_context_lock;
-
-
-
-struct yaffs_options {
- int inband_tags;
- int skip_checkpoint_read;
- int skip_checkpoint_write;
- int no_cache;
- int tags_ecc_on;
- int tags_ecc_overridden;
- int lazy_loading_enabled;
- int lazy_loading_overridden;
- int empty_lost_and_found;
- int empty_lost_and_found_overridden;
-};
-
-#define MAX_OPT_LEN 30
-static int yaffs_parse_options(struct yaffs_options *options,
- const char *options_str)
-{
- char cur_opt[MAX_OPT_LEN + 1];
- int p;
- int error = 0;
-
- /* Parse the options, which are a comma-separated list */
-
- while (options_str && *options_str && !error) {
- memset(cur_opt, 0, MAX_OPT_LEN + 1);
- p = 0;
-
- while (*options_str == ',')
- options_str++;
-
- while (*options_str && *options_str != ',') {
- if (p < MAX_OPT_LEN) {
- cur_opt[p] = *options_str;
- p++;
- }
- options_str++;
- }
-
- if (!strcmp(cur_opt, "inband-tags")) {
- options->inband_tags = 1;
- } else if (!strcmp(cur_opt, "tags-ecc-off")) {
- options->tags_ecc_on = 0;
- options->tags_ecc_overridden = 1;
- } else if (!strcmp(cur_opt, "tags-ecc-on")) {
- options->tags_ecc_on = 1;
- options->tags_ecc_overridden = 1;
- } else if (!strcmp(cur_opt, "lazy-loading-off")) {
- options->lazy_loading_enabled = 0;
- options->lazy_loading_overridden = 1;
- } else if (!strcmp(cur_opt, "lazy-loading-on")) {
- options->lazy_loading_enabled = 1;
- options->lazy_loading_overridden = 1;
- } else if (!strcmp(cur_opt, "empty-lost-and-found-off")) {
- options->empty_lost_and_found = 0;
- options->empty_lost_and_found_overridden = 1;
- } else if (!strcmp(cur_opt, "empty-lost-and-found-on")) {
- options->empty_lost_and_found = 1;
- options->empty_lost_and_found_overridden = 1;
- } else if (!strcmp(cur_opt, "no-cache")) {
- options->no_cache = 1;
- } else if (!strcmp(cur_opt, "no-checkpoint-read")) {
- options->skip_checkpoint_read = 1;
- } else if (!strcmp(cur_opt, "no-checkpoint-write")) {
- options->skip_checkpoint_write = 1;
- } else if (!strcmp(cur_opt, "no-checkpoint")) {
- options->skip_checkpoint_read = 1;
- options->skip_checkpoint_write = 1;
- } else {
- printk(KERN_INFO "yaffs: Bad mount option \"%s\"\n",
- cur_opt);
- error = 1;
- }
- }
-
- return error;
-}
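-
-/* Example (assumed, for illustration): the option string arrives via
- * mount -o, e.g.
- *
- *   mount -t yaffs2 -o inband-tags,no-checkpoint /dev/mtdblock0 /mnt/nand
- *
- * An unknown option makes yaffs_parse_options() return non-zero and
- * the mount fails.
- */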
-
-static const struct address_space_operations yaffs_file_address_operations = {
- .readpage = yaffs_readpage,
- .writepage = yaffs_writepage,
- .write_begin = yaffs_write_begin,
- .write_end = yaffs_write_end,
-};
-
-
-
-static const struct inode_operations yaffs_file_inode_operations = {
- .setattr = yaffs_setattr,
-#ifdef CONFIG_YAFFS_XATTR
- .setxattr = yaffs_setxattr,
- .getxattr = yaffs_getxattr,
- .listxattr = yaffs_listxattr,
- .removexattr = yaffs_removexattr,
-#endif
-};
-
-static const struct inode_operations yaffs_symlink_inode_operations = {
- .readlink = yaffs_readlink,
- .follow_link = yaffs_follow_link,
- .put_link = yaffs_put_link,
- .setattr = yaffs_setattr,
-#ifdef CONFIG_YAFFS_XATTR
- .setxattr = yaffs_setxattr,
- .getxattr = yaffs_getxattr,
- .listxattr = yaffs_listxattr,
- .removexattr = yaffs_removexattr,
-#endif
-};
-
-static void yaffs_fill_inode_from_obj(struct inode *inode,
- struct yaffs_obj *obj)
-{
- if (inode && obj) {
-
- /* Check mode against the variant type and attempt to repair if broken. */
- u32 mode = obj->yst_mode;
- switch (obj->variant_type) {
- case YAFFS_OBJECT_TYPE_FILE:
- if (!S_ISREG(mode)) {
- obj->yst_mode &= ~S_IFMT;
- obj->yst_mode |= S_IFREG;
- }
-
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- if (!S_ISLNK(mode)) {
- obj->yst_mode &= ~S_IFMT;
- obj->yst_mode |= S_IFLNK;
- }
-
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- if (!S_ISDIR(mode)) {
- obj->yst_mode &= ~S_IFMT;
- obj->yst_mode |= S_IFDIR;
- }
-
- break;
- case YAFFS_OBJECT_TYPE_UNKNOWN:
- case YAFFS_OBJECT_TYPE_HARDLINK:
- case YAFFS_OBJECT_TYPE_SPECIAL:
- default:
- /* TODO? */
- break;
- }
-
- inode->i_flags |= S_NOATIME;
-
- inode->i_ino = obj->obj_id;
- inode->i_mode = obj->yst_mode;
- inode->i_uid = obj->yst_uid;
- inode->i_gid = obj->yst_gid;
-
- inode->i_rdev = old_decode_dev(obj->yst_rdev);
-
- inode->i_atime.tv_sec = (time_t) (obj->yst_atime);
- inode->i_atime.tv_nsec = 0;
- inode->i_mtime.tv_sec = (time_t) obj->yst_mtime;
- inode->i_mtime.tv_nsec = 0;
- inode->i_ctime.tv_sec = (time_t) obj->yst_ctime;
- inode->i_ctime.tv_nsec = 0;
- inode->i_size = yaffs_get_obj_length(obj);
- inode->i_blocks = (inode->i_size + 511) >> 9;
-
- inode->i_nlink = yaffs_get_obj_link_count(obj);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_fill_inode mode %x uid %d gid %d size %d count %d",
- inode->i_mode, inode->i_uid, inode->i_gid,
- (int)inode->i_size, atomic_read(&inode->i_count));
-
- switch (obj->yst_mode & S_IFMT) {
- default: /* fifo, device or socket */
- init_special_inode(inode, obj->yst_mode,
- old_decode_dev(obj->yst_rdev));
- break;
- case S_IFREG: /* file */
- inode->i_op = &yaffs_file_inode_operations;
- inode->i_fop = &yaffs_file_operations;
- inode->i_mapping->a_ops =
- &yaffs_file_address_operations;
- break;
- case S_IFDIR: /* directory */
- inode->i_op = &yaffs_dir_inode_operations;
- inode->i_fop = &yaffs_dir_operations;
- break;
- case S_IFLNK: /* symlink */
- inode->i_op = &yaffs_symlink_inode_operations;
- break;
- }
-
- yaffs_inode_to_obj_lv(inode) = obj;
-
- obj->my_inode = inode;
-
- } else {
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_fill_inode invalid parameters");
- }
-}
-
-static void yaffs_put_super(struct super_block *sb)
-{
- struct yaffs_dev *dev = yaffs_super_to_dev(sb);
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_put_super");
-
- yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_BACKGROUND,
- "Shutting down yaffs background thread");
- yaffs_bg_stop(dev);
- yaffs_trace(YAFFS_TRACE_OS | YAFFS_TRACE_BACKGROUND,
- "yaffs background thread shut down");
-
- yaffs_gross_lock(dev);
-
- yaffs_flush_super(sb, 1);
-
- if (yaffs_dev_to_lc(dev)->put_super_fn)
- yaffs_dev_to_lc(dev)->put_super_fn(sb);
-
- yaffs_deinitialise(dev);
-
- yaffs_gross_unlock(dev);
- mutex_lock(&yaffs_context_lock);
- list_del_init(&(yaffs_dev_to_lc(dev)->context_list));
- mutex_unlock(&yaffs_context_lock);
-
- if (yaffs_dev_to_lc(dev)->spare_buffer) {
- kfree(yaffs_dev_to_lc(dev)->spare_buffer);
- yaffs_dev_to_lc(dev)->spare_buffer = NULL;
- }
-
- kfree(dev);
-}
-
-static void yaffs_mtd_put_super(struct super_block *sb)
-{
- struct mtd_info *mtd = yaffs_dev_to_mtd(yaffs_super_to_dev(sb));
-
- if (mtd->sync)
- mtd->sync(mtd);
-
- put_mtd_device(mtd);
-}
-
-static const struct super_operations yaffs_super_ops = {
- .statfs = yaffs_statfs,
- .put_super = yaffs_put_super,
- .evict_inode = yaffs_evict_inode,
- .sync_fs = yaffs_sync_fs,
- .write_super = yaffs_write_super,
-};
-
-static struct super_block *yaffs_internal_read_super(int yaffs_version,
- struct super_block *sb,
- void *data, int silent)
-{
- int n_blocks;
- struct inode *inode = NULL;
- struct dentry *root;
- struct yaffs_dev *dev = NULL;
- char devname_buf[BDEVNAME_SIZE + 1];
- struct mtd_info *mtd;
- int err;
- char *data_str = (char *)data;
- struct yaffs_linux_context *context = NULL;
- struct yaffs_param *param;
-
- int read_only = 0;
-
- struct yaffs_options options;
-
- unsigned mount_id;
- int found;
- struct yaffs_linux_context *context_iterator;
- struct list_head *l;
-
- sb->s_magic = YAFFS_MAGIC;
- sb->s_op = &yaffs_super_ops;
- sb->s_flags |= MS_NOATIME;
-
- read_only = ((sb->s_flags & MS_RDONLY) != 0);
-
- sb->s_export_op = &yaffs_export_ops;
-
- if (!sb)
- printk(KERN_INFO "yaffs: sb is NULL\n");
- else if (!sb->s_dev)
- printk(KERN_INFO "yaffs: sb->s_dev is NULL\n");
- else if (!yaffs_devname(sb, devname_buf))
- printk(KERN_INFO "yaffs: devname is NULL\n");
- else
- printk(KERN_INFO "yaffs: dev is %d name is \"%s\" %s\n",
- sb->s_dev,
- yaffs_devname(sb, devname_buf), read_only ? "ro" : "rw");
-
- if (!data_str)
- data_str = "";
-
- printk(KERN_INFO "yaffs: passed flags \"%s\"\n", data_str);
-
- memset(&options, 0, sizeof(options));
-
- if (yaffs_parse_options(&options, data_str)) {
- /* Option parsing failed */
- return NULL;
- }
-
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_read_super: Using yaffs%d", yaffs_version);
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_read_super: block size %d", (int)(sb->s_blocksize));
-
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "Attempting MTD mount of %u.%u,\"%s\"",
- MAJOR(sb->s_dev), MINOR(sb->s_dev),
- yaffs_devname(sb, devname_buf));
-
- /* Check it's an mtd device..... */
- if (MAJOR(sb->s_dev) != MTD_BLOCK_MAJOR)
- return NULL; /* This isn't an mtd device */
-
- /* Get the device */
- mtd = get_mtd_device(NULL, MINOR(sb->s_dev));
- if (!mtd) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "MTD device #%u doesn't appear to exist",
- MINOR(sb->s_dev));
- return NULL;
- }
- /* Check it's NAND */
- if (mtd->type != MTD_NANDFLASH) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "MTD device is not NAND it's type %d",
- mtd->type);
- return NULL;
- }
-
- yaffs_trace(YAFFS_TRACE_OS, " erase %p", mtd->erase);
- yaffs_trace(YAFFS_TRACE_OS, " read %p", mtd->read);
- yaffs_trace(YAFFS_TRACE_OS, " write %p", mtd->write);
- yaffs_trace(YAFFS_TRACE_OS, " readoob %p", mtd->read_oob);
- yaffs_trace(YAFFS_TRACE_OS, " writeoob %p", mtd->write_oob);
- yaffs_trace(YAFFS_TRACE_OS, " block_isbad %p", mtd->block_isbad);
- yaffs_trace(YAFFS_TRACE_OS, " block_markbad %p", mtd->block_markbad);
- yaffs_trace(YAFFS_TRACE_OS, " %s %d", WRITE_SIZE_STR, WRITE_SIZE(mtd));
- yaffs_trace(YAFFS_TRACE_OS, " oobsize %d", mtd->oobsize);
- yaffs_trace(YAFFS_TRACE_OS, " erasesize %d", mtd->erasesize);
- yaffs_trace(YAFFS_TRACE_OS, " size %lld", mtd->size);
-
-#ifdef CONFIG_YAFFS_AUTO_YAFFS2
-
- if (yaffs_version == 1 && WRITE_SIZE(mtd) >= 2048) {
- yaffs_trace(YAFFS_TRACE_ALWAYS, "auto selecting yaffs2");
- yaffs_version = 2;
- }
-
- /* Added NCB 26/5/2006 for completeness */
- if (yaffs_version == 2 && !options.inband_tags
- && WRITE_SIZE(mtd) == 512) {
- yaffs_trace(YAFFS_TRACE_ALWAYS, "auto selecting yaffs1");
- yaffs_version = 1;
- }
-#endif
-
- if (yaffs_version == 2) {
- /* Check for version 2 style functions */
- if (!mtd->erase ||
- !mtd->block_isbad ||
- !mtd->block_markbad ||
- !mtd->read ||
- !mtd->write || !mtd->read_oob || !mtd->write_oob) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "MTD device does not support required functions");
- return NULL;
- }
-
- if ((WRITE_SIZE(mtd) < YAFFS_MIN_YAFFS2_CHUNK_SIZE ||
- mtd->oobsize < YAFFS_MIN_YAFFS2_SPARE_SIZE) &&
- !options.inband_tags) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "MTD device does not have the right page sizes");
- return NULL;
- }
- } else {
- /* Check for V1 style functions */
- if (!mtd->erase ||
- !mtd->read ||
- !mtd->write || !mtd->read_oob || !mtd->write_oob) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "MTD device does not support required functions");
- return NULL;
- }
-
- if (WRITE_SIZE(mtd) < YAFFS_BYTES_PER_CHUNK ||
- mtd->oobsize != YAFFS_BYTES_PER_SPARE) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "MTD device does not support have the right page sizes");
- return NULL;
- }
- }
-
- /* OK, so if we got here, we have an MTD that's NAND and looks
- * like it has the right capabilities.
- * Set the struct yaffs_dev up for mtd.
- */
-
- if (!read_only && !(mtd->flags & MTD_WRITEABLE)) {
- read_only = 1;
- printk(KERN_INFO
- "yaffs: mtd is read only, setting superblock read only");
- sb->s_flags |= MS_RDONLY;
- }
-
- dev = kmalloc(sizeof(struct yaffs_dev), GFP_KERNEL);
- context = kmalloc(sizeof(struct yaffs_linux_context), GFP_KERNEL);
-
- if (!dev || !context) {
- if (dev)
- kfree(dev);
- if (context)
- kfree(context);
- dev = NULL;
- context = NULL;
- }
-
- if (!dev) {
- /* Deep shit: could not allocate the device structure */
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs_read_super failed trying to allocate yaffs_dev");
- return NULL;
- }
- memset(dev, 0, sizeof(struct yaffs_dev));
- param = &(dev->param);
-
- memset(context, 0, sizeof(struct yaffs_linux_context));
- dev->os_context = context;
- INIT_LIST_HEAD(&(context->context_list));
- context->dev = dev;
- context->super = sb;
-
- dev->read_only = read_only;
-
- sb->s_fs_info = dev;
-
- dev->driver_context = mtd;
- param->name = mtd->name;
-
- /* Set up the memory size parameters.... */
-
- n_blocks =
- YCALCBLOCKS(mtd->size,
- (YAFFS_CHUNKS_PER_BLOCK * YAFFS_BYTES_PER_CHUNK));
-
- param->start_block = 0;
- param->end_block = n_blocks - 1;
- param->chunks_per_block = YAFFS_CHUNKS_PER_BLOCK;
- param->total_bytes_per_chunk = YAFFS_BYTES_PER_CHUNK;
- param->n_reserved_blocks = 5;
- param->n_caches = (options.no_cache) ? 0 : 10;
- param->inband_tags = options.inband_tags;
-
-#ifdef CONFIG_YAFFS_DISABLE_LAZY_LOAD
- param->disable_lazy_load = 1;
-#endif
-#ifdef CONFIG_YAFFS_XATTR
- param->enable_xattr = 1;
-#endif
- if (options.lazy_loading_overridden)
- param->disable_lazy_load = !options.lazy_loading_enabled;
-
-#ifdef CONFIG_YAFFS_DISABLE_TAGS_ECC
- param->no_tags_ecc = 1;
-#endif
-
-#ifdef CONFIG_YAFFS_DISABLE_BACKGROUND
-#else
- param->defered_dir_update = 1;
-#endif
-
- if (options.tags_ecc_overridden)
- param->no_tags_ecc = !options.tags_ecc_on;
-
-#ifdef CONFIG_YAFFS_EMPTY_LOST_AND_FOUND
- param->empty_lost_n_found = 1;
-#endif
-
-#ifdef CONFIG_YAFFS_DISABLE_BLOCK_REFRESHING
- param->refresh_period = 0;
-#else
- param->refresh_period = 500;
-#endif
-
-#ifdef CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED
- param->always_check_erased = 1;
-#endif
-
- if (options.empty_lost_and_found_overridden)
- param->empty_lost_n_found = options.empty_lost_and_found;
-
- /* ... and the functions. */
- if (yaffs_version == 2) {
- param->write_chunk_tags_fn = nandmtd2_write_chunk_tags;
- param->read_chunk_tags_fn = nandmtd2_read_chunk_tags;
- param->bad_block_fn = nandmtd2_mark_block_bad;
- param->query_block_fn = nandmtd2_query_block;
- yaffs_dev_to_lc(dev)->spare_buffer =
- kmalloc(mtd->oobsize, GFP_NOFS);
- param->is_yaffs2 = 1;
- param->total_bytes_per_chunk = mtd->writesize;
- param->chunks_per_block = mtd->erasesize / mtd->writesize;
- n_blocks = YCALCBLOCKS(mtd->size, mtd->erasesize);
-
- param->start_block = 0;
- param->end_block = n_blocks - 1;
- } else {
- /* use the MTD interface in yaffs_mtdif1.c */
- param->write_chunk_tags_fn = nandmtd1_write_chunk_tags;
- param->read_chunk_tags_fn = nandmtd1_read_chunk_tags;
- param->bad_block_fn = nandmtd1_mark_block_bad;
- param->query_block_fn = nandmtd1_query_block;
- param->is_yaffs2 = 0;
- }
- /* ... and common functions */
- param->erase_fn = nandmtd_erase_block;
- param->initialise_flash_fn = nandmtd_initialise;
-
- yaffs_dev_to_lc(dev)->put_super_fn = yaffs_mtd_put_super;
-
- param->sb_dirty_fn = yaffs_touch_super;
- param->gc_control = yaffs_gc_control_callback;
-
- yaffs_dev_to_lc(dev)->super = sb;
-
-#ifndef CONFIG_YAFFS_DOES_ECC
- param->use_nand_ecc = 1;
-#endif
-
- param->skip_checkpt_rd = options.skip_checkpoint_read;
- param->skip_checkpt_wr = options.skip_checkpoint_write;
-
- mutex_lock(&yaffs_context_lock);
- /* Get a mount id */
- found = 0;
- for (mount_id = 0; !found; mount_id++) {
- found = 1;
- list_for_each(l, &yaffs_context_list) {
- context_iterator =
- list_entry(l, struct yaffs_linux_context,
- context_list);
- if (context_iterator->mount_id == mount_id)
- found = 0;
- }
- }
- context->mount_id = mount_id;
-
- list_add_tail(&(yaffs_dev_to_lc(dev)->context_list),
- &yaffs_context_list);
- mutex_unlock(&yaffs_context_lock);
-
- /* Directory search handling... */
- INIT_LIST_HEAD(&(yaffs_dev_to_lc(dev)->search_contexts));
- param->remove_obj_fn = yaffs_remove_obj_callback;
-
- mutex_init(&(yaffs_dev_to_lc(dev)->gross_lock));
-
- yaffs_gross_lock(dev);
-
- err = yaffs_guts_initialise(dev);
-
- yaffs_trace(YAFFS_TRACE_OS,
- "yaffs_read_super: guts initialised %s",
- (err == YAFFS_OK) ? "OK" : "FAILED");
-
- if (err == YAFFS_OK)
- yaffs_bg_start(dev);
-
- if (!context->bg_thread)
- param->defered_dir_update = 0;
-
- /* Release lock before yaffs_get_inode() */
- yaffs_gross_unlock(dev);
-
- /* Create root inode */
- if (err == YAFFS_OK)
- inode = yaffs_get_inode(sb, S_IFDIR | 0755, 0, yaffs_root(dev));
-
- if (!inode)
- return NULL;
-
- inode->i_op = &yaffs_dir_inode_operations;
- inode->i_fop = &yaffs_dir_operations;
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: got root inode");
-
- root = d_alloc_root(inode);
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: d_alloc_root done");
-
- if (!root) {
- iput(inode);
- return NULL;
- }
- sb->s_root = root;
- sb->s_dirt = !dev->is_checkpointed;
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs_read_super: is_checkpointed %d",
- dev->is_checkpointed);
-
- yaffs_trace(YAFFS_TRACE_OS, "yaffs_read_super: done");
- return sb;
-}
-
-static int yaffs_internal_read_super_mtd(struct super_block *sb, void *data,
- int silent)
-{
- return yaffs_internal_read_super(1, sb, data, silent) ? 0 : -EINVAL;
-}
-
-static int yaffs_read_super(struct file_system_type *fs,
- int flags, const char *dev_name,
- void *data, struct vfsmount *mnt)
-{
-
- return get_sb_bdev(fs, flags, dev_name, data,
- yaffs_internal_read_super_mtd, mnt);
-}
-
-static struct file_system_type yaffs_fs_type = {
- .owner = THIS_MODULE,
- .name = "yaffs",
- .get_sb = yaffs_read_super,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-
-#ifdef CONFIG_YAFFS_YAFFS2
-
-static int yaffs2_internal_read_super_mtd(struct super_block *sb, void *data,
- int silent)
-{
- return yaffs_internal_read_super(2, sb, data, silent) ? 0 : -EINVAL;
-}
-
-static int yaffs2_read_super(struct file_system_type *fs,
- int flags, const char *dev_name, void *data,
- struct vfsmount *mnt)
-{
- return get_sb_bdev(fs, flags, dev_name, data,
- yaffs2_internal_read_super_mtd, mnt);
-}
-
-static struct file_system_type yaffs2_fs_type = {
- .owner = THIS_MODULE,
- .name = "yaffs2",
- .get_sb = yaffs2_read_super,
- .kill_sb = kill_block_super,
- .fs_flags = FS_REQUIRES_DEV,
-};
-#endif /* CONFIG_YAFFS_YAFFS2 */
-
-static struct proc_dir_entry *my_proc_entry;
-
-static char *yaffs_dump_dev_part0(char *buf, struct yaffs_dev *dev)
-{
- struct yaffs_param *param = &dev->param;
- buf += sprintf(buf, "start_block........... %d\n", param->start_block);
- buf += sprintf(buf, "end_block............. %d\n", param->end_block);
- buf += sprintf(buf, "total_bytes_per_chunk. %d\n",
- param->total_bytes_per_chunk);
- buf += sprintf(buf, "use_nand_ecc.......... %d\n",
- param->use_nand_ecc);
- buf += sprintf(buf, "no_tags_ecc........... %d\n", param->no_tags_ecc);
- buf += sprintf(buf, "is_yaffs2............. %d\n", param->is_yaffs2);
- buf += sprintf(buf, "inband_tags........... %d\n", param->inband_tags);
- buf += sprintf(buf, "empty_lost_n_found.... %d\n",
- param->empty_lost_n_found);
- buf += sprintf(buf, "disable_lazy_load..... %d\n",
- param->disable_lazy_load);
- buf += sprintf(buf, "refresh_period........ %d\n",
- param->refresh_period);
- buf += sprintf(buf, "n_caches.............. %d\n", param->n_caches);
- buf += sprintf(buf, "n_reserved_blocks..... %d\n",
- param->n_reserved_blocks);
- buf += sprintf(buf, "always_check_erased... %d\n",
- param->always_check_erased);
-
- return buf;
-}
-
-static char *yaffs_dump_dev_part1(char *buf, struct yaffs_dev *dev)
-{
- buf +=
- sprintf(buf, "data_bytes_per_chunk.. %d\n",
- dev->data_bytes_per_chunk);
- buf += sprintf(buf, "chunk_grp_bits........ %d\n", dev->chunk_grp_bits);
- buf += sprintf(buf, "chunk_grp_size........ %d\n", dev->chunk_grp_size);
- buf +=
- sprintf(buf, "n_erased_blocks....... %d\n", dev->n_erased_blocks);
- buf +=
- sprintf(buf, "blocks_in_checkpt..... %d\n", dev->blocks_in_checkpt);
- buf += sprintf(buf, "\n");
- buf += sprintf(buf, "n_tnodes.............. %d\n", dev->n_tnodes);
- buf += sprintf(buf, "n_obj................. %d\n", dev->n_obj);
- buf += sprintf(buf, "n_free_chunks......... %d\n", dev->n_free_chunks);
- buf += sprintf(buf, "\n");
- buf += sprintf(buf, "n_page_writes......... %u\n", dev->n_page_writes);
- buf += sprintf(buf, "n_page_reads.......... %u\n", dev->n_page_reads);
- buf += sprintf(buf, "n_erasures............ %u\n", dev->n_erasures);
- buf += sprintf(buf, "n_gc_copies........... %u\n", dev->n_gc_copies);
- buf += sprintf(buf, "all_gcs............... %u\n", dev->all_gcs);
- buf +=
- sprintf(buf, "passive_gc_count...... %u\n", dev->passive_gc_count);
- buf +=
- sprintf(buf, "oldest_dirty_gc_count. %u\n",
- dev->oldest_dirty_gc_count);
- buf += sprintf(buf, "n_gc_blocks........... %u\n", dev->n_gc_blocks);
- buf += sprintf(buf, "bg_gcs................ %u\n", dev->bg_gcs);
- buf +=
- sprintf(buf, "n_retired_writes...... %u\n", dev->n_retired_writes);
- buf +=
- sprintf(buf, "n_retired_blocks...... %u\n", dev->n_retired_blocks);
- buf += sprintf(buf, "n_ecc_fixed........... %u\n", dev->n_ecc_fixed);
- buf += sprintf(buf, "n_ecc_unfixed......... %u\n", dev->n_ecc_unfixed);
- buf +=
- sprintf(buf, "n_tags_ecc_fixed...... %u\n", dev->n_tags_ecc_fixed);
- buf +=
- sprintf(buf, "n_tags_ecc_unfixed.... %u\n",
- dev->n_tags_ecc_unfixed);
- buf += sprintf(buf, "cache_hits............ %u\n", dev->cache_hits);
- buf +=
- sprintf(buf, "n_deleted_files....... %u\n", dev->n_deleted_files);
- buf +=
- sprintf(buf, "n_unlinked_files...... %u\n", dev->n_unlinked_files);
- buf += sprintf(buf, "refresh_count......... %u\n", dev->refresh_count);
- buf += sprintf(buf, "n_bg_deletions........ %u\n", dev->n_bg_deletions);
-
- return buf;
-}
-
-static int yaffs_proc_read(char *page,
- char **start,
- off_t offset, int count, int *eof, void *data)
-{
- struct list_head *item;
- char *buf = page;
- int step = offset;
- int n = 0;
-
- /* Get proc_file_read() to step 'offset' by one on each successive call.
- * We use 'offset' (*ppos) to indicate where we are in dev_list.
- * This also assumes the user has posted a read buffer large
- * enough to hold the complete output; but that's life in /proc.
- */
-
- *(int *)start = 1;
-
- /* Print header first */
- if (step == 0)
- buf += sprintf(buf, "YAFFS built:" __DATE__ " " __TIME__ "\n");
- else if (step == 1)
- buf += sprintf(buf, "\n");
- else {
- step -= 2;
-
- mutex_lock(&yaffs_context_lock);
-
- /* Locate and print the Nth entry. Order N-squared but N is small. */
- list_for_each(item, &yaffs_context_list) {
- struct yaffs_linux_context *dc =
- list_entry(item, struct yaffs_linux_context,
- context_list);
- struct yaffs_dev *dev = dc->dev;
-
- if (n < (step & ~1)) {
- n += 2;
- continue;
- }
- if ((step & 1) == 0) {
- buf +=
- sprintf(buf, "\nDevice %d \"%s\"\n", n,
- dev->param.name);
- buf = yaffs_dump_dev_part0(buf, dev);
- } else {
- buf = yaffs_dump_dev_part1(buf, dev);
- }
-
- break;
- }
- mutex_unlock(&yaffs_context_lock);
- }
-
- return buf - page < count ? buf - page : count;
-}
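-
-/* Reading /proc/yaffs therefore produces (assuming a single mounted
- * device): the build header, a blank line, then a parameter dump
- * (part0) and a statistics dump (part1) for each device, i.e. two
- * read steps per device.
- */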
-
-
-/**
- * Set the verbosity of the warnings and error messages.
- *
- * Note that the names can only be a..z or _ with the current code.
- */
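-
-/* Example writes (assumed usage, matching the parser below); masks may
- * be given by name or as a number, optionally prefixed with '+', '-'
- * or '=':
- *
- *   echo "+os +sync" > /proc/yaffs     adds masks
- *   echo "-gc" > /proc/yaffs           clears a mask
- *   echo "=0x10" > /proc/yaffs         sets the mask exactly
- *
- * Whatever is written, YAFFS_TRACE_ALWAYS is OR'ed back in afterwards.
- */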
-
-static struct {
- char *mask_name;
- unsigned mask_bitfield;
-} mask_flags[] = {
- {"allocate", YAFFS_TRACE_ALLOCATE},
- {"always", YAFFS_TRACE_ALWAYS},
- {"background", YAFFS_TRACE_BACKGROUND},
- {"bad_blocks", YAFFS_TRACE_BAD_BLOCKS},
- {"buffers", YAFFS_TRACE_BUFFERS},
- {"bug", YAFFS_TRACE_BUG},
- {"checkpt", YAFFS_TRACE_CHECKPOINT},
- {"deletion", YAFFS_TRACE_DELETION},
- {"erase", YAFFS_TRACE_ERASE},
- {"error", YAFFS_TRACE_ERROR},
- {"gc_detail", YAFFS_TRACE_GC_DETAIL},
- {"gc", YAFFS_TRACE_GC},
- {"lock", YAFFS_TRACE_LOCK},
- {"mtd", YAFFS_TRACE_MTD},
- {"nandaccess", YAFFS_TRACE_NANDACCESS},
- {"os", YAFFS_TRACE_OS},
- {"scan_debug", YAFFS_TRACE_SCAN_DEBUG},
- {"scan", YAFFS_TRACE_SCAN},
- {"mount", YAFFS_TRACE_MOUNT},
- {"tracing", YAFFS_TRACE_TRACING},
- {"sync", YAFFS_TRACE_SYNC},
- {"write", YAFFS_TRACE_WRITE},
- {"verify", YAFFS_TRACE_VERIFY},
- {"verify_nand", YAFFS_TRACE_VERIFY_NAND},
- {"verify_full", YAFFS_TRACE_VERIFY_FULL},
- {"verify_all", YAFFS_TRACE_VERIFY_ALL},
- {"all", 0xffffffff},
- {"none", 0},
- {NULL, 0},
-};
-
-#define MAX_MASK_NAME_LENGTH 40
-static int yaffs_proc_write_trace_options(struct file *file, const char *buf,
- unsigned long count, void *data)
-{
- unsigned rg = 0, mask_bitfield;
- char *end;
- char *mask_name;
- const char *x;
- char substring[MAX_MASK_NAME_LENGTH + 1];
- int i;
- int done = 0;
- int add, len = 0;
- int pos = 0;
-
- rg = yaffs_trace_mask;
-
- while (!done && (pos < count)) {
- done = 1;
- while ((pos < count) && isspace(buf[pos]))
- pos++;
-
- switch (buf[pos]) {
- case '+':
- case '-':
- case '=':
- add = buf[pos];
- pos++;
- break;
-
- default:
- add = ' ';
- break;
- }
- mask_name = NULL;
-
- mask_bitfield = simple_strtoul(buf + pos, &end, 0);
-
- if (end > buf + pos) {
- mask_name = "numeral";
- len = end - (buf + pos);
- pos += len;
- done = 0;
- } else {
- for (x = buf + pos, i = 0;
- (*x == '_' || (*x >= 'a' && *x <= 'z')) &&
- i < MAX_MASK_NAME_LENGTH; x++, i++, pos++)
- substring[i] = *x;
- substring[i] = '\0';
-
- for (i = 0; mask_flags[i].mask_name != NULL; i++) {
- if (strcmp(substring, mask_flags[i].mask_name)
- == 0) {
- mask_name = mask_flags[i].mask_name;
- mask_bitfield =
- mask_flags[i].mask_bitfield;
- done = 0;
- break;
- }
- }
- }
-
- if (mask_name != NULL) {
- done = 0;
- switch (add) {
- case '-':
- rg &= ~mask_bitfield;
- break;
- case '+':
- rg |= mask_bitfield;
- break;
- case '=':
- rg = mask_bitfield;
- break;
- default:
- rg |= mask_bitfield;
- break;
- }
- }
- }
-
- yaffs_trace_mask = rg | YAFFS_TRACE_ALWAYS;
-
- printk(KERN_DEBUG "new trace = 0x%08X\n", yaffs_trace_mask);
-
- if (rg & YAFFS_TRACE_ALWAYS) {
- for (i = 0; mask_flags[i].mask_name != NULL; i++) {
- char flag;
- flag = ((rg & mask_flags[i].mask_bitfield) ==
- mask_flags[i].mask_bitfield) ? '+' : '-';
- printk(KERN_DEBUG "%c%s\n", flag,
- mask_flags[i].mask_name);
- }
- }
-
- return count;
-}
-
-static int yaffs_proc_write(struct file *file, const char *buf,
- unsigned long count, void *data)
-{
- return yaffs_proc_write_trace_options(file, buf, count, data);
-}
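
The trace-options parser above accepts a space-separated list of mask names or numerals, each optionally prefixed with '+' (set), '-' (clear) or '=' (assign); YAFFS_TRACE_ALWAYS is OR-ed back in unconditionally, and the new per-flag states are logged via printk at KERN_DEBUG. A minimal user-space sketch of driving it, assuming the entry is visible as /proc/yaffs (the exact location of YPROC_ROOT is an assumption):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* "=none" clears the mask, then "+gc +erase" enables two groups */
	const char *cmd = "=none +gc +erase";
	int fd = open("/proc/yaffs", O_WRONLY);

	if (fd < 0) {
		perror("open /proc/yaffs");
		return 1;
	}
	if (write(fd, cmd, strlen(cmd)) < 0)
		perror("write");
	close(fd);
	return 0;
}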
-
-/* Stuff to handle installation of file systems */
-struct file_system_to_install {
- struct file_system_type *fst;
- int installed;
-};
-
-static struct file_system_to_install fs_to_install[] = {
- {&yaffs_fs_type, 0},
- {&yaffs2_fs_type, 0},
- {NULL, 0}
-};
-
-static int __init init_yaffs_fs(void)
-{
- int error = 0;
- struct file_system_to_install *fsinst;
-
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs built " __DATE__ " " __TIME__ " Installing.");
-
-#ifdef CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "\n\nYAFFS-WARNING CONFIG_YAFFS_ALWAYS_CHECK_CHUNK_ERASED selected.\n\n\n");
-#endif
-
- mutex_init(&yaffs_context_lock);
-
- /* Install the proc_fs entries */
- my_proc_entry = create_proc_entry("yaffs",
- S_IRUGO | S_IFREG, YPROC_ROOT);
-
- if (my_proc_entry) {
- my_proc_entry->write_proc = yaffs_proc_write;
- my_proc_entry->read_proc = yaffs_proc_read;
- my_proc_entry->data = NULL;
- } else {
- return -ENOMEM;
- }
-
-
- /* Now add the file system entries */
-
- fsinst = fs_to_install;
-
- while (fsinst->fst && !error) {
- error = register_filesystem(fsinst->fst);
- if (!error)
- fsinst->installed = 1;
- fsinst++;
- }
-
- /* Any errors? uninstall */
- if (error) {
- fsinst = fs_to_install;
-
- while (fsinst->fst) {
- if (fsinst->installed) {
- unregister_filesystem(fsinst->fst);
- fsinst->installed = 0;
- }
- fsinst++;
- }
- }
-
- return error;
-}
-
-static void __exit exit_yaffs_fs(void)
-{
-
- struct file_system_to_install *fsinst;
-
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "yaffs built " __DATE__ " " __TIME__ " removing.");
-
- remove_proc_entry("yaffs", YPROC_ROOT);
-
- fsinst = fs_to_install;
-
- while (fsinst->fst) {
- if (fsinst->installed) {
- unregister_filesystem(fsinst->fst);
- fsinst->installed = 0;
- }
- fsinst++;
- }
-}
-
-module_init(init_yaffs_fs)
-module_exit(exit_yaffs_fs)
-
-MODULE_DESCRIPTION("YAFFS2 - a NAND specific flash file system");
-MODULE_AUTHOR("Charles Manning, Aleph One Ltd., 2002-2010");
-MODULE_LICENSE("GPL");
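
init_yaffs_fs()/exit_yaffs_fs() above use a register-then-roll-back idiom: the fs_to_install table is walked in order and, on the first failure, everything already installed is unregistered. A condensed sketch of the same pattern, with a placeholder table (the entries are hypothetical):

#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>

/* hypothetical table; real entries would be &some_fs_type, ... */
static struct file_system_type *fs_table[] = { NULL };

static int __init demo_init(void)
{
	int i, err = 0;

	for (i = 0; fs_table[i]; i++) {
		err = register_filesystem(fs_table[i]);
		if (err)
			break;
	}
	if (err)		/* unwind whatever succeeded */
		while (i-- > 0)
			unregister_filesystem(fs_table[i]);
	return err;
}

static void __exit demo_exit(void)
{
	int i;

	for (i = 0; fs_table[i]; i++)
		unregister_filesystem(fs_table[i]);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The per-entry 'installed' flag in the original buys the same thing as the index-based unwind here.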
diff --git a/fs/yaffs2/yaffs_yaffs1.c b/fs/yaffs2/yaffs_yaffs1.c
deleted file mode 100644
index 9eb6030..0000000
--- a/fs/yaffs2/yaffs_yaffs1.c
+++ /dev/null
@@ -1,433 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_yaffs1.h"
-#include "yportenv.h"
-#include "yaffs_trace.h"
-#include "yaffs_bitmap.h"
-#include "yaffs_getblockinfo.h"
-#include "yaffs_nand.h"
-#include "yaffs_attribs.h"
-
-int yaffs1_scan(struct yaffs_dev *dev)
-{
- struct yaffs_ext_tags tags;
- int blk;
- int result;
-
- int chunk;
- int c;
- int deleted;
- enum yaffs_block_state state;
- struct yaffs_obj *hard_list = NULL;
- struct yaffs_block_info *bi;
- u32 seq_number;
- struct yaffs_obj_hdr *oh;
- struct yaffs_obj *in;
- struct yaffs_obj *parent;
-
- int alloc_failed = 0;
-
- struct yaffs_shadow_fixer *shadow_fixers = NULL;
-
- u8 *chunk_data;
-
- yaffs_trace(YAFFS_TRACE_SCAN,
- "yaffs1_scan starts intstartblk %d intendblk %d...",
- dev->internal_start_block, dev->internal_end_block);
-
- chunk_data = yaffs_get_temp_buffer(dev, __LINE__);
-
- dev->seq_number = YAFFS_LOWEST_SEQUENCE_NUMBER;
-
- /* Scan all the blocks to determine their state */
- bi = dev->block_info;
- for (blk = dev->internal_start_block; blk <= dev->internal_end_block;
- blk++) {
- yaffs_clear_chunk_bits(dev, blk);
- bi->pages_in_use = 0;
- bi->soft_del_pages = 0;
-
- yaffs_query_init_block_state(dev, blk, &state, &seq_number);
-
- bi->block_state = state;
- bi->seq_number = seq_number;
-
- if (bi->seq_number == YAFFS_SEQUENCE_BAD_BLOCK)
- bi->block_state = state = YAFFS_BLOCK_STATE_DEAD;
-
- yaffs_trace(YAFFS_TRACE_SCAN_DEBUG,
- "Block scanning block %d state %d seq %d",
- blk, state, seq_number);
-
- if (state == YAFFS_BLOCK_STATE_DEAD) {
- yaffs_trace(YAFFS_TRACE_BAD_BLOCKS,
- "block %d is bad", blk);
- } else if (state == YAFFS_BLOCK_STATE_EMPTY) {
- yaffs_trace(YAFFS_TRACE_SCAN_DEBUG, "Block empty ");
- dev->n_erased_blocks++;
- dev->n_free_chunks += dev->param.chunks_per_block;
- }
- bi++;
- }
-
- /* For each block.... */
- for (blk = dev->internal_start_block;
- !alloc_failed && blk <= dev->internal_end_block; blk++) {
-
- cond_resched();
-
- bi = yaffs_get_block_info(dev, blk);
- state = bi->block_state;
-
- deleted = 0;
-
- /* For each chunk in each block that needs scanning.... */
- for (c = 0; !alloc_failed && c < dev->param.chunks_per_block &&
- state == YAFFS_BLOCK_STATE_NEEDS_SCANNING; c++) {
- /* Read the tags and decide what to do */
- chunk = blk * dev->param.chunks_per_block + c;
-
- result = yaffs_rd_chunk_tags_nand(dev, chunk, NULL,
- &tags);
-
- /* Let's have a good look at this chunk... */
-
- if (tags.ecc_result == YAFFS_ECC_RESULT_UNFIXED
- || tags.is_deleted) {
- /* YAFFS1 only...
- * A deleted chunk
- */
- deleted++;
- dev->n_free_chunks++;
- /*T((" %d %d deleted\n",blk,c)); */
- } else if (!tags.chunk_used) {
- /* An unassigned chunk in the block
- * This means that either the block is empty or
- * this is the one being allocated from
- */
-
- if (c == 0) {
- /* We're looking at the first chunk in the block so the block is unused */
- state = YAFFS_BLOCK_STATE_EMPTY;
- dev->n_erased_blocks++;
- } else {
- /* this is the block being allocated from */
- yaffs_trace(YAFFS_TRACE_SCAN,
- " Allocating from %d %d",
- blk, c);
- state = YAFFS_BLOCK_STATE_ALLOCATING;
- dev->alloc_block = blk;
- dev->alloc_page = c;
- dev->alloc_block_finder = blk;
- /* Set block finder here to encourage the allocator to go forth from here. */
-
- }
-
- dev->n_free_chunks +=
- (dev->param.chunks_per_block - c);
- } else if (tags.chunk_id > 0) {
- /* chunk_id > 0 so it is a data chunk... */
- unsigned int endpos;
-
- yaffs_set_chunk_bit(dev, blk, c);
- bi->pages_in_use++;
-
- in = yaffs_find_or_create_by_number(dev,
- tags.obj_id,
- YAFFS_OBJECT_TYPE_FILE);
- /* PutChunkIntoFile checks for a clash (two data chunks with
- * the same chunk_id).
- */
-
- if (!in)
- alloc_failed = 1;
-
- if (in) {
- if (!yaffs_put_chunk_in_file
- (in, tags.chunk_id, chunk, 1))
- alloc_failed = 1;
- }
-
- endpos = (tags.chunk_id - 1) * dev->data_bytes_per_chunk +
- tags.n_bytes;
- if (in && in->variant_type == YAFFS_OBJECT_TYPE_FILE &&
- in->variant.file_variant.scanned_size < endpos) {
- in->variant.file_variant.scanned_size = endpos;
- if (!dev->param.use_header_file_size) {
- in->variant.file_variant.file_size =
- in->variant.file_variant.scanned_size;
- }
-
- }
- /* T((" %d %d data %d %d\n",blk,c,tags.obj_id,tags.chunk_id)); */
- } else {
- /* chunk_id == 0, so it is an ObjectHeader.
- * Thus, we read in the object header and make the object
- */
- yaffs_set_chunk_bit(dev, blk, c);
- bi->pages_in_use++;
-
- result = yaffs_rd_chunk_tags_nand(dev, chunk,
- chunk_data,
- NULL);
-
- oh = (struct yaffs_obj_hdr *)chunk_data;
-
- in = yaffs_find_by_number(dev, tags.obj_id);
- if (in && in->variant_type != oh->type) {
- /* This should not happen, but somehow
- * We've ended up with an obj_id that has been reused but not yet
- * deleted, and worse still it has changed type. Delete the old object.
- */
-
- yaffs_del_obj(in);
-
- in = 0;
- }
-
- in = yaffs_find_or_create_by_number(dev,
- tags.obj_id,
- oh->type);
-
- if (!in)
- alloc_failed = 1;
-
- if (in && oh->shadows_obj > 0) {
-
- struct yaffs_shadow_fixer *fixer;
- fixer = kmalloc(sizeof(struct yaffs_shadow_fixer),
- GFP_NOFS);
- if (fixer) {
- fixer->next = shadow_fixers;
- shadow_fixers = fixer;
- fixer->obj_id = tags.obj_id;
- fixer->shadowed_id =
- oh->shadows_obj;
- yaffs_trace(YAFFS_TRACE_SCAN,
- " Shadow fixer: %d shadows %d",
- fixer->obj_id,
- fixer->shadowed_id);
-
- }
-
- }
-
- if (in && in->valid) {
- /* We have already filled this one. We have a duplicate and need to resolve it. */
-
- unsigned existing_serial = in->serial;
- unsigned new_serial =
- tags.serial_number;
-
- if (((existing_serial + 1) & 3) ==
- new_serial) {
- /* Use new one - destroy the existing one */
- yaffs_chunk_del(dev,
- in->hdr_chunk,
- 1, __LINE__);
- in->valid = 0;
- } else {
- /* Use existing - destroy this one. */
- yaffs_chunk_del(dev, chunk, 1,
- __LINE__);
- }
- }
-
- if (in && !in->valid &&
- (tags.obj_id == YAFFS_OBJECTID_ROOT ||
- tags.obj_id ==
- YAFFS_OBJECTID_LOSTNFOUND)) {
- /* We only load some info, don't fiddle with directory structure */
- in->valid = 1;
- in->variant_type = oh->type;
-
- in->yst_mode = oh->yst_mode;
- yaffs_load_attribs(in, oh);
- in->hdr_chunk = chunk;
- in->serial = tags.serial_number;
-
- } else if (in && !in->valid) {
- /* we need to load this info */
-
- in->valid = 1;
- in->variant_type = oh->type;
-
- in->yst_mode = oh->yst_mode;
- yaffs_load_attribs(in, oh);
- in->hdr_chunk = chunk;
- in->serial = tags.serial_number;
-
- yaffs_set_obj_name_from_oh(in, oh);
- in->dirty = 0;
-
- /* directory stuff...
- * hook up to parent
- */
-
- parent =
- yaffs_find_or_create_by_number
- (dev, oh->parent_obj_id,
- YAFFS_OBJECT_TYPE_DIRECTORY);
- if (!parent)
- alloc_failed = 1;
- if (parent && parent->variant_type ==
- YAFFS_OBJECT_TYPE_UNKNOWN) {
- /* Set up as a directory */
- parent->variant_type =
- YAFFS_OBJECT_TYPE_DIRECTORY;
- INIT_LIST_HEAD(&parent->variant.dir_variant.children);
- } else if (!parent
- || parent->variant_type !=
- YAFFS_OBJECT_TYPE_DIRECTORY) {
- /* Hoosterman, another problem....
- * We're trying to use a non-directory as a directory
- */
-
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs tragedy: attempting to use non-directory as a directory in scan. Put in lost+found."
- );
- parent = dev->lost_n_found;
- }
-
- yaffs_add_obj_to_dir(parent, in);
-
- if (0 && (parent == dev->del_dir ||
- parent ==
- dev->unlinked_dir)) {
- in->deleted = 1; /* If it is unlinked at start up then it wants deleting */
- dev->n_deleted_files++;
- }
- /* Note re hardlinks.
- * Since we might scan a hardlink before its equivalent object is scanned
- * we put them all in a list.
- * After scanning is complete, we should have all the objects, so we run through this
- * list and fix up all the chains.
- */
-
- switch (in->variant_type) {
- case YAFFS_OBJECT_TYPE_UNKNOWN:
- /* Todo got a problem */
- break;
- case YAFFS_OBJECT_TYPE_FILE:
- if (dev->param.use_header_file_size)
- in->variant.file_variant.file_size = oh->file_size;
-
- break;
- case YAFFS_OBJECT_TYPE_HARDLINK:
- in->variant.hardlink_variant.equiv_id = oh->equiv_id;
- in->hard_links.next = (struct list_head *)hard_list;
- hard_list = in;
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- /* Do nothing */
- break;
- case YAFFS_OBJECT_TYPE_SPECIAL:
- /* Do nothing */
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- in->variant.symlink_variant.alias = yaffs_clone_str(oh->alias);
- if (!in->variant.symlink_variant.alias)
- alloc_failed = 1;
- break;
- }
-
- }
- }
- }
-
- if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) {
- /* If we got this far while scanning, then the block is fully allocated. */
- state = YAFFS_BLOCK_STATE_FULL;
- }
-
- if (state == YAFFS_BLOCK_STATE_ALLOCATING) {
- /* If the block was partially allocated then treat it as fully allocated. */
- state = YAFFS_BLOCK_STATE_FULL;
- dev->alloc_block = -1;
- }
-
- bi->block_state = state;
-
- /* Now let's see if it was dirty */
- if (bi->pages_in_use == 0 &&
- !bi->has_shrink_hdr &&
- bi->block_state == YAFFS_BLOCK_STATE_FULL) {
- yaffs_block_became_dirty(dev, blk);
- }
-
- }
-
- /* Ok, we've done all the scanning.
- * Fix up the hard link chains.
- * We should now have scanned all the objects, now it's time to add these
- * hardlinks.
- */
-
- yaffs_link_fixup(dev, hard_list);
-
- /* Fix up any shadowed objects */
- {
- struct yaffs_shadow_fixer *fixer;
- struct yaffs_obj *obj;
-
- while (shadow_fixers) {
- fixer = shadow_fixers;
- shadow_fixers = fixer->next;
- /* Complete the rename transaction by deleting the shadowed object
- * then setting the object header to unshadowed.
- */
- obj = yaffs_find_by_number(dev, fixer->shadowed_id);
- if (obj)
- yaffs_del_obj(obj);
-
- obj = yaffs_find_by_number(dev, fixer->obj_id);
-
- if (obj)
- yaffs_update_oh(obj, NULL, 1, 0, 0, NULL);
-
- kfree(fixer);
- }
- }
-
- yaffs_release_temp_buffer(dev, chunk_data, __LINE__);
-
- if (alloc_failed)
- return YAFFS_FAIL;
-
- yaffs_trace(YAFFS_TRACE_SCAN, "yaffs1_scan ends");
-
- return YAFFS_OK;
-}
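
A note on the hard-link pass in yaffs1_scan(): a link can be scanned before its target object exists, so the scanner chains links through the spare hard_links.next pointer and resolves them all at once in yaffs_link_fixup() after the scan. A stand-alone sketch of that deferred fix-up, with illustrative types (not the real yaffs structures):

#include <stdio.h>

struct obj {
	int id;
	int equiv_id;       /* id of the link target (0 = not a link) */
	struct obj *equiv;  /* resolved after scanning                */
	struct obj *next;   /* chains unresolved links during scan    */
};

static struct obj objs[] = {
	{ 1, 0, NULL, NULL },	/* regular object        */
	{ 2, 1, NULL, NULL },	/* hard link to object 1 */
	{ 3, 1, NULL, NULL },	/* hard link to object 1 */
};

static struct obj *find_by_id(int id)
{
	for (unsigned int i = 0; i < sizeof(objs) / sizeof(objs[0]); i++)
		if (objs[i].id == id)
			return &objs[i];
	return NULL;
}

static void link_fixup(struct obj *hard_list)
{
	while (hard_list) {
		struct obj *n = hard_list->next;

		hard_list->equiv = find_by_id(hard_list->equiv_id);
		hard_list->next = NULL;
		hard_list = n;
	}
}

int main(void)
{
	struct obj *hard_list = NULL;

	/* "scan": links are found before their target is resolvable */
	objs[1].next = hard_list; hard_list = &objs[1];
	objs[2].next = hard_list; hard_list = &objs[2];

	link_fixup(hard_list);
	printf("obj 2 -> obj %d\n", objs[1].equiv->id);	/* 1 */
	printf("obj 3 -> obj %d\n", objs[2].equiv->id);	/* 1 */
	return 0;
}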
diff --git a/fs/yaffs2/yaffs_yaffs1.h b/fs/yaffs2/yaffs_yaffs1.h
deleted file mode 100644
index db23e04..0000000
--- a/fs/yaffs2/yaffs_yaffs1.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_YAFFS1_H__
-#define __YAFFS_YAFFS1_H__
-
-#include "yaffs_guts.h"
-int yaffs1_scan(struct yaffs_dev *dev);
-
-#endif
diff --git a/fs/yaffs2/yaffs_yaffs2.c b/fs/yaffs2/yaffs_yaffs2.c
deleted file mode 100644
index 33397af..0000000
--- a/fs/yaffs2/yaffs_yaffs2.c
+++ /dev/null
@@ -1,1598 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include "yaffs_guts.h"
-#include "yaffs_trace.h"
-#include "yaffs_yaffs2.h"
-#include "yaffs_checkptrw.h"
-#include "yaffs_bitmap.h"
-#include "yaffs_nand.h"
-#include "yaffs_getblockinfo.h"
-#include "yaffs_verify.h"
-#include "yaffs_attribs.h"
-
-/*
- * Checkpoints are really no benefit on very small partitions.
- *
- * To save space on small partitions don't bother with checkpoints unless
- * the partition is at least this big.
- */
-#define YAFFS_CHECKPOINT_MIN_BLOCKS 60
-
-#define YAFFS_SMALL_HOLE_THRESHOLD 4
-
-/*
- * Oldest Dirty Sequence Number handling.
- */
-
-/* yaffs_calc_oldest_dirty_seq()
- * yaffs2_find_oldest_dirty_seq()
- * Calculate the oldest dirty sequence number if we don't know it.
- */
-void yaffs_calc_oldest_dirty_seq(struct yaffs_dev *dev)
-{
- int i;
- unsigned seq;
- unsigned block_no = 0;
- struct yaffs_block_info *b;
-
- if (!dev->param.is_yaffs2)
- return;
-
- /* Find the oldest dirty sequence number. */
- seq = dev->seq_number + 1;
- b = dev->block_info;
- for (i = dev->internal_start_block; i <= dev->internal_end_block; i++) {
- if (b->block_state == YAFFS_BLOCK_STATE_FULL &&
- (b->pages_in_use - b->soft_del_pages) <
- dev->param.chunks_per_block && b->seq_number < seq) {
- seq = b->seq_number;
- block_no = i;
- }
- b++;
- }
-
- if (block_no) {
- dev->oldest_dirty_seq = seq;
- dev->oldest_dirty_block = block_no;
- }
-
-}
-
-void yaffs2_find_oldest_dirty_seq(struct yaffs_dev *dev)
-{
- if (!dev->param.is_yaffs2)
- return;
-
- if (!dev->oldest_dirty_seq)
- yaffs_calc_oldest_dirty_seq(dev);
-}
-
-/*
- * yaffs_clear_oldest_dirty_seq()
- * Called when a block is erased or marked bad (i.e. when its seq_number
- * becomes invalid). If the value matches the oldest then we clear
- * dev->oldest_dirty_seq to force its recomputation.
- */
-void yaffs2_clear_oldest_dirty_seq(struct yaffs_dev *dev,
- struct yaffs_block_info *bi)
-{
-
- if (!dev->param.is_yaffs2)
- return;
-
- if (!bi || bi->seq_number == dev->oldest_dirty_seq) {
- dev->oldest_dirty_seq = 0;
- dev->oldest_dirty_block = 0;
- }
-}
-
-/*
- * yaffs2_update_oldest_dirty_seq()
- * Update the oldest dirty sequence number whenever we dirty a block.
- * Only do this if the oldest_dirty_seq is actually being tracked.
- */
-void yaffs2_update_oldest_dirty_seq(struct yaffs_dev *dev, unsigned block_no,
- struct yaffs_block_info *bi)
-{
- if (!dev->param.is_yaffs2)
- return;
-
- if (dev->oldest_dirty_seq) {
- if (dev->oldest_dirty_seq > bi->seq_number) {
- dev->oldest_dirty_seq = bi->seq_number;
- dev->oldest_dirty_block = block_no;
- }
- }
-}
-
-int yaffs_block_ok_for_gc(struct yaffs_dev *dev, struct yaffs_block_info *bi)
-{
-
- if (!dev->param.is_yaffs2)
- return 1; /* disqualification only applies to yaffs2. */
-
- if (!bi->has_shrink_hdr)
- return 1; /* can gc */
-
- yaffs2_find_oldest_dirty_seq(dev);
-
- /* Can't do gc of this block if there are any blocks older than this one that have
- * discarded pages.
- */
- return (bi->seq_number <= dev->oldest_dirty_seq);
-}
-
-/*
- * yaffs2_find_refresh_block()
- * periodically finds the oldest full block by sequence number for refreshing.
- * Only for yaffs2.
- */
-u32 yaffs2_find_refresh_block(struct yaffs_dev *dev)
-{
- u32 b;
-
- u32 oldest = 0;
- u32 oldest_seq = 0;
-
- struct yaffs_block_info *bi;
-
- if (!dev->param.is_yaffs2)
- return oldest;
-
- /*
- * If refresh period < 10 then refreshing is disabled.
- */
- if (dev->param.refresh_period < 10)
- return oldest;
-
- /*
- * Fix broken values.
- */
- if (dev->refresh_skip > dev->param.refresh_period)
- dev->refresh_skip = dev->param.refresh_period;
-
- if (dev->refresh_skip > 0)
- return oldest;
-
- /*
- * Refresh skip is now zero.
- * We'll do a refresh this time around....
- * Update the refresh skip and find the oldest block.
- */
- dev->refresh_skip = dev->param.refresh_period;
- dev->refresh_count++;
- bi = dev->block_info;
- for (b = dev->internal_start_block; b <= dev->internal_end_block; b++) {
-
- if (bi->block_state == YAFFS_BLOCK_STATE_FULL) {
-
- if (oldest < 1 || bi->seq_number < oldest_seq) {
- oldest = b;
- oldest_seq = bi->seq_number;
- }
- }
- bi++;
- }
-
- if (oldest > 0) {
- yaffs_trace(YAFFS_TRACE_GC,
- "GC refresh count %d selected block %d with seq_number %d",
- dev->refresh_count, oldest, oldest_seq);
- }
-
- return oldest;
-}
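
The refresh machinery above is paced by a countdown: refresh_skip is reloaded from refresh_period whenever it reaches zero, so roughly one block refresh happens per refresh_period garbage-collection passes, and a period under 10 disables refreshing entirely. A stand-alone sketch of the pacing (the decrement of refresh_skip happens elsewhere in the gc path, so modelling it once per pass here is an assumption):

#include <stdio.h>

int main(void)
{
	int refresh_period = 500, refresh_skip = 0, refresh_count = 0;

	for (int gc_pass = 1; gc_pass <= 1200; gc_pass++) {
		if (refresh_skip > 0) {
			refresh_skip--;	/* not due yet */
			continue;
		}
		refresh_skip = refresh_period;
		refresh_count++;
		printf("pass %d: refresh #%d\n", gc_pass, refresh_count);
	}
	return 0;	/* refreshes at passes 1, 502, 1003 */
}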
-
-int yaffs2_checkpt_required(struct yaffs_dev *dev)
-{
- int nblocks;
-
- if (!dev->param.is_yaffs2)
- return 0;
-
- nblocks = dev->internal_end_block - dev->internal_start_block + 1;
-
- return !dev->param.skip_checkpt_wr &&
- !dev->read_only && (nblocks >= YAFFS_CHECKPOINT_MIN_BLOCKS);
-}
-
-int yaffs_calc_checkpt_blocks_required(struct yaffs_dev *dev)
-{
- int retval;
-
- if (!dev->param.is_yaffs2)
- return 0;
-
- if (!dev->checkpoint_blocks_required && yaffs2_checkpt_required(dev)) {
- /* Not a valid value so recalculate */
- int n_bytes = 0;
- int n_blocks;
- int dev_blocks =
- (dev->param.end_block - dev->param.start_block + 1);
-
- n_bytes += sizeof(struct yaffs_checkpt_validity);
- n_bytes += sizeof(struct yaffs_checkpt_dev);
- n_bytes += dev_blocks * sizeof(struct yaffs_block_info);
- n_bytes += dev_blocks * dev->chunk_bit_stride;
- n_bytes +=
- (sizeof(struct yaffs_checkpt_obj) +
- sizeof(u32)) * (dev->n_obj);
- n_bytes += (dev->tnode_size + sizeof(u32)) * (dev->n_tnodes);
- n_bytes += sizeof(struct yaffs_checkpt_validity);
- n_bytes += sizeof(u32); /* checksum */
-
- /* Round up (one block) and allow a further two blocks for bad-block replacement, so add 3 */
-
- n_blocks =
- (n_bytes /
- (dev->data_bytes_per_chunk *
- dev->param.chunks_per_block)) + 3;
-
- dev->checkpoint_blocks_required = n_blocks;
- }
-
- retval = dev->checkpoint_blocks_required - dev->blocks_in_checkpt;
- if (retval < 0)
- retval = 0;
- return retval;
-}
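
The sizing above totals two validity markers, the device record, per-block info, the chunk-bit bitmaps, one record plus a u32 per object, one tnode plus a u32 per tnode, and a final checksum, then divides by the per-block payload and adds 3. A worked example with assumed geometry and placeholder structure sizes (the real sizes come from the yaffs headers):

#include <stdio.h>

int main(void)
{
	int dev_blocks = 1024, n_obj = 1000, n_tnodes = 500;
	int chunk_bytes = 2048, chunks_per_block = 64;
	int sizeof_validity = 12, sizeof_dev = 64, sizeof_bi = 12;
	int chunk_bit_stride = 8, sizeof_cp_obj = 36, tnode_size = 16;

	long n_bytes = 2L * sizeof_validity + sizeof_dev
		+ dev_blocks * (long)(sizeof_bi + chunk_bit_stride)
		+ (long)n_obj * (sizeof_cp_obj + 4)
		+ (long)n_tnodes * (tnode_size + 4)
		+ 4;	/* checksum */

	long n_blocks = n_bytes / ((long)chunk_bytes * chunks_per_block) + 3;

	printf("%ld bytes -> %ld checkpoint blocks\n", n_bytes, n_blocks);
	return 0;	/* 70572 bytes -> 3 checkpoint blocks */
}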
-
-/*--------------------- Checkpointing --------------------*/
-
-static int yaffs2_wr_checkpt_validity_marker(struct yaffs_dev *dev, int head)
-{
- struct yaffs_checkpt_validity cp;
-
- memset(&cp, 0, sizeof(cp));
-
- cp.struct_type = sizeof(cp);
- cp.magic = YAFFS_MAGIC;
- cp.version = YAFFS_CHECKPOINT_VERSION;
- cp.head = (head) ? 1 : 0;
-
- return (yaffs2_checkpt_wr(dev, &cp, sizeof(cp)) == sizeof(cp)) ? 1 : 0;
-}
-
-static int yaffs2_rd_checkpt_validity_marker(struct yaffs_dev *dev, int head)
-{
- struct yaffs_checkpt_validity cp;
- int ok;
-
- ok = (yaffs2_checkpt_rd(dev, &cp, sizeof(cp)) == sizeof(cp));
-
- if (ok)
- ok = (cp.struct_type == sizeof(cp)) &&
- (cp.magic == YAFFS_MAGIC) &&
- (cp.version == YAFFS_CHECKPOINT_VERSION) &&
- (cp.head == ((head) ? 1 : 0));
- return ok ? 1 : 0;
-}
-
-static void yaffs2_dev_to_checkpt_dev(struct yaffs_checkpt_dev *cp,
- struct yaffs_dev *dev)
-{
- cp->n_erased_blocks = dev->n_erased_blocks;
- cp->alloc_block = dev->alloc_block;
- cp->alloc_page = dev->alloc_page;
- cp->n_free_chunks = dev->n_free_chunks;
-
- cp->n_deleted_files = dev->n_deleted_files;
- cp->n_unlinked_files = dev->n_unlinked_files;
- cp->n_bg_deletions = dev->n_bg_deletions;
- cp->seq_number = dev->seq_number;
-
-}
-
-static void yaffs_checkpt_dev_to_dev(struct yaffs_dev *dev,
- struct yaffs_checkpt_dev *cp)
-{
- dev->n_erased_blocks = cp->n_erased_blocks;
- dev->alloc_block = cp->alloc_block;
- dev->alloc_page = cp->alloc_page;
- dev->n_free_chunks = cp->n_free_chunks;
-
- dev->n_deleted_files = cp->n_deleted_files;
- dev->n_unlinked_files = cp->n_unlinked_files;
- dev->n_bg_deletions = cp->n_bg_deletions;
- dev->seq_number = cp->seq_number;
-}
-
-static int yaffs2_wr_checkpt_dev(struct yaffs_dev *dev)
-{
- struct yaffs_checkpt_dev cp;
- u32 n_bytes;
- u32 n_blocks =
- (dev->internal_end_block - dev->internal_start_block + 1);
-
- int ok;
-
- /* Write device runtime values */
- yaffs2_dev_to_checkpt_dev(&cp, dev);
- cp.struct_type = sizeof(cp);
-
- ok = (yaffs2_checkpt_wr(dev, &cp, sizeof(cp)) == sizeof(cp));
-
- /* Write block info */
- if (ok) {
- n_bytes = n_blocks * sizeof(struct yaffs_block_info);
- ok = (yaffs2_checkpt_wr(dev, dev->block_info, n_bytes) ==
- n_bytes);
- }
-
- /* Write chunk bits */
- if (ok) {
- n_bytes = n_blocks * dev->chunk_bit_stride;
- ok = (yaffs2_checkpt_wr(dev, dev->chunk_bits, n_bytes) ==
- n_bytes);
- }
- return ok ? 1 : 0;
-
-}
-
-static int yaffs2_rd_checkpt_dev(struct yaffs_dev *dev)
-{
- struct yaffs_checkpt_dev cp;
- u32 n_bytes;
- u32 n_blocks =
- (dev->internal_end_block - dev->internal_start_block + 1);
-
- int ok;
-
- ok = (yaffs2_checkpt_rd(dev, &cp, sizeof(cp)) == sizeof(cp));
- if (!ok)
- return 0;
-
- if (cp.struct_type != sizeof(cp))
- return 0;
-
- yaffs_checkpt_dev_to_dev(dev, &cp);
-
- n_bytes = n_blocks * sizeof(struct yaffs_block_info);
-
- ok = (yaffs2_checkpt_rd(dev, dev->block_info, n_bytes) == n_bytes);
-
- if (!ok)
- return 0;
- n_bytes = n_blocks * dev->chunk_bit_stride;
-
- ok = (yaffs2_checkpt_rd(dev, dev->chunk_bits, n_bytes) == n_bytes);
-
- return ok ? 1 : 0;
-}
-
-static void yaffs2_obj_checkpt_obj(struct yaffs_checkpt_obj *cp,
- struct yaffs_obj *obj)
-{
-
- cp->obj_id = obj->obj_id;
- cp->parent_id = (obj->parent) ? obj->parent->obj_id : 0;
- cp->hdr_chunk = obj->hdr_chunk;
- cp->variant_type = obj->variant_type;
- cp->deleted = obj->deleted;
- cp->soft_del = obj->soft_del;
- cp->unlinked = obj->unlinked;
- cp->fake = obj->fake;
- cp->rename_allowed = obj->rename_allowed;
- cp->unlink_allowed = obj->unlink_allowed;
- cp->serial = obj->serial;
- cp->n_data_chunks = obj->n_data_chunks;
-
- if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE)
- cp->size_or_equiv_obj = obj->variant.file_variant.file_size;
- else if (obj->variant_type == YAFFS_OBJECT_TYPE_HARDLINK)
- cp->size_or_equiv_obj = obj->variant.hardlink_variant.equiv_id;
-}
-
-static int yaffs2_checkpt_obj_to_obj(struct yaffs_obj *obj,
- struct yaffs_checkpt_obj *cp)
-{
-
- struct yaffs_obj *parent;
-
- if (obj->variant_type != cp->variant_type) {
- yaffs_trace(YAFFS_TRACE_ERROR,
- "Checkpoint read object %d type %d chunk %d does not match existing object type %d",
- cp->obj_id, cp->variant_type, cp->hdr_chunk,
- obj->variant_type);
- return 0;
- }
-
- obj->obj_id = cp->obj_id;
-
- if (cp->parent_id)
- parent = yaffs_find_or_create_by_number(obj->my_dev,
- cp->parent_id,
- YAFFS_OBJECT_TYPE_DIRECTORY);
- else
- parent = NULL;
-
- if (parent) {
- if (parent->variant_type != YAFFS_OBJECT_TYPE_DIRECTORY) {
- yaffs_trace(YAFFS_TRACE_ALWAYS,
- "Checkpoint read object %d parent %d type %d chunk %d Parent type, %d, not directory",
- cp->obj_id, cp->parent_id,
- cp->variant_type, cp->hdr_chunk,
- parent->variant_type);
- return 0;
- }
- yaffs_add_obj_to_dir(parent, obj);
- }
-
- obj->hdr_chunk = cp->hdr_chunk;
- obj->variant_type = cp->variant_type;
- obj->deleted = cp->deleted;
- obj->soft_del = cp->soft_del;
- obj->unlinked = cp->unlinked;
- obj->fake = cp->fake;
- obj->rename_allowed = cp->rename_allowed;
- obj->unlink_allowed = cp->unlink_allowed;
- obj->serial = cp->serial;
- obj->n_data_chunks = cp->n_data_chunks;
-
- if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE)
- obj->variant.file_variant.file_size = cp->size_or_equiv_obj;
- else if (obj->variant_type == YAFFS_OBJECT_TYPE_HARDLINK)
- obj->variant.hardlink_variant.equiv_id = cp->size_or_equiv_obj;
-
- if (obj->hdr_chunk > 0)
- obj->lazy_loaded = 1;
- return 1;
-}
-
-static int yaffs2_checkpt_tnode_worker(struct yaffs_obj *in,
- struct yaffs_tnode *tn, u32 level,
- int chunk_offset)
-{
- int i;
- struct yaffs_dev *dev = in->my_dev;
- int ok = 1;
-
- if (tn) {
- if (level > 0) {
-
- for (i = 0; i < YAFFS_NTNODES_INTERNAL && ok; i++) {
- if (tn->internal[i]) {
- ok = yaffs2_checkpt_tnode_worker(in,
- tn->internal[i],
- level - 1,
- (chunk_offset << YAFFS_TNODES_INTERNAL_BITS) + i);
- }
- }
- } else if (level == 0) {
- u32 base_offset = chunk_offset << YAFFS_TNODES_LEVEL0_BITS;
- ok = (yaffs2_checkpt_wr(dev, &base_offset,
- sizeof(base_offset)) == sizeof(base_offset));
- if (ok)
- ok = (yaffs2_checkpt_wr(dev, tn,
- dev->tnode_size) == dev->tnode_size);
- }
- }
-
- return ok;
-
-}
-
-static int yaffs2_wr_checkpt_tnodes(struct yaffs_obj *obj)
-{
- u32 end_marker = ~0;
- int ok = 1;
-
- if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE) {
- ok = yaffs2_checkpt_tnode_worker(obj,
- obj->variant.file_variant.top,
- obj->variant.file_variant.top_level, 0);
- if (ok)
- ok = (yaffs2_checkpt_wr
- (obj->my_dev, &end_marker,
- sizeof(end_marker)) == sizeof(end_marker));
- }
-
- return ok ? 1 : 0;
-}
-
-static int yaffs2_rd_checkpt_tnodes(struct yaffs_obj *obj)
-{
- u32 base_chunk;
- int ok = 1;
- struct yaffs_dev *dev = obj->my_dev;
- struct yaffs_file_var *file_struct_ptr = &obj->variant.file_variant;
- struct yaffs_tnode *tn;
- int nread = 0;
-
- ok = (yaffs2_checkpt_rd(dev, &base_chunk, sizeof(base_chunk)) ==
- sizeof(base_chunk));
-
- while (ok && (~base_chunk)) {
- nread++;
- /* Read level 0 tnode */
-
- tn = yaffs_get_tnode(dev);
- if (tn) {
- ok = (yaffs2_checkpt_rd(dev, tn, dev->tnode_size) ==
- dev->tnode_size);
- } else {
- ok = 0;
- }
-
- if (tn && ok)
- ok = yaffs_add_find_tnode_0(dev,
- file_struct_ptr,
- base_chunk, tn) ? 1 : 0;
-
- if (ok)
- ok = (yaffs2_checkpt_rd
- (dev, &base_chunk,
- sizeof(base_chunk)) == sizeof(base_chunk));
-
- }
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "Checkpoint read tnodes %d records, last %d. ok %d",
- nread, base_chunk, ok);
-
- return ok ? 1 : 0;
-}
-
-static int yaffs2_wr_checkpt_objs(struct yaffs_dev *dev)
-{
- struct yaffs_obj *obj;
- struct yaffs_checkpt_obj cp;
- int i;
- int ok = 1;
- struct list_head *lh;
-
- /* Iterate through the objects in each hash entry,
- * dumping them to the checkpointing stream.
- */
-
- for (i = 0; ok && i < YAFFS_NOBJECT_BUCKETS; i++) {
- list_for_each(lh, &dev->obj_bucket[i].list) {
- if (lh) {
- obj =
- list_entry(lh, struct yaffs_obj, hash_link);
- if (!obj->defered_free) {
- yaffs2_obj_checkpt_obj(&cp, obj);
- cp.struct_type = sizeof(cp);
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "Checkpoint write object %d parent %d type %d chunk %d obj addr %p",
- cp.obj_id, cp.parent_id,
- cp.variant_type, cp.hdr_chunk, obj);
-
- ok = (yaffs2_checkpt_wr
- (dev, &cp,
- sizeof(cp)) == sizeof(cp));
-
- if (ok
- && obj->variant_type ==
- YAFFS_OBJECT_TYPE_FILE)
- ok = yaffs2_wr_checkpt_tnodes
- (obj);
- }
- }
- }
- }
-
- /* Dump end of list */
- memset(&cp, 0xFF, sizeof(struct yaffs_checkpt_obj));
- cp.struct_type = sizeof(cp);
-
- if (ok)
- ok = (yaffs2_checkpt_wr(dev, &cp, sizeof(cp)) == sizeof(cp));
-
- return ok ? 1 : 0;
-}
-
-static int yaffs2_rd_checkpt_objs(struct yaffs_dev *dev)
-{
- struct yaffs_obj *obj;
- struct yaffs_checkpt_obj cp;
- int ok = 1;
- int done = 0;
- struct yaffs_obj *hard_list = NULL;
-
- while (ok && !done) {
- ok = (yaffs2_checkpt_rd(dev, &cp, sizeof(cp)) == sizeof(cp));
- if (cp.struct_type != sizeof(cp)) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "struct size %d instead of %d ok %d",
- cp.struct_type, (int)sizeof(cp), ok);
- ok = 0;
- }
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "Checkpoint read object %d parent %d type %d chunk %d ",
- cp.obj_id, cp.parent_id, cp.variant_type,
- cp.hdr_chunk);
-
- if (ok && cp.obj_id == ~0) {
- done = 1;
- } else if (ok) {
- obj =
- yaffs_find_or_create_by_number(dev, cp.obj_id,
- cp.variant_type);
- if (obj) {
- ok = yaffs2_checkpt_obj_to_obj(obj, &cp);
- if (!ok)
- break;
- if (obj->variant_type == YAFFS_OBJECT_TYPE_FILE) {
- ok = yaffs2_rd_checkpt_tnodes(obj);
- } else if (obj->variant_type ==
- YAFFS_OBJECT_TYPE_HARDLINK) {
- obj->hard_links.next =
- (struct list_head *)hard_list;
- hard_list = obj;
- }
- } else {
- ok = 0;
- }
- }
- }
-
- if (ok)
- yaffs_link_fixup(dev, hard_list);
-
- return ok ? 1 : 0;
-}
-
-static int yaffs2_wr_checkpt_sum(struct yaffs_dev *dev)
-{
- u32 checkpt_sum;
- int ok;
-
- yaffs2_get_checkpt_sum(dev, &checkpt_sum);
-
- ok = (yaffs2_checkpt_wr(dev, &checkpt_sum, sizeof(checkpt_sum)) ==
- sizeof(checkpt_sum));
-
- if (!ok)
- return 0;
-
- return 1;
-}
-
-static int yaffs2_rd_checkpt_sum(struct yaffs_dev *dev)
-{
- u32 checkpt_sum0;
- u32 checkpt_sum1;
- int ok;
-
- yaffs2_get_checkpt_sum(dev, &checkpt_sum0);
-
- ok = (yaffs2_checkpt_rd(dev, &checkpt_sum1, sizeof(checkpt_sum1)) ==
- sizeof(checkpt_sum1));
-
- if (!ok)
- return 0;
-
- if (checkpt_sum0 != checkpt_sum1)
- return 0;
-
- return 1;
-}
-
-static int yaffs2_wr_checkpt_data(struct yaffs_dev *dev)
-{
- int ok = 1;
-
- if (!yaffs2_checkpt_required(dev)) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "skipping checkpoint write");
- ok = 0;
- }
-
- if (ok)
- ok = yaffs2_checkpt_open(dev, 1);
-
- if (ok) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "write checkpoint validity");
- ok = yaffs2_wr_checkpt_validity_marker(dev, 1);
- }
- if (ok) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "write checkpoint device");
- ok = yaffs2_wr_checkpt_dev(dev);
- }
- if (ok) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "write checkpoint objects");
- ok = yaffs2_wr_checkpt_objs(dev);
- }
- if (ok) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "write checkpoint validity");
- ok = yaffs2_wr_checkpt_validity_marker(dev, 0);
- }
-
- if (ok)
- ok = yaffs2_wr_checkpt_sum(dev);
-
- if (!yaffs_checkpt_close(dev))
- ok = 0;
-
- if (ok)
- dev->is_checkpointed = 1;
- else
- dev->is_checkpointed = 0;
-
- return dev->is_checkpointed;
-}
-
-static int yaffs2_rd_checkpt_data(struct yaffs_dev *dev)
-{
- int ok = 1;
-
- if (!dev->param.is_yaffs2)
- ok = 0;
-
- if (ok && dev->param.skip_checkpt_rd) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "skipping checkpoint read");
- ok = 0;
- }
-
- if (ok)
- ok = yaffs2_checkpt_open(dev, 0); /* open for read */
-
- if (ok) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "read checkpoint validity");
- ok = yaffs2_rd_checkpt_validity_marker(dev, 1);
- }
- if (ok) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "read checkpoint device");
- ok = yaffs2_rd_checkpt_dev(dev);
- }
- if (ok) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "read checkpoint objects");
- ok = yaffs2_rd_checkpt_objs(dev);
- }
- if (ok) {
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "read checkpoint validity");
- ok = yaffs2_rd_checkpt_validity_marker(dev, 0);
- }
-
- if (ok) {
- ok = yaffs2_rd_checkpt_sum(dev);
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "read checkpoint checksum %d", ok);
- }
-
- if (!yaffs_checkpt_close(dev))
- ok = 0;
-
- if (ok)
- dev->is_checkpointed = 1;
- else
- dev->is_checkpointed = 0;
-
- return ok ? 1 : 0;
-
-}
-
-void yaffs2_checkpt_invalidate(struct yaffs_dev *dev)
-{
- if (dev->is_checkpointed || dev->blocks_in_checkpt > 0) {
- dev->is_checkpointed = 0;
- yaffs2_checkpt_invalidate_stream(dev);
- }
- if (dev->param.sb_dirty_fn)
- dev->param.sb_dirty_fn(dev);
-}
-
-int yaffs2_checkpt_save(struct yaffs_dev *dev)
-{
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "save entry: is_checkpointed %d",
- dev->is_checkpointed);
-
- yaffs_verify_objects(dev);
- yaffs_verify_blocks(dev);
- yaffs_verify_free_chunks(dev);
-
- if (!dev->is_checkpointed) {
- yaffs2_checkpt_invalidate(dev);
- yaffs2_wr_checkpt_data(dev);
- }
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT | YAFFS_TRACE_MOUNT,
- "save exit: is_checkpointed %d",
- dev->is_checkpointed);
-
- return dev->is_checkpointed;
-}
-
-int yaffs2_checkpt_restore(struct yaffs_dev *dev)
-{
- int retval;
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "restore entry: is_checkpointed %d",
- dev->is_checkpointed);
-
- retval = yaffs2_rd_checkpt_data(dev);
-
- if (dev->is_checkpointed) {
- yaffs_verify_objects(dev);
- yaffs_verify_blocks(dev);
- yaffs_verify_free_chunks(dev);
- }
-
- yaffs_trace(YAFFS_TRACE_CHECKPOINT,
- "restore exit: is_checkpointed %d",
- dev->is_checkpointed);
-
- return retval;
-}
-
-int yaffs2_handle_hole(struct yaffs_obj *obj, loff_t new_size)
-{
- /* If new_size > old_file_size then we're going to be writing a hole.
- * If the hole is small then write zeros, otherwise write a start-of-hole marker.
- */
-
- loff_t old_file_size;
- int increase;
- int small_hole;
- int result = YAFFS_OK;
- struct yaffs_dev *dev = NULL;
-
- u8 *local_buffer = NULL;
-
- int small_increase_ok = 0;
-
- if (!obj)
- return YAFFS_FAIL;
-
- if (obj->variant_type != YAFFS_OBJECT_TYPE_FILE)
- return YAFFS_FAIL;
-
- dev = obj->my_dev;
-
- /* Bail out if not yaffs2 mode */
- if (!dev->param.is_yaffs2)
- return YAFFS_OK;
-
- old_file_size = obj->variant.file_variant.file_size;
-
- if (new_size <= old_file_size)
- return YAFFS_OK;
-
- increase = new_size - old_file_size;
-
- if (increase < YAFFS_SMALL_HOLE_THRESHOLD * dev->data_bytes_per_chunk &&
- yaffs_check_alloc_available(dev, YAFFS_SMALL_HOLE_THRESHOLD + 1))
- small_hole = 1;
- else
- small_hole = 0;
-
- if (small_hole)
- local_buffer = yaffs_get_temp_buffer(dev, __LINE__);
-
- if (local_buffer) {
- /* fill hole with zero bytes */
- int pos = old_file_size;
- int this_write;
- int written;
- memset(local_buffer, 0, dev->data_bytes_per_chunk);
- small_increase_ok = 1;
-
- while (increase > 0 && small_increase_ok) {
- this_write = increase;
- if (this_write > dev->data_bytes_per_chunk)
- this_write = dev->data_bytes_per_chunk;
- written =
- yaffs_do_file_wr(obj, local_buffer, pos, this_write,
- 0);
- if (written == this_write) {
- pos += this_write;
- increase -= this_write;
- } else {
- small_increase_ok = 0;
- }
- }
-
- yaffs_release_temp_buffer(dev, local_buffer, __LINE__);
-
- /* If we were out of space then reverse any chunks we've added */
- if (!small_increase_ok)
- yaffs_resize_file_down(obj, old_file_size);
- }
-
- if (!small_increase_ok &&
- obj->parent &&
- obj->parent->obj_id != YAFFS_OBJECTID_UNLINKED &&
- obj->parent->obj_id != YAFFS_OBJECTID_DELETED) {
- /* Write a hole start header with the old file size */
- yaffs_update_oh(obj, NULL, 0, 1, 0, NULL);
- }
-
- return result;
-
-}
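
The cutoff above means a file extension of fewer than YAFFS_SMALL_HOLE_THRESHOLD chunks is zero-filled chunk by chunk (when a temp buffer and free space are available), while anything larger, or any zero-fill failure, falls back to writing a hole-start header that records the old size. A stand-alone sketch of the decision, assuming 2048-byte data chunks:

#include <stdio.h>

int main(void)
{
	long old_size = 10000, new_size = 15000;
	long chunk_bytes = 2048, threshold = 4;	/* YAFFS_SMALL_HOLE_THRESHOLD */
	long increase = new_size - old_size;

	if (increase < threshold * chunk_bytes)
		printf("small hole: zero-fill %ld bytes\n", increase);
	else
		printf("large hole: write hole-start header only\n");
	return 0;	/* 5000 < 8192, so the hole is zero-filled */
}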
-
-struct yaffs_block_index {
- int seq;
- int block;
-};
-
-static int yaffs2_ybicmp(const void *a, const void *b)
-{
- int aseq = ((struct yaffs_block_index *)a)->seq;
- int bseq = ((struct yaffs_block_index *)b)->seq;
- int ablock = ((struct yaffs_block_index *)a)->block;
- int bblock = ((struct yaffs_block_index *)b)->block;
- if (aseq == bseq)
- return ablock - bblock;
- else
- return aseq - bseq;
-}
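
The comparator sorts primarily by sequence number with block number as a tiebreak, giving a deterministic oldest-first ordering. A stand-alone demonstration with qsort (the kernel sort() call used below takes a comparator of the same shape):

#include <stdio.h>
#include <stdlib.h>

struct yaffs_block_index { int seq; int block; };	/* re-declared for the demo */

static int ybicmp(const void *a, const void *b)
{
	const struct yaffs_block_index *x = a, *y = b;

	return x->seq == y->seq ? x->block - y->block : x->seq - y->seq;
}

int main(void)
{
	struct yaffs_block_index idx[] = { { 9, 3 }, { 4, 7 }, { 9, 1 } };

	qsort(idx, 3, sizeof(idx[0]), ybicmp);
	for (int i = 0; i < 3; i++)
		printf("seq %d block %d\n", idx[i].seq, idx[i].block);
	return 0;	/* 4/7, then 9/1, then 9/3 */
}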
-
-int yaffs2_scan_backwards(struct yaffs_dev *dev)
-{
- struct yaffs_ext_tags tags;
- int blk;
- int block_iter;
- int start_iter;
- int end_iter;
- int n_to_scan = 0;
-
- int chunk;
- int result;
- int c;
- int deleted;
- enum yaffs_block_state state;
- struct yaffs_obj *hard_list = NULL;
- struct yaffs_block_info *bi;
- u32 seq_number;
- struct yaffs_obj_hdr *oh;
- struct yaffs_obj *in;
- struct yaffs_obj *parent;
- int n_blocks = dev->internal_end_block - dev->internal_start_block + 1;
- int is_unlinked;
- u8 *chunk_data;
-
- int file_size;
- int is_shrink;
- int found_chunks;
- int equiv_id;
- int alloc_failed = 0;
-
- struct yaffs_block_index *block_index = NULL;
- int alt_block_index = 0;
-
- yaffs_trace(YAFFS_TRACE_SCAN,
- "yaffs2_scan_backwards starts intstartblk %d intendblk %d...",
- dev->internal_start_block, dev->internal_end_block);
-
- dev->seq_number = YAFFS_LOWEST_SEQUENCE_NUMBER;
-
- block_index = kmalloc(n_blocks * sizeof(struct yaffs_block_index),
- GFP_NOFS);
-
- if (!block_index) {
- block_index =
- vmalloc(n_blocks * sizeof(struct yaffs_block_index));
- alt_block_index = 1;
- }
-
- if (!block_index) {
- yaffs_trace(YAFFS_TRACE_SCAN,
- "yaffs2_scan_backwards() could not allocate block index!"
- );
- return YAFFS_FAIL;
- }
-
- dev->blocks_in_checkpt = 0;
-
- chunk_data = yaffs_get_temp_buffer(dev, __LINE__);
-
- /* Scan all the blocks to determine their state */
- bi = dev->block_info;
- for (blk = dev->internal_start_block; blk <= dev->internal_end_block;
- blk++) {
- yaffs_clear_chunk_bits(dev, blk);
- bi->pages_in_use = 0;
- bi->soft_del_pages = 0;
-
- yaffs_query_init_block_state(dev, blk, &state, &seq_number);
-
- bi->block_state = state;
- bi->seq_number = seq_number;
-
- if (bi->seq_number == YAFFS_SEQUENCE_CHECKPOINT_DATA)
- bi->block_state = state = YAFFS_BLOCK_STATE_CHECKPOINT;
- if (bi->seq_number == YAFFS_SEQUENCE_BAD_BLOCK)
- bi->block_state = state = YAFFS_BLOCK_STATE_DEAD;
-
- yaffs_trace(YAFFS_TRACE_SCAN_DEBUG,
- "Block scanning block %d state %d seq %d",
- blk, state, seq_number);
-
- if (state == YAFFS_BLOCK_STATE_CHECKPOINT) {
- dev->blocks_in_checkpt++;
-
- } else if (state == YAFFS_BLOCK_STATE_DEAD) {
- yaffs_trace(YAFFS_TRACE_BAD_BLOCKS,
- "block %d is bad", blk);
- } else if (state == YAFFS_BLOCK_STATE_EMPTY) {
- yaffs_trace(YAFFS_TRACE_SCAN_DEBUG, "Block empty ");
- dev->n_erased_blocks++;
- dev->n_free_chunks += dev->param.chunks_per_block;
- } else if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) {
-
- /* Determine the highest sequence number */
- if (seq_number >= YAFFS_LOWEST_SEQUENCE_NUMBER &&
- seq_number < YAFFS_HIGHEST_SEQUENCE_NUMBER) {
-
- block_index[n_to_scan].seq = seq_number;
- block_index[n_to_scan].block = blk;
-
- n_to_scan++;
-
- if (seq_number >= dev->seq_number)
- dev->seq_number = seq_number;
- } else {
- /* TODO: Nasty sequence number! */
- yaffs_trace(YAFFS_TRACE_SCAN,
- "Block scanning block %d has bad sequence number %d",
- blk, seq_number);
-
- }
- }
- bi++;
- }
-
- yaffs_trace(YAFFS_TRACE_SCAN, "%d blocks to be sorted...", n_to_scan);
-
- cond_resched();
-
- /* Sort the blocks by sequence number */
- sort(block_index, n_to_scan, sizeof(struct yaffs_block_index),
- yaffs2_ybicmp, NULL);
-
- cond_resched();
-
- yaffs_trace(YAFFS_TRACE_SCAN, "...done");
-
- /* Now scan the blocks looking at the data. */
- start_iter = 0;
- end_iter = n_to_scan - 1;
- yaffs_trace(YAFFS_TRACE_SCAN_DEBUG, "%d blocks to scan", n_to_scan);
-
- /* For each block.... backwards */
- for (block_iter = end_iter; !alloc_failed && block_iter >= start_iter;
- block_iter--) {
- /* Cooperative multitasking! This loop can run for so
- long that watchdog timers expire. */
- cond_resched();
-
- /* get the block to scan in the correct order */
- blk = block_index[block_iter].block;
-
- bi = yaffs_get_block_info(dev, blk);
-
- state = bi->block_state;
-
- deleted = 0;
-
- /* For each chunk in each block that needs scanning.... */
- found_chunks = 0;
- for (c = dev->param.chunks_per_block - 1;
- !alloc_failed && c >= 0 &&
- (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING ||
- state == YAFFS_BLOCK_STATE_ALLOCATING); c--) {
- /* Scan backwards...
- * Read the tags and decide what to do
- */
-
- chunk = blk * dev->param.chunks_per_block + c;
-
- result = yaffs_rd_chunk_tags_nand(dev, chunk, NULL,
- &tags);
-
- /* Let's have a good look at this chunk... */
-
- if (!tags.chunk_used) {
- /* An unassigned chunk in the block.
- * If there are used chunks after this one, then
- * it is a chunk that was skipped due to failing the erased
- * check. Just skip it so that it can be deleted.
- * But, more typically, we get here when this is an unallocated
- * chunk and this means that either the block is empty or
- * this is the one being allocated from.
- */
-
- if (found_chunks) {
- /* This is a chunk that was skipped due to failing the erased check */
- } else if (c == 0) {
- /* We're looking at the first chunk in the block so the block is unused */
- state = YAFFS_BLOCK_STATE_EMPTY;
- dev->n_erased_blocks++;
- } else {
- if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING ||
- state == YAFFS_BLOCK_STATE_ALLOCATING) {
- if (dev->seq_number == bi->seq_number) {
- /* this is the block being allocated from */
-
- yaffs_trace(YAFFS_TRACE_SCAN,
- " Allocating from %d %d",
- blk, c);
-
- state = YAFFS_BLOCK_STATE_ALLOCATING;
- dev->alloc_block = blk;
- dev->alloc_page = c;
- dev->alloc_block_finder = blk;
- } else {
- /* This is a partially written block that is not
- * the current allocation block.
- */
-
- yaffs_trace(YAFFS_TRACE_SCAN,
- "Partially written block %d detected",
- blk);
- }
- }
- }
-
- dev->n_free_chunks++;
-
- } else if (tags.ecc_result == YAFFS_ECC_RESULT_UNFIXED) {
- yaffs_trace(YAFFS_TRACE_SCAN,
- " Unfixed ECC in chunk(%d:%d), chunk ignored",
- blk, c);
-
- dev->n_free_chunks++;
-
- } else if (tags.obj_id > YAFFS_MAX_OBJECT_ID ||
- tags.chunk_id > YAFFS_MAX_CHUNK_ID ||
- (tags.chunk_id > 0
- && tags.n_bytes > dev->data_bytes_per_chunk)
- || tags.seq_number != bi->seq_number) {
- yaffs_trace(YAFFS_TRACE_SCAN,
- "Chunk (%d:%d) with bad tags:obj = %d, chunk_id = %d, n_bytes = %d, ignored",
- blk, c, tags.obj_id,
- tags.chunk_id, tags.n_bytes);
-
- dev->n_free_chunks++;
-
- } else if (tags.chunk_id > 0) {
- /* chunk_id > 0 so it is a data chunk... */
- unsigned int endpos;
- u32 chunk_base = (tags.chunk_id - 1) *
- dev->data_bytes_per_chunk;
-
- found_chunks = 1;
-
- yaffs_set_chunk_bit(dev, blk, c);
- bi->pages_in_use++;
-
- in = yaffs_find_or_create_by_number(dev,
- tags.obj_id,
- YAFFS_OBJECT_TYPE_FILE);
- if (!in) {
- /* Out of memory */
- alloc_failed = 1;
- }
-
- if (in &&
- in->variant_type == YAFFS_OBJECT_TYPE_FILE
- && chunk_base <
- in->variant.file_variant.shrink_size) {
- /* This has not been invalidated by a resize */
- if (!yaffs_put_chunk_in_file
- (in, tags.chunk_id, chunk, -1)) {
- alloc_failed = 1;
- }
-
- /* File size is calculated by looking at the data chunks if we have not
- * seen an object header yet. Stop this practice once we find an object header.
- */
- endpos = chunk_base + tags.n_bytes;
-
- if (!in->valid && /* have not got an object header yet */
- in->variant.file_variant.scanned_size < endpos) {
- in->variant.file_variant.scanned_size = endpos;
- in->variant.file_variant.file_size = endpos;
- }
-
- } else if (in) {
- /* This chunk has been invalidated by a resize, or a past file deletion
- * so delete the chunk. */
- yaffs_chunk_del(dev, chunk, 1,
- __LINE__);
-
- }
- } else {
- /* chunk_id == 0, so it is an ObjectHeader.
- * Thus, we read in the object header and make the object
- */
- found_chunks = 1;
-
- yaffs_set_chunk_bit(dev, blk, c);
- bi->pages_in_use++;
-
- oh = NULL;
- in = NULL;
-
- if (tags.extra_available) {
- in = yaffs_find_or_create_by_number(dev,
- tags.obj_id,
- tags.extra_obj_type);
- if (!in)
- alloc_failed = 1;
- }
-
- if (!in ||
- (!in->valid && dev->param.disable_lazy_load) ||
- tags.extra_shadows ||
- (!in->valid &&
- (tags.obj_id == YAFFS_OBJECTID_ROOT ||
- tags.obj_id == YAFFS_OBJECTID_LOSTNFOUND))) {
-
- /* If we don't have valid info then we need to read the chunk
- * TODO In future we can probably defer reading the chunk and
- * living with invalid data until needed.
- */
-
- result = yaffs_rd_chunk_tags_nand(dev,
- chunk,
- chunk_data,
- NULL);
-
- oh = (struct yaffs_obj_hdr *)chunk_data;
-
- if (dev->param.inband_tags) {
- /* Fix up the header if it got corrupted by inband tags */
- oh->shadows_obj =
- oh->inband_shadowed_obj_id;
- oh->is_shrink =
- oh->inband_is_shrink;
- }
-
- if (!in) {
- in = yaffs_find_or_create_by_number(dev, tags.obj_id, oh->type);
- if (!in)
- alloc_failed = 1;
- }
-
- }
-
- if (!in) {
- /* TODO Hoosterman we have a problem! */
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs tragedy: Could not make object for object %d at chunk %d during scan",
- tags.obj_id, chunk);
- continue;
- }
-
- if (in->valid) {
- /* We have already filled this one.
- * We have a duplicate that will be discarded, but
- * we first have to suck out resize info if it is a file.
- */
-
- if (in->variant_type == YAFFS_OBJECT_TYPE_FILE &&
- ((oh && oh->type == YAFFS_OBJECT_TYPE_FILE) ||
- (tags.extra_available &&
- tags.extra_obj_type == YAFFS_OBJECT_TYPE_FILE))) {
- u32 this_size = oh ? oh->file_size : tags.extra_length;
- u32 parent_obj_id = oh ? oh->parent_obj_id :
- tags.extra_parent_id;
-
- is_shrink = oh ? oh->is_shrink : tags.extra_is_shrink;
-
- /* If it is deleted (unlinked at start also means deleted)
- * we treat the file size as being zeroed at this point.
- */
- if (parent_obj_id ==
- YAFFS_OBJECTID_DELETED
- || parent_obj_id ==
- YAFFS_OBJECTID_UNLINKED) {
- this_size = 0;
- is_shrink = 1;
- }
-
- if (is_shrink &&
- in->variant.file_variant.shrink_size > this_size)
- in->variant.file_variant.shrink_size = this_size;
-
- if (is_shrink)
- bi->has_shrink_hdr = 1;
-
- }
- /* Use existing - destroy this one. */
- yaffs_chunk_del(dev, chunk, 1,
- __LINE__);
-
- }
-
- if (!in->valid && in->variant_type !=
- (oh ? oh->type : tags.extra_obj_type))
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs tragedy: Bad object type, %d != %d, for object %d at chunk %d during scan",
- oh ?
- oh->type : tags.extra_obj_type,
- in->variant_type, tags.obj_id,
- chunk);
-
- if (!in->valid &&
- (tags.obj_id == YAFFS_OBJECTID_ROOT ||
- tags.obj_id ==
- YAFFS_OBJECTID_LOSTNFOUND)) {
- /* We only load some info, don't fiddle with directory structure */
- in->valid = 1;
-
- if (oh) {
-
- in->yst_mode = oh->yst_mode;
- yaffs_load_attribs(in, oh);
- in->lazy_loaded = 0;
- } else {
- in->lazy_loaded = 1;
- }
- in->hdr_chunk = chunk;
-
- } else if (!in->valid) {
- /* we need to load this info */
-
- in->valid = 1;
- in->hdr_chunk = chunk;
-
- if (oh) {
- in->variant_type = oh->type;
-
- in->yst_mode = oh->yst_mode;
- yaffs_load_attribs(in, oh);
-
- if (oh->shadows_obj > 0)
- yaffs_handle_shadowed_obj(dev,
- oh->shadows_obj, 1);
-
- yaffs_set_obj_name_from_oh(in,
- oh);
- parent =
- yaffs_find_or_create_by_number
- (dev, oh->parent_obj_id,
- YAFFS_OBJECT_TYPE_DIRECTORY);
-
- file_size = oh->file_size;
- is_shrink = oh->is_shrink;
- equiv_id = oh->equiv_id;
-
- } else {
- in->variant_type =
- tags.extra_obj_type;
- parent =
- yaffs_find_or_create_by_number
- (dev, tags.extra_parent_id,
- YAFFS_OBJECT_TYPE_DIRECTORY);
- file_size = tags.extra_length;
- is_shrink =
- tags.extra_is_shrink;
- equiv_id = tags.extra_equiv_id;
- in->lazy_loaded = 1;
-
- }
- in->dirty = 0;
-
- if (!parent)
- alloc_failed = 1;
-
- /* directory stuff...
- * hook up to parent
- */
-
- if (parent && parent->variant_type ==
- YAFFS_OBJECT_TYPE_UNKNOWN) {
- /* Set up as a directory */
- parent->variant_type =
- YAFFS_OBJECT_TYPE_DIRECTORY;
- INIT_LIST_HEAD(&parent->variant.dir_variant.children);
- } else if (!parent
- || parent->variant_type !=
- YAFFS_OBJECT_TYPE_DIRECTORY) {
- /* Hoosterman, another problem....
- * We're trying to use a non-directory as a directory
- */
-
- yaffs_trace(YAFFS_TRACE_ERROR,
- "yaffs tragedy: attempting to use non-directory as a directory in scan. Put in lost+found."
- );
- parent = dev->lost_n_found;
- }
-
- yaffs_add_obj_to_dir(parent, in);
-
- is_unlinked = (parent == dev->del_dir)
- || (parent == dev->unlinked_dir);
-
- if (is_shrink) {
- /* Mark the block as having a shrink header */
- bi->has_shrink_hdr = 1;
- }
-
- /* Note re hardlinks.
- * Since we might scan a hardlink before its equivalent object is scanned
- * we put them all in a list.
- * After scanning is complete, we should have all the objects, so we run
- * through this list and fix up all the chains.
- */
-
- switch (in->variant_type) {
- case YAFFS_OBJECT_TYPE_UNKNOWN:
- /* Todo got a problem */
- break;
- case YAFFS_OBJECT_TYPE_FILE:
-
- if (in->variant.file_variant.scanned_size < file_size) {
- /* This covers the case where the file size is greater
- * than where the data is.
- * This will happen if the file is resized to be larger
- * than its current data extents.
- */
- in->variant.file_variant.file_size = file_size;
- in->variant.file_variant.scanned_size = file_size;
- }
-
- if (in->variant.file_variant.shrink_size > file_size)
- in->variant.file_variant.shrink_size = file_size;
-
- break;
- case YAFFS_OBJECT_TYPE_HARDLINK:
- if (!is_unlinked) {
- in->variant.hardlink_variant.equiv_id = equiv_id;
- in->hard_links.next = (struct list_head *)hard_list;
- hard_list = in;
- }
- break;
- case YAFFS_OBJECT_TYPE_DIRECTORY:
- /* Do nothing */
- break;
- case YAFFS_OBJECT_TYPE_SPECIAL:
- /* Do nothing */
- break;
- case YAFFS_OBJECT_TYPE_SYMLINK:
- if (oh) {
- in->variant.symlink_variant.alias =
- yaffs_clone_str(oh->alias);
- if (!in->variant.symlink_variant.alias)
- alloc_failed = 1;
- }
- break;
- }
-
- }
-
- }
-
- } /* End of scanning for each chunk */
-
- if (state == YAFFS_BLOCK_STATE_NEEDS_SCANNING) {
- /* If we got this far while scanning, then the block is fully allocated. */
- state = YAFFS_BLOCK_STATE_FULL;
- }
-
- bi->block_state = state;
-
- /* Now let's see if it was dirty */
- if (bi->pages_in_use == 0 &&
- !bi->has_shrink_hdr &&
- bi->block_state == YAFFS_BLOCK_STATE_FULL) {
- yaffs_block_became_dirty(dev, blk);
- }
-
- }
-
- yaffs_skip_rest_of_block(dev);
-
- if (alt_block_index)
- vfree(block_index);
- else
- kfree(block_index);
-
- /* Ok, we've done all the scanning.
- * Fix up the hard link chains.
- * We should now have scanned all the objects, now it's time to add these
- * hardlinks.
- */
- yaffs_link_fixup(dev, hard_list);
-
- yaffs_release_temp_buffer(dev, chunk_data, __LINE__);
-
- if (alloc_failed)
- return YAFFS_FAIL;
-
- yaffs_trace(YAFFS_TRACE_SCAN, "yaffs2_scan_backwards ends");
-
- return YAFFS_OK;
-}
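
Why scan backwards? Once the blocks are sorted by sequence number and visited newest-first, the first copy of any chunk encountered is the live one, so older duplicates can be discarded on sight. A stand-alone sketch of that first-seen-wins rule:

#include <stdio.h>
#include <string.h>

int main(void)
{
	/* chunk ids in the order seen when scanning newest to oldest */
	int scanned[] = { 7, 3, 7, 5, 3 };
	char seen[16];

	memset(seen, 0, sizeof(seen));
	for (int i = 0; i < 5; i++) {
		if (seen[scanned[i]]) {
			printf("chunk %d: stale duplicate, delete\n", scanned[i]);
		} else {
			seen[scanned[i]] = 1;
			printf("chunk %d: live copy\n", scanned[i]);
		}
	}
	return 0;
}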
diff --git a/fs/yaffs2/yaffs_yaffs2.h b/fs/yaffs2/yaffs_yaffs2.h
deleted file mode 100644
index e1a9287..0000000
--- a/fs/yaffs2/yaffs_yaffs2.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YAFFS_YAFFS2_H__
-#define __YAFFS_YAFFS2_H__
-
-#include "yaffs_guts.h"
-
-void yaffs_calc_oldest_dirty_seq(struct yaffs_dev *dev);
-void yaffs2_find_oldest_dirty_seq(struct yaffs_dev *dev);
-void yaffs2_clear_oldest_dirty_seq(struct yaffs_dev *dev,
- struct yaffs_block_info *bi);
-void yaffs2_update_oldest_dirty_seq(struct yaffs_dev *dev, unsigned block_no,
- struct yaffs_block_info *bi);
-int yaffs_block_ok_for_gc(struct yaffs_dev *dev, struct yaffs_block_info *bi);
-u32 yaffs2_find_refresh_block(struct yaffs_dev *dev);
-int yaffs2_checkpt_required(struct yaffs_dev *dev);
-int yaffs_calc_checkpt_blocks_required(struct yaffs_dev *dev);
-
-void yaffs2_checkpt_invalidate(struct yaffs_dev *dev);
-int yaffs2_checkpt_save(struct yaffs_dev *dev);
-int yaffs2_checkpt_restore(struct yaffs_dev *dev);
-
-int yaffs2_handle_hole(struct yaffs_obj *obj, loff_t new_size);
-int yaffs2_scan_backwards(struct yaffs_dev *dev);
-
-#endif
diff --git a/fs/yaffs2/yportenv.h b/fs/yaffs2/yportenv.h
deleted file mode 100644
index 8183425..0000000
--- a/fs/yaffs2/yportenv.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * YAFFS: Yet Another Flash File System. A NAND-flash specific file system.
- *
- * Copyright (C) 2002-2010 Aleph One Ltd.
- * for Toby Churchill Ltd and Brightstar Engineering
- *
- * Created by Charles Manning <charles@aleph1.co.uk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License version 2.1 as
- * published by the Free Software Foundation.
- *
- * Note: Only YAFFS headers are LGPL, YAFFS C code is covered by GPL.
- */
-
-#ifndef __YPORTENV_LINUX_H__
-#define __YPORTENV_LINUX_H__
-
-#include <linux/version.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/sched.h>
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/xattr.h>
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/sort.h>
-#include <linux/bitops.h>
-
-#define YCHAR char
-#define YUCHAR unsigned char
-#define _Y(x) x
-
-#define YAFFS_LOSTNFOUND_NAME "lost+found"
-#define YAFFS_LOSTNFOUND_PREFIX "obj"
-
-
-#define YAFFS_ROOT_MODE 0755
-#define YAFFS_LOSTNFOUND_MODE 0700
-
-#define Y_CURRENT_TIME CURRENT_TIME.tv_sec
-#define Y_TIME_CONVERT(x) (x).tv_sec
-
-#define compile_time_assertion(assertion) \
- ({ int x = __builtin_choose_expr(assertion, 0, (void)0); (void) x; })
-
-
-#ifndef Y_DUMP_STACK
-#define Y_DUMP_STACK() dump_stack()
-#endif
-
-#define yaffs_trace(msk, fmt, ...) do { \
- if(yaffs_trace_mask & (msk)) \
- printk(KERN_DEBUG "yaffs: " fmt "\n", ##__VA_ARGS__); \
-} while(0)
-
-#ifndef YBUG
-#define YBUG() do {\
- yaffs_trace(YAFFS_TRACE_BUG,\
- "bug " __FILE__ " %d",\
- __LINE__);\
- Y_DUMP_STACK();\
-} while (0)
-#endif
-
-#endif
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 17c20c7..9f9aa32 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -1,6 +1,6 @@
#include <linux/stat.h>
#include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
@@ -214,7 +214,7 @@ static const struct bin_table bin_net_ipv4_route_table[] = {
{ CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL, "gc_min_interval" },
{ CTL_INT, NET_IPV4_ROUTE_GC_MIN_INTERVAL_MS, "gc_min_interval_ms" },
{ CTL_INT, NET_IPV4_ROUTE_GC_TIMEOUT, "gc_timeout" },
- { CTL_INT, NET_IPV4_ROUTE_GC_INTERVAL, "gc_interval" },
+ /* NET_IPV4_ROUTE_GC_INTERVAL "gc_interval" no longer used */
{ CTL_INT, NET_IPV4_ROUTE_REDIRECT_LOAD, "redirect_load" },
{ CTL_INT, NET_IPV4_ROUTE_REDIRECT_NUMBER, "redirect_number" },
{ CTL_INT, NET_IPV4_ROUTE_REDIRECT_SILENCE, "redirect_silence" },
diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c
index 4e4932a..362da65 100644
--- a/kernel/sysctl_check.c
+++ b/kernel/sysctl_check.c
@@ -1,6 +1,6 @@
#include <linux/stat.h>
#include <linux/sysctl.h>
-#include "../fs/xfs/linux-2.6/xfs_sysctl.h"
+#include "../fs/xfs/xfs_sysctl.h"
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>